test: mock retry backoff and compression sleeps in slow tests

Cuts ~65s off shard 3's local runtime (108s \u2192 48s) by neutralizing real wall-clock waits in backoff/compression/retry paths. Tests assert behavior (retry count, final result, error handling), never timing. Changes: - tests/run_agent/conftest.py (NEW): autouse fixture mocks run_agent.jittered_backoff to 0.0 for all tests in the directory. Collapses the `while time.time() < sleep_end` busy-loop to a no-op. Does NOT mock time.sleep globally (breaks threading tests). - test_anthropic_error_handling.py: per-file fixture mocks time.sleep and asyncio.sleep for this test's retry paths (6 tests \u00d7 10s \u2192 ~2s each). - test_413_compression.py: mocks time.sleep for the 2s compression retry pauses (9 tests \u00d7 2s \u2192 millisecond range). - test_run_agent_codex_responses.py: mocks time.sleep for Codex retry path (6.8s \u2192 0.24s on the empty-output retry test). - test_fallback_model.py: mocks time.sleep for transport-recovery path. - test_retaindb_plugin.py: caps retaindb module's time.sleep to 0.05s so background writer-thread sleeps don't block tests. Replaces arbitrary time.sleep(N) waits with polling loops. Validation: - tests/run_agent/ + tests/plugins/test_retaindb_plugin.py: 827 passed, 0 failed, 22.9s (was ~75s before). - Matrix shard 3 local: 3098 passed, 48.2s (was 108s). - No test's timing-assertion contract is changed (tests still verify retry happens, just don't wait 5s for it).
fix(tests): make AIAgent constructor calls self-contained (#11755 )
2026-04-17 13:19:00 -07:00 · 2026-04-17 12:32:03 -07:00 · 2026-04-17 12:05:22 -07:00 · 2026-04-17 06:50:36 -07:00 · 2026-04-17 06:50:36 -07:00 · 2026-04-17 06:50:36 -07:00
152 changed files with 15690 additions and 3147 deletions
@@ -458,13 +458,45 @@ def profile_env(tmp_path, monkeypatch):

 ## Testing

+**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
+hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
+4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
+developer machine with API keys set diverges from CI in ways that have caused
+multiple "works locally, fails in CI" incidents (and the reverse).
+
 ```bash
-source venv/bin/activate
-python -m pytest tests/ -q          # Full suite (~3000 tests, ~3 min)
-python -m pytest tests/test_model_tools.py -q   # Toolset resolution
-python -m pytest tests/test_cli_init.py -q       # CLI config loading
-python -m pytest tests/gateway/ -q               # Gateway tests
-python -m pytest tests/tools/ -q                 # Tool-level tests
+scripts/run_tests.sh                                  # full suite, CI-parity
+scripts/run_tests.sh tests/gateway/                   # one directory
+scripts/run_tests.sh tests/agent/test_foo.py::test_x  # one test
+scripts/run_tests.sh -v --tb=long                     # pass-through pytest flags
 ```

+### Why the wrapper (and why the old "just call pytest" doesn't work)
+
+Five real sources of local-vs-CI drift the script closes:
+
+| | Without wrapper | With wrapper |
+|---|---|---|
+| Provider API keys | Whatever is in your env (auto-detects pool) | All `*_API_KEY`/`*_TOKEN`/etc. unset |
+| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
+| Timezone | Local TZ (PDT etc.) | UTC |
+| Locale | Whatever is set | C.UTF-8 |
+| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
+
+`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
+invocation (including IDE integrations) gets hermetic behavior — but the wrapper
+is belt-and-suspenders.
+
+### Running without the wrapper (only if you must)
+
+If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
+pytest directly), at minimum activate the venv and pass `-n 4`:
+
+```bash
+source venv/bin/activate
+python -m pytest tests/ -q -n 4
+```
+
+Worker count above 4 will surface test-ordering flakes that CI never sees.
+
 Always run the full suite before pushing changes.
@@ -1208,6 +1208,19 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            logger.debug("Qwen OAuth token seed failed: %s", exc)

    elif provider == "openai-codex":
+        # Respect user suppression — `hermes auth remove openai-codex` marks
+        # the device_code source as suppressed so it won't be re-seeded from
+        # either the Hermes auth store or ~/.codex/auth.json.  Without this
+        # gate the removal is instantly undone on the next load_pool() call.
+        codex_suppressed = False
+        try:
+            from hermes_cli.auth import is_source_suppressed
+            codex_suppressed = is_source_suppressed(provider, "device_code")
+        except ImportError:
+            pass
+        if codex_suppressed:
+            return changed, active_sources
+
        state = _load_provider_state(auth_store, "openai-codex")
        tokens = state.get("tokens") if isinstance(state, dict) else None
        # Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
@@ -634,13 +634,7 @@ class InsightsEngine:
        lines.append(f"  Sessions:          {o['total_sessions']:<12}  Messages:        {o['total_messages']:,}")
        lines.append(f"  Tool calls:        {o['total_tool_calls']:<12,}  User messages:   {o['user_messages']:,}")
        lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
-        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
-        if cache_total > 0:
-            lines.append(f"  Cache read:        {o['total_cache_read_tokens']:<12,}  Cache write:     {o['total_cache_write_tokens']:,}")
-        cost_str = f"${o['estimated_cost']:.2f}"
-        if o.get("models_without_pricing"):
-            cost_str += " *"
-        lines.append(f"  Total tokens:      {o['total_tokens']:<12,}  Est. cost:       {cost_str}")
+        lines.append(f"  Total tokens:      {o['total_tokens']:,}")
        if o["total_hours"] > 0:
            lines.append(f"  Active time:       ~{_format_duration(o['total_hours'] * 3600):<11}  Avg session:     ~{_format_duration(o['avg_session_duration'])}")
        lines.append(f"  Avg msgs/session:  {o['avg_messages_per_session']:.1f}")
@@ -650,16 +644,10 @@ class InsightsEngine:
        if report["models"]:
            lines.append("  🤖 Models Used")
            lines.append("  " + "─" * 56)
-            lines.append(f"  {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
+            lines.append(f"  {'Model':<30} {'Sessions':>8} {'Tokens':>12}")
            for m in report["models"]:
                model_name = m["model"][:28]
-                if m.get("has_pricing"):
-                    cost_cell = f"${m['cost']:>6.2f}"
-                else:
-                    cost_cell = "     N/A"
-                lines.append(f"  {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
-            if o.get("models_without_pricing"):
-                lines.append("  * Cost N/A for custom/self-hosted models")
+                lines.append(f"  {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}")
            lines.append("")

        # Platform breakdown
@@ -739,15 +727,7 @@ class InsightsEngine:

        # Overview
        lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
-        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
-        if cache_total > 0:
-            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
-        else:
-            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
-        cost_note = ""
-        if o.get("models_without_pricing"):
-            cost_note = " _(excludes custom/self-hosted models)_"
-        lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
+        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
        if o["total_hours"] > 0:
            lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
        lines.append("")
@@ -756,8 +736,7 @@ class InsightsEngine:
        if report["models"]:
            lines.append("**🤖 Models:**")
            for m in report["models"][:5]:
-                cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
-                lines.append(f"  {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
+                lines.append(f"  {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens")
            lines.append("")

        # Platforms (if multi-platform)
@@ -4514,6 +4514,34 @@ class HermesCLI:
        self._restore_modal_input_snapshot()
        self._invalidate(min_interval=0.0)

+    @staticmethod
+    def _compute_model_picker_viewport(
+        selected: int,
+        scroll_offset: int,
+        n: int,
+        term_rows: int,
+        reserved_below: int = 6,
+        panel_chrome: int = 6,
+        min_visible: int = 3,
+    ) -> tuple[int, int]:
+        """Resolve (scroll_offset, visible) for the /model picker viewport.
+
+        ``reserved_below`` matches the approval / clarify panels — input area,
+        status bar, and separators below the panel. ``panel_chrome`` covers
+        this panel's own borders + blanks + hint row. The remaining rows hold
+        the scrollable list, with the offset slid to keep ``selected`` on screen.
+        """
+        max_visible = max(min_visible, term_rows - reserved_below - panel_chrome)
+        if n <= max_visible:
+            return 0, n
+        visible = max_visible
+        if selected < scroll_offset:
+            scroll_offset = selected
+        elif selected >= scroll_offset + visible:
+            scroll_offset = selected - visible + 1
+        scroll_offset = max(0, min(scroll_offset, n - visible))
+        return scroll_offset, visible
+
    def _apply_model_switch_result(self, result, persist_global: bool) -> None:
        if not result.success:
            _cprint(f"  ✗ {result.error_message}")
@@ -8528,6 +8556,7 @@ class HermesCLI:
            # --- /model picker modal ---
            if self._model_picker_state:
                self._handle_model_picker_selection()
+                event.app.current_buffer.reset()
                event.app.invalidate()
                return

@@ -8693,6 +8722,13 @@ class HermesCLI:
            state["selected"] = min(max_idx, state.get("selected", 0) + 1)
            event.app.invalidate()

+        @kb.add('escape', filter=Condition(lambda: bool(self._model_picker_state)), eager=True)
+        def model_picker_escape(event):
+            """ESC closes the /model picker."""
+            self._close_model_picker()
+            event.app.current_buffer.reset()
+            event.app.invalidate()
+
        # --- History navigation: up/down browse history in normal input mode ---
        # The TextArea is multiline, so by default up/down only move the cursor.
        # Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -9494,6 +9530,22 @@ class HermesCLI:

            box_width = _panel_box_width(title, [hint] + choices, min_width=46, max_width=84)
            inner_text_width = max(8, box_width - 6)
+            selected = state.get("selected", 0)
+
+            # Scrolling viewport: the panel renders into a Window with no max
+            # height, so without limiting visible items the bottom border and
+            # any items past the available terminal rows get clipped on long
+            # provider catalogs (e.g. Ollama Cloud's 36+ models).
+            try:
+                from prompt_toolkit.application import get_app
+                term_rows = get_app().output.get_size().rows
+            except Exception:
+                term_rows = shutil.get_terminal_size((100, 24)).lines
+            scroll_offset, visible = HermesCLI._compute_model_picker_viewport(
+                selected, state.get("_scroll_offset", 0), len(choices), term_rows,
+            )
+            state["_scroll_offset"] = scroll_offset
+
            lines = []
            lines.append(('class:clarify-border', '╭─ '))
            lines.append(('class:clarify-title', title))
@@ -9501,8 +9553,8 @@ class HermesCLI:
            _append_blank_panel_line(lines, 'class:clarify-border', box_width)
            _append_panel_line(lines, 'class:clarify-border', 'class:clarify-hint', hint, box_width)
            _append_blank_panel_line(lines, 'class:clarify-border', box_width)
-            selected = state.get("selected", 0)
-            for idx, choice in enumerate(choices):
+            for idx in range(scroll_offset, scroll_offset + visible):
+                choice = choices[idx]
                style = 'class:clarify-selected' if idx == selected else 'class:clarify-choice'
                prefix = '❯ ' if idx == selected else '  '
                for wrapped in _wrap_panel_text(prefix + choice, inner_text_width, subsequent_indent='  '):
@@ -27,7 +27,7 @@ except ImportError:
    except ImportError:
        msvcrt = None
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional

 # Add parent directory to path for imports BEFORE repo-level imports.
 # Without this, standalone invocations (e.g. after `hermes update` reloads
@@ -49,6 +49,25 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "qqbot",
 })

+# Platforms that support a configured cron/notification home target, mapped to
+# the environment variable used by gateway setup/runtime config.
+_HOME_TARGET_ENV_VARS = {
+    "matrix": "MATRIX_HOME_ROOM",
+    "telegram": "TELEGRAM_HOME_CHANNEL",
+    "discord": "DISCORD_HOME_CHANNEL",
+    "slack": "SLACK_HOME_CHANNEL",
+    "signal": "SIGNAL_HOME_CHANNEL",
+    "mattermost": "MATTERMOST_HOME_CHANNEL",
+    "sms": "SMS_HOME_CHANNEL",
+    "email": "EMAIL_HOME_ADDRESS",
+    "dingtalk": "DINGTALK_HOME_CHANNEL",
+    "feishu": "FEISHU_HOME_CHANNEL",
+    "wecom": "WECOM_HOME_CHANNEL",
+    "weixin": "WEIXIN_HOME_CHANNEL",
+    "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
+    "qqbot": "QQ_HOME_CHANNEL",
+}
+
 from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run

 # Sentinel: when a cron agent has nothing new to report, it can start its
@@ -76,15 +95,23 @@ def _resolve_origin(job: dict) -> Optional[dict]:
    return None


-def _resolve_delivery_target(job: dict) -> Optional[dict]:
-    """Resolve the concrete auto-delivery target for a cron job, if any."""
-    deliver = job.get("deliver", "local")
+def _get_home_target_chat_id(platform_name: str) -> str:
+    """Return the configured home target chat/room ID for a delivery platform."""
+    env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
+    if not env_var:
+        return ""
+    return os.getenv(env_var, "")
+
+
+def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
+    """Resolve one concrete auto-delivery target for a cron job."""
+
    origin = _resolve_origin(job)

-    if deliver == "local":
+    if deliver_value == "local":
        return None

-    if deliver == "origin":
+    if deliver_value == "origin":
        if origin:
            return {
                "platform": origin["platform"],
@@ -93,8 +120,8 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
            }
        # Origin missing (e.g. job created via API/script) — try each
        # platform's home channel as a fallback instead of silently dropping.
-        for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"):
-            chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
+        for platform_name in _HOME_TARGET_ENV_VARS:
+            chat_id = _get_home_target_chat_id(platform_name)
            if chat_id:
                logger.info(
                    "Job '%s' has deliver=origin but no origin; falling back to %s home channel",
@@ -108,8 +135,8 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
                }
        return None

-    if ":" in deliver:
-        platform_name, rest = deliver.split(":", 1)
+    if ":" in deliver_value:
+        platform_name, rest = deliver_value.split(":", 1)
        platform_key = platform_name.lower()

        from tools.send_message_tool import _parse_target_ref
@@ -139,7 +166,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
            "thread_id": thread_id,
        }

-    platform_name = deliver
+    platform_name = deliver_value
    if origin and origin.get("platform") == platform_name:
        return {
            "platform": platform_name,
@@ -149,7 +176,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:

    if platform_name.lower() not in _KNOWN_DELIVERY_PLATFORMS:
        return None
-    chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
+    chat_id = _get_home_target_chat_id(platform_name)
    if not chat_id:
        return None

@@ -160,6 +187,30 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
    }


+def _resolve_delivery_targets(job: dict) -> List[dict]:
+    """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
+    deliver = job.get("deliver", "local")
+    if deliver == "local":
+        return []
+    parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
+    seen = set()
+    targets = []
+    for part in parts:
+        target = _resolve_single_delivery_target(job, part)
+        if target:
+            key = (target["platform"].lower(), str(target["chat_id"]), target.get("thread_id"))
+            if key not in seen:
+                seen.add(key)
+                targets.append(target)
+    return targets
+
+
+def _resolve_delivery_target(job: dict) -> Optional[dict]:
+    """Resolve the concrete auto-delivery target for a cron job, if any."""
+    targets = _resolve_delivery_targets(job)
+    return targets[0] if targets else None
+
+
 # Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
 _AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
 _VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
@@ -200,7 +251,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:

 def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]:
    """
-    Deliver job output to the configured target (origin chat, specific platform, etc.).
+    Deliver job output to the configured target(s) (origin chat, specific platform, etc.).

    When ``adapters`` and ``loop`` are provided (gateway is running), tries to
    use the live adapter first — this supports E2EE rooms (e.g. Matrix) where
@@ -209,33 +260,14 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option

    Returns None on success, or an error string on failure.
    """
-    target = _resolve_delivery_target(job)
-    if not target:
+    targets = _resolve_delivery_targets(job)
+    if not targets:
        if job.get("deliver", "local") != "local":
            msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}"
            logger.warning("Job '%s': %s", job["id"], msg)
            return msg
        return None  # local-only jobs don't deliver — not a failure

-    platform_name = target["platform"]
-    chat_id = target["chat_id"]
-    thread_id = target.get("thread_id")
-
-    # Diagnostic: log thread_id for topic-aware delivery debugging
-    origin = job.get("origin") or {}
-    origin_thread = origin.get("thread_id")
-    if origin_thread and not thread_id:
-        logger.warning(
-            "Job '%s': origin has thread_id=%s but delivery target lost it "
-            "(deliver=%s, target=%s)",
-            job["id"], origin_thread, job.get("deliver", "local"), target,
-        )
-    elif thread_id:
-        logger.debug(
-            "Job '%s': delivering to %s:%s thread_id=%s",
-            job["id"], platform_name, chat_id, thread_id,
-        )
-
    from tools.send_message_tool import _send_to_platform
    from gateway.config import load_gateway_config, Platform

@@ -258,24 +290,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "bluebubbles": Platform.BLUEBUBBLES,
        "qqbot": Platform.QQBOT,
    }
-    platform = platform_map.get(platform_name.lower())
-    if not platform:
-        msg = f"unknown platform '{platform_name}'"
-        logger.warning("Job '%s': %s", job["id"], msg)
-        return msg
-
-    try:
-        config = load_gateway_config()
-    except Exception as e:
-        msg = f"failed to load gateway config: {e}"
-        logger.error("Job '%s': %s", job["id"], msg)
-        return msg
-
-    pconfig = config.platforms.get(platform)
-    if not pconfig or not pconfig.enabled:
-        msg = f"platform '{platform_name}' not configured/enabled"
-        logger.warning("Job '%s': %s", job["id"], msg)
-        return msg

    # Optionally wrap the content with a header/footer so the user knows this
    # is a cron delivery.  Wrapping is on by default; set cron.wrap_response: false
@@ -304,67 +318,117 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
    from gateway.platforms.base import BasePlatformAdapter
    media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)

-    # Prefer the live adapter when the gateway is running — this supports E2EE
-    # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
-    runtime_adapter = (adapters or {}).get(platform)
-    if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
-        send_metadata = {"thread_id": thread_id} if thread_id else None
-        try:
-            # Send cleaned text (MEDIA tags stripped) — not the raw content
-            text_to_send = cleaned_delivery_content.strip()
-            adapter_ok = True
-            if text_to_send:
-                future = asyncio.run_coroutine_threadsafe(
-                    runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
-                    loop,
-                )
-                send_result = future.result(timeout=60)
-                if send_result and not getattr(send_result, "success", True):
-                    err = getattr(send_result, "error", "unknown")
-                    logger.warning(
-                        "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
-                        job["id"], platform_name, chat_id, err,
-                    )
-                    adapter_ok = False  # fall through to standalone path
+    try:
+        config = load_gateway_config()
+    except Exception as e:
+        msg = f"failed to load gateway config: {e}"
+        logger.error("Job '%s': %s", job["id"], msg)
+        return msg

-            # Send extracted media files as native attachments via the live adapter
-            if adapter_ok and media_files:
-                _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
+    delivery_errors = []

-            if adapter_ok:
-                logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
-                return None
-        except Exception as e:
+    for target in targets:
+        platform_name = target["platform"]
+        chat_id = target["chat_id"]
+        thread_id = target.get("thread_id")
+
+        # Diagnostic: log thread_id for topic-aware delivery debugging
+        origin = job.get("origin") or {}
+        origin_thread = origin.get("thread_id")
+        if origin_thread and not thread_id:
            logger.warning(
-                "Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
-                job["id"], platform_name, chat_id, e,
+                "Job '%s': origin has thread_id=%s but delivery target lost it "
+                "(deliver=%s, target=%s)",
+                job["id"], origin_thread, job.get("deliver", "local"), target,
+            )
+        elif thread_id:
+            logger.debug(
+                "Job '%s': delivering to %s:%s thread_id=%s",
+                job["id"], platform_name, chat_id, thread_id,
            )

-    # Standalone path: run the async send in a fresh event loop (safe from any thread)
-    coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
-    try:
-        result = asyncio.run(coro)
-    except RuntimeError:
-        # asyncio.run() checks for a running loop before awaiting the coroutine;
-        # when it raises, the original coro was never started — close it to
-        # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
-        # fresh thread that has no running loop.
-        coro.close()
-        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
-            result = future.result(timeout=30)
-    except Exception as e:
-        msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
-        logger.error("Job '%s': %s", job["id"], msg)
-        return msg
+        platform = platform_map.get(platform_name.lower())
+        if not platform:
+            msg = f"unknown platform '{platform_name}'"
+            logger.warning("Job '%s': %s", job["id"], msg)
+            delivery_errors.append(msg)
+            continue

-    if result and result.get("error"):
-        msg = f"delivery error: {result['error']}"
-        logger.error("Job '%s': %s", job["id"], msg)
-        return msg
+        # Prefer the live adapter when the gateway is running — this supports E2EE
+        # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
+        runtime_adapter = (adapters or {}).get(platform)
+        delivered = False
+        if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
+            send_metadata = {"thread_id": thread_id} if thread_id else None
+            try:
+                # Send cleaned text (MEDIA tags stripped) — not the raw content
+                text_to_send = cleaned_delivery_content.strip()
+                adapter_ok = True
+                if text_to_send:
+                    future = asyncio.run_coroutine_threadsafe(
+                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
+                        loop,
+                    )
+                    send_result = future.result(timeout=60)
+                    if send_result and not getattr(send_result, "success", True):
+                        err = getattr(send_result, "error", "unknown")
+                        logger.warning(
+                            "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
+                            job["id"], platform_name, chat_id, err,
+                        )
+                        adapter_ok = False  # fall through to standalone path

-    logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
+                # Send extracted media files as native attachments via the live adapter
+                if adapter_ok and media_files:
+                    _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
+
+                if adapter_ok:
+                    logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
+                    delivered = True
+            except Exception as e:
+                logger.warning(
+                    "Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
+                    job["id"], platform_name, chat_id, e,
+                )
+
+        if not delivered:
+            pconfig = config.platforms.get(platform)
+            if not pconfig or not pconfig.enabled:
+                msg = f"platform '{platform_name}' not configured/enabled"
+                logger.warning("Job '%s': %s", job["id"], msg)
+                delivery_errors.append(msg)
+                continue
+
+            # Standalone path: run the async send in a fresh event loop (safe from any thread)
+            coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
+            try:
+                result = asyncio.run(coro)
+            except RuntimeError:
+                # asyncio.run() checks for a running loop before awaiting the coroutine;
+                # when it raises, the original coro was never started — close it to
+                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
+                # fresh thread that has no running loop.
+                coro.close()
+                import concurrent.futures
+                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
+                    result = future.result(timeout=30)
+            except Exception as e:
+                msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
+                logger.error("Job '%s': %s", job["id"], msg)
+                delivery_errors.append(msg)
+                continue
+
+            if result and result.get("error"):
+                msg = f"delivery error: {result['error']}"
+                logger.error("Job '%s': %s", job["id"], msg)
+                delivery_errors.append(msg)
+                continue
+
+            logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
+
+    if delivery_errors:
+        return "; ".join(delivery_errors)
    return None


@@ -307,6 +307,14 @@ class GatewayConfig:
            # QQBot uses extra dict for app credentials
            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
                connected.append(platform)
+            # DingTalk uses client_id/client_secret from config.extra or env vars
+            elif platform == Platform.DINGTALK and (
+                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
+            ) and (
+                config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
+            ):
+                connected.append(platform)
+        
        return connected
    
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -617,6 +625,20 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(ntc, list):
                        ntc = ",".join(str(v) for v in ntc)
                    os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
+                # allow_mentions: granular control over what the bot can ping.
+                # Safe defaults (no @everyone/roles) are applied in the adapter;
+                # these YAML keys only override when set and let users opt back
+                # into unsafe modes (e.g. roles=true) if they actually want it.
+                allow_mentions_cfg = discord_cfg.get("allow_mentions")
+                if isinstance(allow_mentions_cfg, dict):
+                    for yaml_key, env_key in (
+                        ("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
+                        ("roles", "DISCORD_ALLOW_MENTION_ROLES"),
+                        ("users", "DISCORD_ALLOW_MENTION_USERS"),
+                        ("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
+                    ):
+                        if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
+                            os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()

            # Telegram settings → env vars (env vars take precedence)
            telegram_cfg = yaml_cfg.get("telegram", {})
@@ -663,6 +685,24 @@ def load_gateway_config() -> GatewayConfig:
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)

+            # DingTalk settings → env vars (env vars take precedence)
+            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
+            if isinstance(dingtalk_cfg, dict):
+                if "require_mention" in dingtalk_cfg and not os.getenv("DINGTALK_REQUIRE_MENTION"):
+                    os.environ["DINGTALK_REQUIRE_MENTION"] = str(dingtalk_cfg["require_mention"]).lower()
+                if "mention_patterns" in dingtalk_cfg and not os.getenv("DINGTALK_MENTION_PATTERNS"):
+                    os.environ["DINGTALK_MENTION_PATTERNS"] = json.dumps(dingtalk_cfg["mention_patterns"])
+                frc = dingtalk_cfg.get("free_response_chats")
+                if frc is not None and not os.getenv("DINGTALK_FREE_RESPONSE_CHATS"):
+                    if isinstance(frc, list):
+                        frc = ",".join(str(v) for v in frc)
+                    os.environ["DINGTALK_FREE_RESPONSE_CHATS"] = str(frc)
+                allowed = dingtalk_cfg.get("allowed_users")
+                if allowed is not None and not os.getenv("DINGTALK_ALLOWED_USERS"):
+                    if isinstance(allowed, list):
+                        allowed = ",".join(str(v) for v in allowed)
+                    os.environ["DINGTALK_ALLOWED_USERS"] = str(allowed)
+
            # Matrix settings → env vars (env vars take precedence)
            matrix_cfg = yaml_cfg.get("matrix", {})
            if isinstance(matrix_cfg, dict):
@@ -1006,6 +1046,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
        if webhook_secret:
            config.platforms[Platform.WEBHOOK].extra["secret"] = webhook_secret

+    # DingTalk
+    dingtalk_client_id = os.getenv("DINGTALK_CLIENT_ID")
+    dingtalk_client_secret = os.getenv("DINGTALK_CLIENT_SECRET")
+    if dingtalk_client_id and dingtalk_client_secret:
+        if Platform.DINGTALK not in config.platforms:
+            config.platforms[Platform.DINGTALK] = PlatformConfig()
+        config.platforms[Platform.DINGTALK].enabled = True
+        config.platforms[Platform.DINGTALK].extra.update({
+            "client_id": dingtalk_client_id,
+            "client_secret": dingtalk_client_secret,
+        })
+        dingtalk_home = os.getenv("DINGTALK_HOME_CHANNEL")
+        if dingtalk_home:
+            config.platforms[Platform.DINGTALK].home_channel = HomeChannel(
+                platform=Platform.DINGTALK,
+                chat_id=dingtalk_home,
+                name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
+            )
+
    # Feishu / Lark
    feishu_app_id = os.getenv("FEISHU_APP_ID")
    feishu_app_secret = os.getenv("FEISHU_APP_SECRET")
@@ -1991,6 +1991,7 @@ class BasePlatformAdapter(ABC):
        chat_topic: Optional[str] = None,
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
+        is_bot: bool = False,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2007,6 +2008,7 @@ class BasePlatformAdapter(ABC):
            chat_topic=chat_topic.strip() if chat_topic else None,
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
+            is_bot=is_bot,
        )
    
    @abstractmethod
@@ -12,18 +12,27 @@ Configuration in config.yaml:
    platforms:
      dingtalk:
        enabled: true
+        # Optional group-chat gating (mirrors Slack/Telegram/Discord):
+        require_mention: true            # or DINGTALK_REQUIRE_MENTION env var
+        # free_response_chats:           # conversations that skip require_mention
+        #   - cidABC==
+        # mention_patterns:              # regex wake-words (e.g. Chinese bot names)
+        #   - "^小马"
+        # allowed_users:                 # staff_id or sender_id list; "*" = any
+        #   - "manager1234"
        extra:
          client_id: "your-app-key"      # or DINGTALK_CLIENT_ID env var
          client_secret: "your-secret"   # or DINGTALK_CLIENT_SECRET env var
 """

 import asyncio
+import json
 import logging
 import os
 import re
 import uuid
 from datetime import datetime, timezone
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Set

 try:
    import dingtalk_stream
@@ -54,7 +63,7 @@ logger = logging.getLogger(__name__)
 MAX_MESSAGE_LENGTH = 20000
 RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
 _SESSION_WEBHOOKS_MAX = 500
-_DINGTALK_WEBHOOK_RE = re.compile(r'^https://api\.dingtalk\.com/')
+_DINGTALK_WEBHOOK_RE = re.compile(r'^https://(?:api|oapi)\.dingtalk\.com/')


 def check_dingtalk_requirements() -> bool:
@@ -92,6 +101,10 @@ class DingTalkAdapter(BasePlatformAdapter):
        # Map chat_id -> session_webhook for reply routing
        self._session_webhooks: Dict[str, str] = {}

+        # Group-chat gating (mirrors Slack/Telegram/Discord/WhatsApp conventions)
+        self._mention_patterns: List[re.Pattern] = self._compile_mention_patterns()
+        self._allowed_users: Set[str] = self._load_allowed_users()
+
    # -- Connection lifecycle -----------------------------------------------

    async def connect(self) -> bool:
@@ -128,12 +141,12 @@ class DingTalkAdapter(BasePlatformAdapter):
            return False

    async def _run_stream(self) -> None:
-        """Run the blocking stream client with auto-reconnection."""
+        """Run the stream client with auto-reconnection."""
        backoff_idx = 0
        while self._running:
            try:
                logger.debug("[%s] Starting stream client...", self.name)
-                await asyncio.to_thread(self._stream_client.start)
+                await self._stream_client.start()
            except asyncio.CancelledError:
                return
            except Exception as e:
@@ -154,12 +167,19 @@ class DingTalkAdapter(BasePlatformAdapter):
        self._running = False
        self._mark_disconnected()

+        websocket = getattr(self._stream_client, "websocket", None)
+        if websocket is not None:
+            try:
+                await websocket.close()
+            except Exception as e:
+                logger.debug("[%s] websocket close during disconnect failed: %s", self.name, e)
+
        if self._stream_task:
            self._stream_task.cancel()
            try:
-                await self._stream_task
-            except asyncio.CancelledError:
-                pass
+                await asyncio.wait_for(self._stream_task, timeout=2.0)
+            except (asyncio.CancelledError, asyncio.TimeoutError):
+                logger.debug("[%s] stream task did not exit cleanly during disconnect", self.name)
            self._stream_task = None

        if self._http_client:
@@ -171,6 +191,118 @@ class DingTalkAdapter(BasePlatformAdapter):
        self._dedup.clear()
        logger.info("[%s] Disconnected", self.name)

+    # -- Group gating --------------------------------------------------------
+
+    def _dingtalk_require_mention(self) -> bool:
+        """Return whether group chats should require an explicit bot trigger."""
+        configured = self.config.extra.get("require_mention")
+        if configured is not None:
+            if isinstance(configured, str):
+                return configured.lower() in ("true", "1", "yes", "on")
+            return bool(configured)
+        return os.getenv("DINGTALK_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
+
+    def _dingtalk_free_response_chats(self) -> Set[str]:
+        raw = self.config.extra.get("free_response_chats")
+        if raw is None:
+            raw = os.getenv("DINGTALK_FREE_RESPONSE_CHATS", "")
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        return {part.strip() for part in str(raw).split(",") if part.strip()}
+
+    def _compile_mention_patterns(self) -> List[re.Pattern]:
+        """Compile optional regex wake-word patterns for group triggers."""
+        patterns = self.config.extra.get("mention_patterns") if self.config.extra else None
+        if patterns is None:
+            raw = os.getenv("DINGTALK_MENTION_PATTERNS", "").strip()
+            if raw:
+                try:
+                    loaded = json.loads(raw)
+                except Exception:
+                    loaded = [part.strip() for part in raw.splitlines() if part.strip()]
+                    if not loaded:
+                        loaded = [part.strip() for part in raw.split(",") if part.strip()]
+                patterns = loaded
+
+        if patterns is None:
+            return []
+        if isinstance(patterns, str):
+            patterns = [patterns]
+        if not isinstance(patterns, list):
+            logger.warning(
+                "[%s] dingtalk mention_patterns must be a list or string; got %s",
+                self.name,
+                type(patterns).__name__,
+            )
+            return []
+
+        compiled: List[re.Pattern] = []
+        for pattern in patterns:
+            if not isinstance(pattern, str) or not pattern.strip():
+                continue
+            try:
+                compiled.append(re.compile(pattern, re.IGNORECASE))
+            except re.error as exc:
+                logger.warning("[%s] Invalid DingTalk mention pattern %r: %s", self.name, pattern, exc)
+        if compiled:
+            logger.info("[%s] Loaded %d DingTalk mention pattern(s)", self.name, len(compiled))
+        return compiled
+
+    def _load_allowed_users(self) -> Set[str]:
+        """Load allowed-users list from config.extra or env var.
+
+        IDs are matched case-insensitively against the sender's ``staff_id`` and
+        ``sender_id``. A wildcard ``*`` disables the check.
+        """
+        raw = self.config.extra.get("allowed_users") if self.config.extra else None
+        if raw is None:
+            raw = os.getenv("DINGTALK_ALLOWED_USERS", "")
+        if isinstance(raw, list):
+            items = [str(part).strip() for part in raw if str(part).strip()]
+        else:
+            items = [part.strip() for part in str(raw).split(",") if part.strip()]
+        return {item.lower() for item in items}
+
+    def _is_user_allowed(self, sender_id: str, sender_staff_id: str) -> bool:
+        if not self._allowed_users or "*" in self._allowed_users:
+            return True
+        candidates = {(sender_id or "").lower(), (sender_staff_id or "").lower()}
+        candidates.discard("")
+        return bool(candidates & self._allowed_users)
+
+    def _message_mentions_bot(self, message: "ChatbotMessage") -> bool:
+        """True if the bot was @-mentioned in a group message.
+
+        dingtalk-stream sets ``is_in_at_list`` on the incoming ChatbotMessage
+        when the bot is addressed via @-mention.
+        """
+        return bool(getattr(message, "is_in_at_list", False))
+
+    def _message_matches_mention_patterns(self, text: str) -> bool:
+        if not text or not self._mention_patterns:
+            return False
+        return any(pattern.search(text) for pattern in self._mention_patterns)
+
+    def _should_process_message(self, message: "ChatbotMessage", text: str, is_group: bool, chat_id: str) -> bool:
+        """Apply DingTalk group trigger rules.
+
+        DMs remain unrestricted (subject to ``allowed_users`` which is enforced
+        earlier). Group messages are accepted when:
+        - the chat is explicitly allowlisted in ``free_response_chats``
+        - ``require_mention`` is disabled
+        - the bot is @mentioned (``is_in_at_list``)
+        - the text matches a configured regex wake-word pattern
+        """
+        if not is_group:
+            return True
+        if chat_id and chat_id in self._dingtalk_free_response_chats():
+            return True
+        if not self._dingtalk_require_mention():
+            return True
+        if self._message_mentions_bot(message):
+            return True
+        return self._message_matches_mention_patterns(text)
+
    # -- Inbound message processing -----------------------------------------

    async def _on_message(self, message: "ChatbotMessage") -> None:
@@ -196,6 +328,22 @@ class DingTalkAdapter(BasePlatformAdapter):
        chat_id = conversation_id or sender_id
        chat_type = "group" if is_group else "dm"

+        # Allowed-users gate (applies to both DM and group)
+        if not self._is_user_allowed(sender_id, sender_staff_id):
+            logger.debug(
+                "[%s] Dropping message from non-allowlisted user staff_id=%s sender_id=%s",
+                self.name, sender_staff_id, sender_id,
+            )
+            return
+
+        # Group mention/pattern gate
+        if not self._should_process_message(message, text, is_group, chat_id):
+            logger.debug(
+                "[%s] Dropping group message that failed mention gate message_id=%s chat_id=%s",
+                self.name, msg_id, chat_id,
+            )
+            return
+
        # Store session webhook for reply routing (validate origin to prevent SSRF)
        session_webhook = getattr(message, "session_webhook", None) or ""
        if session_webhook and chat_id and _DINGTALK_WEBHOOK_RE.match(session_webhook):
@@ -238,18 +386,35 @@ class DingTalkAdapter(BasePlatformAdapter):

    @staticmethod
    def _extract_text(message: "ChatbotMessage") -> str:
-        """Extract plain text from a DingTalk chatbot message."""
-        text = getattr(message, "text", None) or ""
-        if isinstance(text, dict):
-            content = text.get("content", "").strip()
-        else:
-            content = str(text).strip()
+        """Extract plain text from a DingTalk chatbot message.
+
+        Handles both legacy and current dingtalk-stream SDK payload shapes:
+          * legacy: ``message.text`` was a dict ``{"content": "..."}``
+          * >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose
+            ``__str__`` returns ``"TextContent(content=...)"`` — never fall
+            back to ``str(text)`` without extracting ``.content`` first.
+          * rich text moved from ``message.rich_text`` (list) to
+            ``message.rich_text_content.rich_text_list`` (list of dicts).
+        """
+        text = getattr(message, "text", None)
+        content = ""
+        if text is not None:
+            if isinstance(text, dict):
+                content = (text.get("content") or "").strip()
+            elif hasattr(text, "content"):
+                content = str(text.content or "").strip()
+            else:
+                content = str(text).strip()

-        # Fall back to rich text if present
        if not content:
-            rich_text = getattr(message, "rich_text", None)
-            if rich_text and isinstance(rich_text, list):
-                parts = [item["text"] for item in rich_text
+            rich_list = None
+            rtc = getattr(message, "rich_text_content", None)
+            if rtc is not None and hasattr(rtc, "rich_text_list"):
+                rich_list = rtc.rich_text_list
+            if rich_list is None:
+                rich_list = getattr(message, "rich_text", None)
+            if rich_list and isinstance(rich_list, list):
+                parts = [item["text"] for item in rich_list
                         if isinstance(item, dict) and item.get("text")]
                content = " ".join(parts).strip()
        return content
@@ -314,20 +479,43 @@ class _IncomingHandler(ChatbotHandler if DINGTALK_STREAM_AVAILABLE else object):
        self._adapter = adapter
        self._loop = loop

-    def process(self, message: "ChatbotMessage"):
-        """Called by dingtalk-stream in its thread when a message arrives.
+    async def process(self, callback_message):
+        """Called by dingtalk-stream when a message arrives.

-        Schedules the async handler on the main event loop.
+        dingtalk-stream >= 0.24 passes a CallbackMessage whose `.data` contains
+        the chatbot payload. Convert it to ChatbotMessage via
+        ``ChatbotMessage.from_dict()``.
+
+        Message processing is dispatched as a background task so that this
+        method returns the ACK immediately — blocking here would prevent the
+        SDK from sending heartbeats, eventually causing a disconnect.
        """
-        loop = self._loop
-        if loop is None or loop.is_closed():
-            logger.error("[DingTalk] Event loop unavailable, cannot dispatch message")
-            return dingtalk_stream.AckMessage.STATUS_OK, "OK"
-
-        future = asyncio.run_coroutine_threadsafe(self._adapter._on_message(message), loop)
        try:
-            future.result(timeout=60)
+            data = callback_message.data
+            chatbot_msg = ChatbotMessage.from_dict(data)
+
+            # Ensure session_webhook is populated even if the SDK's
+            # from_dict() did not map it (field name mismatch across
+            # SDK versions).
+            if not getattr(chatbot_msg, "session_webhook", None):
+                webhook = (
+                    data.get("sessionWebhook")
+                    or data.get("session_webhook")
+                    or ""
+                )
+                if webhook:
+                    chatbot_msg.session_webhook = webhook
+
+            # Fire-and-forget: return ACK immediately, process in background.
+            asyncio.create_task(self._safe_on_message(chatbot_msg))
        except Exception:
-            logger.exception("[DingTalk] Error processing incoming message")
+            logger.exception("[DingTalk] Error preparing incoming message")

        return dingtalk_stream.AckMessage.STATUS_OK, "OK"
+
+    async def _safe_on_message(self, chatbot_msg: "ChatbotMessage") -> None:
+        """Wrapper that catches exceptions from _on_message."""
+        try:
+            await self._adapter._on_message(chatbot_msg)
+        except Exception:
+            logger.exception("[DingTalk] Error processing incoming message")
@@ -51,7 +51,9 @@ from gateway.platforms.base import (
    ProcessingOutcome,
    SendResult,
    cache_image_from_url,
+    cache_image_from_bytes,
    cache_audio_from_url,
+    cache_audio_from_bytes,
    cache_document_from_bytes,
    SUPPORTED_DOCUMENT_TYPES,
 )
@@ -80,6 +82,41 @@ def check_discord_requirements() -> bool:
    return DISCORD_AVAILABLE


+def _build_allowed_mentions():
+    """Build Discord ``AllowedMentions`` with safe defaults, overridable via env.
+
+    Discord bots default to parsing ``@everyone``, ``@here``, role pings, and
+    user pings when ``allowed_mentions`` is unset on the client — any LLM
+    output or echoed user content that contains ``@everyone`` would therefore
+    ping the whole server. We explicitly deny ``@everyone`` and role pings
+    by default and keep user / replied-user pings enabled so normal
+    conversation still works.
+
+    Override via environment variables (or ``discord.allow_mentions.*`` in
+    config.yaml):
+
+        DISCORD_ALLOW_MENTION_EVERYONE      default false  — @everyone + @here
+        DISCORD_ALLOW_MENTION_ROLES         default false  — @role pings
+        DISCORD_ALLOW_MENTION_USERS         default true   — @user pings
+        DISCORD_ALLOW_MENTION_REPLIED_USER  default true   — reply-ping author
+    """
+    if not DISCORD_AVAILABLE:
+        return None
+
+    def _b(name: str, default: bool) -> bool:
+        raw = os.getenv(name, "").strip().lower()
+        if not raw:
+            return default
+        return raw in ("true", "1", "yes", "on")
+
+    return discord.AllowedMentions(
+        everyone=_b("DISCORD_ALLOW_MENTION_EVERYONE", False),
+        roles=_b("DISCORD_ALLOW_MENTION_ROLES", False),
+        users=_b("DISCORD_ALLOW_MENTION_USERS", True),
+        replied_user=_b("DISCORD_ALLOW_MENTION_REPLIED_USER", True),
+    )
+
+
 class VoiceReceiver:
    """Captures and decodes voice audio from a Discord voice channel.

@@ -458,6 +495,7 @@ class DiscordAdapter(BasePlatformAdapter):
        self._client: Optional[commands.Bot] = None
        self._ready_event = asyncio.Event()
        self._allowed_user_ids: set = set()  # For button approval authorization
+        self._allowed_role_ids: set = set()  # For DISCORD_ALLOWED_ROLES filtering
        # Voice channel state (per-guild)
        self._voice_clients: Dict[int, Any] = {}  # guild_id -> VoiceClient
        # Text batching: merge rapid successive messages (Telegram-style)
@@ -536,6 +574,15 @@ class DiscordAdapter(BasePlatformAdapter):
                    if uid.strip()
                }

+            # Parse DISCORD_ALLOWED_ROLES — comma-separated role IDs.
+            # Users with ANY of these roles can interact with the bot.
+            roles_env = os.getenv("DISCORD_ALLOWED_ROLES", "")
+            if roles_env:
+                self._allowed_role_ids = {
+                    int(rid.strip()) for rid in roles_env.split(",")
+                    if rid.strip().isdigit()
+                }
+
            # Set up intents.
            # Message Content is required for normal text replies.
            # Server Members is only needed when the allowlist contains usernames
@@ -547,7 +594,10 @@ class DiscordAdapter(BasePlatformAdapter):
            intents.message_content = True
            intents.dm_messages = True
            intents.guild_messages = True
-            intents.members = any(not entry.isdigit() for entry in self._allowed_user_ids)
+            intents.members = (
+                any(not entry.isdigit() for entry in self._allowed_user_ids)
+                or bool(self._allowed_role_ids)  # Need members intent for role lookup
+            )
            intents.voice_states = True

            # Resolve proxy (DISCORD_PROXY > generic env vars > macOS system proxy)
@@ -556,10 +606,15 @@ class DiscordAdapter(BasePlatformAdapter):
            if proxy_url:
                logger.info("[%s] Using proxy for Discord: %s", self.name, proxy_url)

-            # Create bot — proxy= for HTTP, connector= for SOCKS
+            # Create bot — proxy= for HTTP, connector= for SOCKS.
+            # allowed_mentions is set with safe defaults (no @everyone/roles)
+            # so LLM output or echoed user content can't ping the whole
+            # server; override per DISCORD_ALLOW_MENTION_* env vars or the
+            # discord.allow_mentions.* block in config.yaml.
            self._client = commands.Bot(
                command_prefix="!",  # Not really used, we handle raw messages
                intents=intents,
+                allowed_mentions=_build_allowed_mentions(),
                **proxy_kwargs_for_bot(proxy_url),
            )
            adapter_self = self  # capture for closure
@@ -594,14 +649,13 @@ class DiscordAdapter(BasePlatformAdapter):
                if message.type not in (discord.MessageType.default, discord.MessageType.reply):
                    return

-                # Check if the message author is in the allowed user list
-                if not self._is_allowed_user(str(message.author.id)):
-                    return
-
                # Bot message filtering (DISCORD_ALLOW_BOTS):
                #   "none"     — ignore all other bots (default)
                #   "mentions" — accept bot messages only when they @mention us
                #   "all"      — accept all bot messages
+                # Must run BEFORE the user allowlist check so that bots
+                # permitted by DISCORD_ALLOW_BOTS are not rejected for
+                # not being in DISCORD_ALLOWED_USERS (fixes #4466).
                if getattr(message.author, "bot", False):
                    allow_bots = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip()
                    if allow_bots == "none":
@@ -609,7 +663,12 @@ class DiscordAdapter(BasePlatformAdapter):
                    elif allow_bots == "mentions":
                        if not self._client.user or self._client.user not in message.mentions:
                            return
-                    # "all" falls through to handle_message
+                    # "all" falls through; bot is permitted — skip the
+                    # human-user allowlist below (bots aren't in it).
+                else:
+                    # Non-bot: enforce the configured user/role allowlists.
+                    if not self._is_allowed_user(str(message.author.id), message.author):
+                        return
                
                # Multi-agent filtering: if the message mentions specific bots
                # but NOT this bot, the sender is talking to another agent —
@@ -833,7 +892,10 @@ class DiscordAdapter(BasePlatformAdapter):
            if reply_to and self._reply_to_mode != "off":
                try:
                    ref_msg = await channel.fetch_message(int(reply_to))
-                    reference = ref_msg
+                    if hasattr(ref_msg, "to_reference"):
+                        reference = ref_msg.to_reference(fail_if_not_exists=False)
+                    else:
+                        reference = ref_msg
                except Exception as e:
                    logger.debug("Could not fetch reply-to message: %s", e)

@@ -851,14 +913,20 @@ class DiscordAdapter(BasePlatformAdapter):
                    err_text = str(e)
                    if (
                        chunk_reference is not None
-                        and "error code: 50035" in err_text
-                        and "Cannot reply to a system message" in err_text
+                        and (
+                            (
+                                "error code: 50035" in err_text
+                                and "Cannot reply to a system message" in err_text
+                            )
+                            or "error code: 10008" in err_text
+                        )
                    ):
                        logger.warning(
-                            "[%s] Reply target %s is a Discord system message; retrying send without reply reference",
+                            "[%s] Reply target %s rejected the reply reference; retrying send without reply reference",
                            self.name,
                            reply_to,
                        )
+                        reference = None
                        msg = await channel.send(
                            content=chunk,
                            reference=None,
@@ -1310,11 +1378,48 @@ class DiscordAdapter(BasePlatformAdapter):
            except OSError:
                pass

-    def _is_allowed_user(self, user_id: str) -> bool:
-        """Check if user is in DISCORD_ALLOWED_USERS."""
-        if not self._allowed_user_ids:
+    def _is_allowed_user(self, user_id: str, author=None) -> bool:
+        """Check if user is allowed via DISCORD_ALLOWED_USERS or DISCORD_ALLOWED_ROLES.
+
+        Uses OR semantics: if the user matches EITHER allowlist, they're allowed.
+        If both allowlists are empty, everyone is allowed (backwards compatible).
+        When author is a Member, checks .roles directly; otherwise falls back
+        to scanning the bot's mutual guilds for a Member record.
+        """
+        # ``getattr`` fallbacks here guard against test fixtures that build
+        # an adapter via ``object.__new__(DiscordAdapter)`` and skip __init__
+        # (see AGENTS.md pitfall #17 — same pattern as gateway.run).
+        allowed_users = getattr(self, "_allowed_user_ids", set())
+        allowed_roles = getattr(self, "_allowed_role_ids", set())
+        has_users = bool(allowed_users)
+        has_roles = bool(allowed_roles)
+        if not has_users and not has_roles:
            return True
-        return user_id in self._allowed_user_ids
+        # Check user ID allowlist
+        if has_users and user_id in allowed_users:
+            return True
+        # Check role allowlist
+        if has_roles:
+            # Try direct role check from Member object
+            direct_roles = getattr(author, "roles", None) if author is not None else None
+            if direct_roles:
+                if any(getattr(r, "id", None) in allowed_roles for r in direct_roles):
+                    return True
+            # Fallback: scan mutual guilds for member's roles
+            if self._client is not None:
+                try:
+                    uid_int = int(user_id)
+                except (TypeError, ValueError):
+                    uid_int = None
+                if uid_int is not None:
+                    for guild in self._client.guilds:
+                        m = guild.get_member(uid_int)
+                        if m is None:
+                            continue
+                        m_roles = getattr(m, "roles", None) or []
+                        if any(getattr(r, "id", None) in allowed_roles for r in m_roles):
+                            return True
+        return False

    async def send_image_file(
        self,
@@ -1904,12 +2009,23 @@ class DiscordAdapter(BasePlatformAdapter):
        self._register_skill_group(tree)

    def _register_skill_group(self, tree) -> None:
-        """Register a ``/skill`` command group with category subcommand groups.
+        """Register a single ``/skill`` command with autocomplete on the name.

-        Skills are organized by their directory category under ``SKILLS_DIR``.
-        Each category becomes a subcommand group; root-level skills become
-        direct subcommands.  Discord supports 25 subcommand groups × 25
-        subcommands each = 625 skills — well beyond the old 100-command cap.
+        Discord enforces an ~8000-byte per-command payload limit. The older
+        nested layout (``/skill <category> <name>``) registered one giant
+        command whose serialized payload grew linearly with the skill
+        catalog — with the default ~75 skills the payload was ~14 KB and
+        ``tree.sync()`` rejected the entire slash-command batch (issues
+        #11321, #10259, #11385, #10261, #10214).
+
+        Autocomplete options are fetched dynamically by Discord when the
+        user types — they do NOT count against the per-command registration
+        budget. So we register ONE flat ``/skill`` command with
+        ``name: str`` (autocompleted) and ``args: str = ""``. This scales
+        to thousands of skills with no size math, no splitting, and no
+        hidden skills. The slash picker also becomes more discoverable —
+        Discord live-filters by the user's typed prefix against both the
+        skill name and its description.
        """
        try:
            from hermes_cli.commands import discord_skill_commands_by_category
@@ -1920,68 +2036,97 @@ class DiscordAdapter(BasePlatformAdapter):
            except Exception:
                pass

+            # Reuse the existing collector for consistent filtering
+            # (per-platform disabled, hub-excluded, name clamping), then
+            # flatten — the category grouping was only useful for the
+            # nested layout.
            categories, uncategorized, hidden = discord_skill_commands_by_category(
                reserved_names=existing_names,
            )
+            entries: list[tuple[str, str, str]] = list(uncategorized)
+            for cat_skills in categories.values():
+                entries.extend(cat_skills)

-            if not categories and not uncategorized:
+            if not entries:
                return

-            skill_group = discord.app_commands.Group(
+            # Stable alphabetical order so the autocomplete suggestion
+            # list is predictable across restarts.
+            entries.sort(key=lambda t: t[0])
+
+            # name -> (description, cmd_key) — used by both the autocomplete
+            # callback and the handler for O(1) dispatch.
+            skill_lookup: dict[str, tuple[str, str]] = {
+                n: (d, k) for n, d, k in entries
+            }
+
+            async def _autocomplete_name(
+                interaction: "discord.Interaction", current: str,
+            ) -> list:
+                """Filter skills by the user's typed prefix.
+
+                Matches both the skill name and its description so
+                "/skill pdf" surfaces skills whose description mentions
+                PDFs even if the name doesn't. Discord caps this list at
+                25 entries per query.
+                """
+                q = (current or "").strip().lower()
+                choices: list = []
+                for name, desc, _key in entries:
+                    if not q or q in name.lower() or (desc and q in desc.lower()):
+                        if desc:
+                            label = f"{name} — {desc}"
+                        else:
+                            label = name
+                        # Discord's Choice.name is capped at 100 chars.
+                        if len(label) > 100:
+                            label = label[:97] + "..."
+                        choices.append(
+                            discord.app_commands.Choice(name=label, value=name)
+                        )
+                        if len(choices) >= 25:
+                            break
+                return choices
+
+            @discord.app_commands.describe(
+                name="Which skill to run",
+                args="Optional arguments for the skill",
+            )
+            @discord.app_commands.autocomplete(name=_autocomplete_name)
+            async def _skill_handler(
+                interaction: "discord.Interaction", name: str, args: str = "",
+            ):
+                entry = skill_lookup.get(name)
+                if not entry:
+                    await interaction.response.send_message(
+                        f"Unknown skill: `{name}`. Start typing for "
+                        f"autocomplete suggestions.",
+                        ephemeral=True,
+                    )
+                    return
+                _desc, cmd_key = entry
+                await self._run_simple_slash(
+                    interaction, f"{cmd_key} {args}".strip()
+                )
+
+            cmd = discord.app_commands.Command(
                name="skill",
                description="Run a Hermes skill",
+                callback=_skill_handler,
            )
+            tree.add_command(cmd)

-            # ── Helper: build a callback for a skill command key ──
-            def _make_handler(_key: str):
-                @discord.app_commands.describe(args="Optional arguments for the skill")
-                async def _handler(interaction: discord.Interaction, args: str = ""):
-                    await self._run_simple_slash(interaction, f"{_key} {args}".strip())
-                _handler.__name__ = f"skill_{_key.lstrip('/').replace('-', '_')}"
-                return _handler
-
-            # ── Uncategorized (root-level) skills → direct subcommands ──
-            for discord_name, description, cmd_key in uncategorized:
-                cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=description or f"Run the {discord_name} skill",
-                    callback=_make_handler(cmd_key),
-                )
-                skill_group.add_command(cmd)
-
-            # ── Category subcommand groups ──
-            for cat_name in sorted(categories):
-                cat_desc = f"{cat_name.replace('-', ' ').title()} skills"
-                if len(cat_desc) > 100:
-                    cat_desc = cat_desc[:97] + "..."
-                cat_group = discord.app_commands.Group(
-                    name=cat_name,
-                    description=cat_desc,
-                    parent=skill_group,
-                )
-                for discord_name, description, cmd_key in categories[cat_name]:
-                    cmd = discord.app_commands.Command(
-                        name=discord_name,
-                        description=description or f"Run the {discord_name} skill",
-                        callback=_make_handler(cmd_key),
-                    )
-                    cat_group.add_command(cmd)
-
-            tree.add_command(skill_group)
-
-            total = sum(len(v) for v in categories.values()) + len(uncategorized)
            logger.info(
-                "[%s] Registered /skill group: %d skill(s) across %d categories"
-                " + %d uncategorized",
-                self.name, total, len(categories), len(uncategorized),
+                "[%s] Registered /skill command with %d skill(s) via autocomplete",
+                self.name, len(entries),
            )
            if hidden:
-                logger.warning(
-                    "[%s] %d skill(s) not registered (Discord subcommand limits)",
+                logger.info(
+                    "[%s] %d skill(s) filtered out of /skill (name clamp / reserved)",
                    self.name, hidden,
                )
        except Exception as exc:
-            logger.warning("[%s] Failed to register /skill group: %s", self.name, exc)
+            logger.warning("[%s] Failed to register /skill command: %s", self.name, exc)

    def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
        """Build a MessageEvent from a Discord slash command interaction."""
@@ -2140,6 +2285,26 @@ class DiscordAdapter(BasePlatformAdapter):
        from gateway.platforms.base import resolve_channel_prompt
        return resolve_channel_prompt(self.config.extra, channel_id, parent_id)

+    def _discord_require_mention(self) -> bool:
+        """Return whether Discord channel messages require a bot mention."""
+        configured = self.config.extra.get("require_mention")
+        if configured is not None:
+            if isinstance(configured, str):
+                return configured.lower() not in ("false", "0", "no", "off")
+            return bool(configured)
+        return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
+
+    def _discord_free_response_channels(self) -> set:
+        """Return Discord channel IDs where no bot mention is required."""
+        raw = self.config.extra.get("free_response_channels")
+        if raw is None:
+            raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        if isinstance(raw, str) and raw.strip():
+            return {part.strip() for part in raw.split(",") if part.strip()}
+        return set()
+
    def _thread_parent_channel(self, channel: Any) -> Any:
        """Return the parent text channel when invoked from a thread."""
        return getattr(channel, "parent", None) or channel
@@ -2242,8 +2407,15 @@ class DiscordAdapter(BasePlatformAdapter):

        Returns the created thread object, or ``None`` on failure.
        """
-        # Build a short thread name from the message
+        # Build a short thread name from the message. Strip Discord mention
+        # syntax (users / roles / channels) so thread titles don't end up
+        # showing raw <@id>, <@&id>, or <#id> markers — the ID isn't
+        # meaningful to humans glancing at the thread list (#6336).
        content = (message.content or "").strip()
+        # <@123>, <@!123>, <@&123>, <#123> — collapse to empty; normalize spaces.
+        content = re.sub(r"<@[!&]?\d+>", "", content)
+        content = re.sub(r"<#\d+>", "", content)
+        content = re.sub(r"\s+", " ", content).strip()
        thread_name = content[:80] if content else "Hermes"
        if len(content) > 80:
            thread_name = thread_name[:77] + "..."
@@ -2251,9 +2423,25 @@ class DiscordAdapter(BasePlatformAdapter):
        try:
            thread = await message.create_thread(name=thread_name, auto_archive_duration=1440)
            return thread
-        except Exception as e:
-            logger.warning("[%s] Auto-thread creation failed: %s", self.name, e)
-            return None
+        except Exception as direct_error:
+            display_name = getattr(getattr(message, "author", None), "display_name", None) or "unknown user"
+            reason = f"Auto-threaded from mention by {display_name}"
+            try:
+                seed_msg = await message.channel.send(f"\U0001f9f5 Thread created by Hermes: **{thread_name}**")
+                thread = await seed_msg.create_thread(
+                    name=thread_name,
+                    auto_archive_duration=1440,
+                    reason=reason,
+                )
+                return thread
+            except Exception as fallback_error:
+                logger.warning(
+                    "[%s] Auto-thread creation failed. Direct error: %s. Fallback error: %s",
+                    self.name,
+                    direct_error,
+                    fallback_error,
+                )
+                return None

    async def send_exec_approval(
        self, chat_id: str, command: str, session_key: str,
@@ -2440,6 +2628,124 @@ class DiscordAdapter(BasePlatformAdapter):
            return f"{parent_name} / {thread_name}"
        return thread_name

+    # ------------------------------------------------------------------
+    # Attachment download helpers
+    #
+    # Discord attachments (images / audio / documents) are fetched via the
+    # authenticated bot session whenever the Attachment object exposes
+    # ``read()``. That sidesteps two classes of bug that hit the older
+    # plain-HTTP path:
+    #
+    #   1. ``cdn.discordapp.com`` URLs increasingly require bot auth on
+    #      download — unauthenticated httpx sees 403 Forbidden.
+    #      (issue #8242)
+    #   2. Some user environments (VPNs, corporate DNS, tunnels) resolve
+    #      ``cdn.discordapp.com`` to private-looking IPs that our
+    #      ``is_safe_url`` guard classifies as SSRF risks. Routing the
+    #      fetch through discord.py's own HTTP client handles DNS
+    #      internally so our guard isn't consulted for the attachment
+    #      path. (issue #6587)
+    #
+    # If ``att.read()`` is unavailable (unexpected object shape / test
+    # stub) or the bot session fetch fails, we fall back to the existing
+    # SSRF-gated URL downloaders. The fallback keeps defense-in-depth
+    # against any future Discord payload-schema drift that could slip a
+    # non-CDN URL into the ``att.url`` field. (issue #11345)
+    # ------------------------------------------------------------------
+
+    async def _read_attachment_bytes(self, att) -> Optional[bytes]:
+        """Read an attachment via discord.py's authenticated bot session.
+
+        Returns the raw bytes on success, or ``None`` if ``att`` doesn't
+        expose a callable ``read()`` or the read itself fails. Callers
+        should treat ``None`` as a signal to fall back to the URL-based
+        downloaders.
+        """
+        reader = getattr(att, "read", None)
+        if reader is None or not callable(reader):
+            return None
+        try:
+            return await reader()
+        except Exception as e:
+            logger.warning(
+                "[Discord] Authenticated attachment read failed for %s: %s",
+                getattr(att, "filename", None) or getattr(att, "url", "<unknown>"),
+                e,
+            )
+            return None
+
+    async def _cache_discord_image(self, att, ext: str) -> str:
+        """Cache a Discord image attachment to local disk.
+
+        Primary path: ``att.read()`` + ``cache_image_from_bytes``
+        (authenticated, no SSRF gate).
+
+        Fallback: ``cache_image_from_url`` (plain httpx, SSRF-gated).
+        """
+        raw_bytes = await self._read_attachment_bytes(att)
+        if raw_bytes is not None:
+            try:
+                return cache_image_from_bytes(raw_bytes, ext=ext)
+            except Exception as e:
+                logger.debug(
+                    "[Discord] cache_image_from_bytes rejected att.read() data; falling back to URL: %s",
+                    e,
+                )
+        return await cache_image_from_url(att.url, ext=ext)
+
+    async def _cache_discord_audio(self, att, ext: str) -> str:
+        """Cache a Discord audio attachment to local disk.
+
+        Primary path: ``att.read()`` + ``cache_audio_from_bytes``
+        (authenticated, no SSRF gate).
+
+        Fallback: ``cache_audio_from_url`` (plain httpx, SSRF-gated).
+        """
+        raw_bytes = await self._read_attachment_bytes(att)
+        if raw_bytes is not None:
+            try:
+                return cache_audio_from_bytes(raw_bytes, ext=ext)
+            except Exception as e:
+                logger.debug(
+                    "[Discord] cache_audio_from_bytes failed; falling back to URL: %s",
+                    e,
+                )
+        return await cache_audio_from_url(att.url, ext=ext)
+
+    async def _cache_discord_document(self, att, ext: str) -> bytes:
+        """Download a Discord document attachment and return the raw bytes.
+
+        Primary path: ``att.read()`` (authenticated, no SSRF gate).
+
+        Fallback: SSRF-gated ``aiohttp`` download. This closes the gap
+        where the old document path made raw ``aiohttp.ClientSession``
+        requests with no safety check (#11345). The caller is responsible
+        for passing the returned bytes to ``cache_document_from_bytes``
+        (and, where applicable, for injecting text content).
+        """
+        raw_bytes = await self._read_attachment_bytes(att)
+        if raw_bytes is not None:
+            return raw_bytes
+
+        # Fallback: SSRF-gated URL download.
+        if not is_safe_url(att.url):
+            raise ValueError(
+                f"Blocked unsafe attachment URL (SSRF protection): {att.url}"
+            )
+        import aiohttp
+        from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+        _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
+        _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
+        async with aiohttp.ClientSession(**_sess_kw) as session:
+            async with session.get(
+                att.url,
+                timeout=aiohttp.ClientTimeout(total=30),
+                **_req_kw,
+            ) as resp:
+                if resp.status != 200:
+                    raise Exception(f"HTTP {resp.status}")
+                return await resp.read()
+
    async def _handle_message(self, message: DiscordMessage) -> None:
        """Handle incoming Discord messages."""
        # In server channels (not DMs), require the bot to be @mentioned
@@ -2482,12 +2788,11 @@ class DiscordAdapter(BasePlatformAdapter):
                logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
                return

-            free_channels_raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
-            free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()}
+            free_channels = self._discord_free_response_channels()
            if parent_channel_id:
                channel_ids.add(parent_channel_id)

-            require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
+            require_mention = self._discord_require_mention()
            # Voice-linked text channels act as free-response while voice is active.
            # Only the exact bound channel gets the exemption, not sibling threads.
            voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
@@ -2515,9 +2820,10 @@ class DiscordAdapter(BasePlatformAdapter):
        if not is_thread and not isinstance(message.channel, discord.DMChannel):
            no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "")
            no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
-            skip_thread = bool(channel_ids & no_thread_channels)
+            skip_thread = bool(channel_ids & no_thread_channels) or is_free_channel
            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
-            if auto_thread and not skip_thread and not is_voice_linked_channel:
+            is_reply_message = getattr(message, "type", None) == discord.MessageType.reply
+            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
                    is_thread = True
@@ -2578,6 +2884,7 @@ class DiscordAdapter(BasePlatformAdapter):
            user_name=message.author.display_name,
            thread_id=thread_id,
            chat_topic=chat_topic,
+            is_bot=getattr(message.author, "bot", False),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -2593,7 +2900,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    ext = "." + content_type.split("/")[-1].split(";")[0]
                    if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
                        ext = ".jpg"
-                    cached_path = await cache_image_from_url(att.url, ext=ext)
+                    cached_path = await self._cache_discord_image(att, ext)
                    media_urls.append(cached_path)
                    media_types.append(content_type)
                    print(f"[Discord] Cached user image: {cached_path}", flush=True)
@@ -2607,7 +2914,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    ext = "." + content_type.split("/")[-1].split(";")[0]
                    if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
                        ext = ".ogg"
-                    cached_path = await cache_audio_from_url(att.url, ext=ext)
+                    cached_path = await self._cache_discord_audio(att, ext)
                    media_urls.append(cached_path)
                    media_types.append(content_type)
                    print(f"[Discord] Cached user audio: {cached_path}", flush=True)
@@ -2638,19 +2945,7 @@ class DiscordAdapter(BasePlatformAdapter):
                        )
                    else:
                        try:
-                            import aiohttp
-                            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
-                            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
-                            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
-                            async with aiohttp.ClientSession(**_sess_kw) as session:
-                                async with session.get(
-                                    att.url,
-                                    timeout=aiohttp.ClientTimeout(total=30),
-                                    **_req_kw,
-                                ) as resp:
-                                    if resp.status != 200:
-                                        raise Exception(f"HTTP {resp.status}")
-                                    raw_bytes = await resp.read()
+                            raw_bytes = await self._cache_discord_document(att, ext)
                            cached_path = cache_document_from_bytes(
                                raw_bytes, att.filename or f"document{ext}"
                            )
@@ -1073,6 +1073,13 @@ class FeishuAdapter(BasePlatformAdapter):
        self._webhook_rate_counts: Dict[str, tuple[int, float]] = {}  # rate_key → (count, window_start)
        self._webhook_anomaly_counts: Dict[str, tuple[int, str, float]] = {}  # ip → (count, last_status, first_seen)
        self._card_action_tokens: Dict[str, float] = {}  # token → first_seen_time
+        # Inbound events that arrived before the adapter loop was ready
+        # (e.g. during startup/restart or network-flap reconnect). A single
+        # drainer thread replays them as soon as the loop becomes available.
+        self._pending_inbound_events: List[Any] = []
+        self._pending_inbound_lock = threading.Lock()
+        self._pending_drain_scheduled = False
+        self._pending_inbound_max_depth = 1000  # cap queue; drop oldest beyond
        self._chat_locks: Dict[str, asyncio.Lock] = {}  # chat_id → lock (per-chat serial processing)
        self._sent_message_ids_to_chat: Dict[str, str] = {}  # message_id → chat_id (for reaction routing)
        self._sent_message_id_order: List[str] = []  # LRU order for _sent_message_ids_to_chat
@@ -1219,6 +1226,8 @@ class FeishuAdapter(BasePlatformAdapter):
            .register_p2_card_action_trigger(self._on_card_action_trigger)
            .register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
            .register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
+            .register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self._on_p2p_chat_entered)
+            .register_p2_im_message_recalled_v1(self._on_message_recalled)
            .build()
        )

@@ -1757,10 +1766,22 @@ class FeishuAdapter(BasePlatformAdapter):
    # =========================================================================

    def _on_message_event(self, data: Any) -> None:
-        """Normalize Feishu inbound events into MessageEvent."""
+        """Normalize Feishu inbound events into MessageEvent.
+
+        Called by the lark_oapi SDK's event dispatcher on a background thread.
+        If the adapter loop is not currently accepting callbacks (brief window
+        during startup/restart or network-flap reconnect), the event is queued
+        for replay instead of dropped.
+        """
        loop = self._loop
-        if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
-            logger.warning("[Feishu] Dropping inbound message before adapter loop is ready")
+        if not self._loop_accepts_callbacks(loop):
+            start_drainer = self._enqueue_pending_inbound_event(data)
+            if start_drainer:
+                threading.Thread(
+                    target=self._drain_pending_inbound_events,
+                    name="feishu-pending-inbound-drainer",
+                    daemon=True,
+                ).start()
            return
        future = asyncio.run_coroutine_threadsafe(
            self._handle_message_event_data(data),
@@ -1768,6 +1789,124 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        future.add_done_callback(self._log_background_failure)

+    def _enqueue_pending_inbound_event(self, data: Any) -> bool:
+        """Append an event to the pending-inbound queue.
+
+        Returns True if the caller should spawn a drainer thread (no drainer
+        currently scheduled), False if a drainer is already running and will
+        pick up the new event on its next pass.
+        """
+        with self._pending_inbound_lock:
+            if len(self._pending_inbound_events) >= self._pending_inbound_max_depth:
+                # Queue full — drop the oldest to make room. This happens only
+                # if the loop stays unavailable for an extended period AND the
+                # WS keeps firing callbacks. Still better than silent drops.
+                dropped = self._pending_inbound_events.pop(0)
+                try:
+                    event = getattr(dropped, "event", None)
+                    message = getattr(event, "message", None)
+                    message_id = str(getattr(message, "message_id", "") or "unknown")
+                except Exception:
+                    message_id = "unknown"
+                logger.error(
+                    "[Feishu] Pending-inbound queue full (%d); dropped oldest event %s",
+                    self._pending_inbound_max_depth,
+                    message_id,
+                )
+            self._pending_inbound_events.append(data)
+            depth = len(self._pending_inbound_events)
+            should_start = not self._pending_drain_scheduled
+            if should_start:
+                self._pending_drain_scheduled = True
+        logger.warning(
+            "[Feishu] Queued inbound event for replay (loop not ready, queue depth=%d)",
+            depth,
+        )
+        return should_start
+
+    def _drain_pending_inbound_events(self) -> None:
+        """Replay queued inbound events once the adapter loop is ready.
+
+        Runs in a dedicated daemon thread. Polls ``_running`` and
+        ``_loop_accepts_callbacks`` until events can be dispatched or the
+        adapter shuts down. A single drainer handles the entire queue;
+        concurrent ``_on_message_event`` calls just append.
+        """
+        poll_interval = 0.25
+        max_wait_seconds = 120.0  # safety cap: drop queue after 2 minutes
+        waited = 0.0
+        try:
+            while True:
+                if not getattr(self, "_running", True):
+                    # Adapter shutting down — drop queued events rather than
+                    # holding them against a closed loop.
+                    with self._pending_inbound_lock:
+                        dropped = len(self._pending_inbound_events)
+                        self._pending_inbound_events.clear()
+                    if dropped:
+                        logger.warning(
+                            "[Feishu] Dropped %d queued inbound event(s) during shutdown",
+                            dropped,
+                        )
+                    return
+                loop = self._loop
+                if self._loop_accepts_callbacks(loop):
+                    with self._pending_inbound_lock:
+                        batch = self._pending_inbound_events[:]
+                        self._pending_inbound_events.clear()
+                    if not batch:
+                        # Queue emptied between check and grab; done.
+                        with self._pending_inbound_lock:
+                            if not self._pending_inbound_events:
+                                return
+                        continue
+                    dispatched = 0
+                    requeue: List[Any] = []
+                    for event in batch:
+                        try:
+                            fut = asyncio.run_coroutine_threadsafe(
+                                self._handle_message_event_data(event),
+                                loop,
+                            )
+                            fut.add_done_callback(self._log_background_failure)
+                            dispatched += 1
+                        except RuntimeError:
+                            # Loop closed between check and submit — requeue
+                            # and poll again.
+                            requeue.append(event)
+                    if requeue:
+                        with self._pending_inbound_lock:
+                            self._pending_inbound_events[:0] = requeue
+                    if dispatched:
+                        logger.info(
+                            "[Feishu] Replayed %d queued inbound event(s)",
+                            dispatched,
+                        )
+                    if not requeue:
+                        # Successfully drained; check if more arrived while
+                        # we were dispatching and exit if not.
+                        with self._pending_inbound_lock:
+                            if not self._pending_inbound_events:
+                                return
+                    # More events queued or requeue pending — loop again.
+                    continue
+                if waited >= max_wait_seconds:
+                    with self._pending_inbound_lock:
+                        dropped = len(self._pending_inbound_events)
+                        self._pending_inbound_events.clear()
+                    logger.error(
+                        "[Feishu] Adapter loop unavailable for %.0fs; "
+                        "dropped %d queued inbound event(s)",
+                        max_wait_seconds,
+                        dropped,
+                    )
+                    return
+                time.sleep(poll_interval)
+                waited += poll_interval
+        finally:
+            with self._pending_inbound_lock:
+                self._pending_drain_scheduled = False
+
    async def _handle_message_event_data(self, data: Any) -> None:
        """Shared inbound message handling for websocket and webhook transports."""
        event = getattr(data, "event", None)
@@ -1820,6 +1959,12 @@ class FeishuAdapter(BasePlatformAdapter):
        logger.info("[Feishu] Bot removed from chat: %s", chat_id)
        self._chat_info_cache.pop(chat_id, None)

+    def _on_p2p_chat_entered(self, data: Any) -> None:
+        logger.debug("[Feishu] User entered P2P chat with bot")
+
+    def _on_message_recalled(self, data: Any) -> None:
+        logger.debug("[Feishu] Message recalled by user")
+
    def _on_reaction_event(self, event_type: str, data: Any) -> None:
        """Route user reactions on bot messages as synthetic text events."""
        event = getattr(data, "event", None)
@@ -64,6 +64,7 @@ from gateway.platforms.base import (
    MessageEvent,
    MessageType,
    SendResult,
+    _ssrf_redirect_guard,
    cache_document_from_bytes,
    cache_image_from_bytes,
 )
@@ -226,7 +227,11 @@ class QQAdapter(BasePlatformAdapter):
            return False

        try:
-            self._http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True)
+            self._http_client = httpx.AsyncClient(
+                timeout=30.0,
+                follow_redirects=True,
+                event_hooks={"response": [_ssrf_redirect_guard]},
+            )

            # 1. Get access token
            await self._ensure_token()
@@ -1101,6 +1106,11 @@ class QQAdapter(BasePlatformAdapter):
            is_pre_wav = True
            logger.info("[QQ] STT: using voice_wav_url (pre-converted WAV)")

+        from tools.url_safety import is_safe_url
+        if not is_safe_url(download_url):
+            logger.warning("[QQ] STT blocked unsafe URL: %s", download_url[:80])
+            return None
+
        try:
            # 2. Download audio (QQ CDN requires Authorization header)
            if not self._http_client:
@@ -1525,6 +1535,33 @@ class QQAdapter(BasePlatformAdapter):

        raise last_exc  # type: ignore[misc]

+    # Maximum time (seconds) to wait for reconnection before giving up on send.
+    _RECONNECT_WAIT_SECONDS = 15.0
+    # How often (seconds) to poll is_connected while waiting.
+    _RECONNECT_POLL_INTERVAL = 0.5
+
+    async def _wait_for_reconnection(self) -> bool:
+        """Wait for the WebSocket listener to reconnect.
+
+        The listener loop (_listen_loop) auto-reconnects on disconnect, but
+        there is a race window where send() is called right after a disconnect
+        and before the reconnect completes.  This method polls is_connected
+        for up to _RECONNECT_WAIT_SECONDS.
+
+        Returns True if reconnected, False if still disconnected.
+        """
+        logger.info("[%s] Not connected — waiting for reconnection (up to %.0fs)",
+                    self.name, self._RECONNECT_WAIT_SECONDS)
+        waited = 0.0
+        while waited < self._RECONNECT_WAIT_SECONDS:
+            await asyncio.sleep(self._RECONNECT_POLL_INTERVAL)
+            waited += self._RECONNECT_POLL_INTERVAL
+            if self.is_connected:
+                logger.info("[%s] Reconnected after %.1fs", self.name, waited)
+                return True
+        logger.warning("[%s] Still not connected after %.0fs", self.name, self._RECONNECT_WAIT_SECONDS)
+        return False
+
    async def send(
        self,
        chat_id: str,
@@ -1540,7 +1577,8 @@ class QQAdapter(BasePlatformAdapter):
        del metadata

        if not self.is_connected:
-            return SendResult(success=False, error="Not connected")
+            if not await self._wait_for_reconnection():
+                return SendResult(success=False, error="Not connected", retryable=True)

        if not content or not content.strip():
            return SendResult(success=True)
@@ -1741,7 +1779,8 @@ class QQAdapter(BasePlatformAdapter):
    ) -> SendResult:
        """Upload media and send as a native message."""
        if not self.is_connected:
-            return SendResult(success=False, error="Not connected")
+            if not await self._wait_for_reconnection():
+                return SendResult(success=False, error="Not connected", retryable=True)

        try:
            # Resolve media source
@@ -28,7 +28,7 @@ import uuid
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import quote
+from urllib.parse import quote, urlparse

 logger = logging.getLogger(__name__)

@@ -96,6 +96,28 @@ MEDIA_VIDEO = 2
 MEDIA_FILE = 3
 MEDIA_VOICE = 4

+_LIVE_ADAPTERS: Dict[str, Any] = {}
+
+
+def _make_ssl_connector() -> Optional["aiohttp.TCPConnector"]:
+    """Return a TCPConnector with a certifi CA bundle, or None if certifi is unavailable.
+
+    Tencent's iLink server (``ilinkai.weixin.qq.com``) is not verifiable against
+    some system CA stores (notably Homebrew's OpenSSL on macOS Apple Silicon).
+    When ``certifi`` is installed, use its Mozilla CA bundle to guarantee
+    verification. Otherwise fall back to aiohttp's default (which honors
+    ``SSL_CERT_FILE`` env var via ``trust_env=True``).
+    """
+    try:
+        import ssl
+        import certifi
+    except ImportError:
+        return None
+    if not AIOHTTP_AVAILABLE:
+        return None
+    ssl_ctx = ssl.create_default_context(cafile=certifi.where())
+    return aiohttp.TCPConnector(ssl=ssl_ctx)
+
 ITEM_TEXT = 1
 ITEM_IMAGE = 2
 ITEM_VOICE = 3
@@ -398,7 +420,12 @@ async def _send_message(
    text: str,
    context_token: Optional[str],
    client_id: str,
-) -> None:
+) -> Dict[str, Any]:
+    """Send a text message via iLink sendmessage API.
+
+    Returns the raw API response dict (may contain error codes like
+    ``errcode: -14`` for session expiry that the caller can inspect).
+    """
    if not text or not text.strip():
        raise ValueError("_send_message: text must not be empty")
    message: Dict[str, Any] = {
@@ -411,7 +438,7 @@ async def _send_message(
    }
    if context_token:
        message["context_token"] = context_token
-    await _api_post(
+    return await _api_post(
        session,
        base_url=base_url,
        endpoint=EP_SEND_MESSAGE,
@@ -533,6 +560,39 @@ async def _download_bytes(
        return await response.read()


+_WEIXIN_CDN_ALLOWLIST: frozenset[str] = frozenset(
+    {
+        "novac2c.cdn.weixin.qq.com",
+        "ilinkai.weixin.qq.com",
+        "wx.qlogo.cn",
+        "thirdwx.qlogo.cn",
+        "res.wx.qq.com",
+        "mmbiz.qpic.cn",
+        "mmbiz.qlogo.cn",
+    }
+)
+
+
+def _assert_weixin_cdn_url(url: str) -> None:
+    """Raise ValueError if *url* does not point at a known WeChat CDN host."""
+    try:
+        parsed = urlparse(url)
+        scheme = parsed.scheme.lower()
+        host = parsed.hostname or ""
+    except Exception as exc:  # noqa: BLE001
+        raise ValueError(f"Unparseable media URL: {url!r}") from exc
+
+    if scheme not in ("http", "https"):
+        raise ValueError(
+            f"Media URL has disallowed scheme {scheme!r}; only http/https are permitted."
+        )
+    if host not in _WEIXIN_CDN_ALLOWLIST:
+        raise ValueError(
+            f"Media URL host {host!r} is not in the WeChat CDN allowlist. "
+            "Refusing to fetch to prevent SSRF."
+        )
+
+
 def _media_reference(item: Dict[str, Any], key: str) -> Dict[str, Any]:
    return (item.get(key) or {}).get("media") or {}

@@ -553,6 +613,7 @@ async def _download_and_decrypt_media(
            timeout_seconds=timeout_seconds,
        )
    elif full_url:
+        _assert_weixin_cdn_url(full_url)
        raw = await _download_bytes(session, url=full_url, timeout_seconds=timeout_seconds)
    else:
        raise RuntimeError("media item had neither encrypt_query_param nor full_url")
@@ -623,42 +684,31 @@ def _rewrite_table_block_for_weixin(lines: List[str]) -> str:
 def _normalize_markdown_blocks(content: str) -> str:
    lines = content.splitlines()
    result: List[str] = []
-    i = 0
    in_code_block = False
+    blank_run = 0

-    while i < len(lines):
-        line = lines[i].rstrip()
-        fence_match = _FENCE_RE.match(line.strip())
-        if fence_match:
+    for raw_line in lines:
+        line = raw_line.rstrip()
+        if _FENCE_RE.match(line.strip()):
            in_code_block = not in_code_block
            result.append(line)
-            i += 1
+            blank_run = 0
            continue

        if in_code_block:
            result.append(line)
-            i += 1
            continue

-        if (
-            i + 1 < len(lines)
-            and "|" in lines[i]
-            and _TABLE_RULE_RE.match(lines[i + 1].rstrip())
-        ):
-            table_lines = [lines[i].rstrip(), lines[i + 1].rstrip()]
-            i += 2
-            while i < len(lines) and "|" in lines[i]:
-                table_lines.append(lines[i].rstrip())
-                i += 1
-            result.append(_rewrite_table_block_for_weixin(table_lines))
+        if not line.strip():
+            blank_run += 1
+            if blank_run <= 1:
+                result.append("")
            continue

-        result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
-        i += 1
+        blank_run = 0
+        result.append(line)

-    normalized = "\n".join(item.rstrip() for item in result)
-    normalized = re.sub(r"\n{3,}", "\n\n", normalized)
-    return normalized.strip()
+    return "\n".join(result).strip()


 def _split_markdown_blocks(content: str) -> List[str]:
@@ -704,8 +754,8 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:

    Weixin can render Markdown, but chat readability is better when top-level
    line breaks become separate messages. Keep fenced code blocks intact and
-    attach indented continuation lines to the previous top-level line so
-    transformed tables/lists do not get torn apart.
+    attach indented continuation lines to the previous top-level line so nested
+    list items do not get torn apart.
    """
    units: List[str] = []

@@ -747,7 +797,9 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
        return False
    if line.startswith((" ", "\t")):
        return False
-    if stripped.startswith((">", "-", "*", "【")):
+    if stripped.startswith((">", "-", "*", "【", "#", "|")):
+        return False
+    if _TABLE_RULE_RE.match(stripped):
        return False
    if re.match(r"^\*\*[^*]+\*\*$", stripped):
        return False
@@ -757,10 +809,12 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:


 def _looks_like_heading_line_for_weixin(line: str) -> bool:
-    """Return True when a short line behaves like a plain-text heading."""
+    """Return True when a short line behaves like a heading."""
    stripped = line.strip()
    if not stripped:
        return False
+    if _HEADER_RE.match(stripped):
+        return True
    return len(stripped) <= 24 and stripped.endswith((":", "："))


@@ -935,7 +989,7 @@ async def qr_login(
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError("aiohttp is required for Weixin QR login")

-    async with aiohttp.ClientSession(trust_env=True) as session:
+    async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
        try:
            qr_resp = await _api_get(
                session,
@@ -953,6 +1007,10 @@ async def qr_login(
            logger.error("weixin: QR response missing qrcode")
            return None

+        # qrcode_url is the full scannable liteapp URL; qrcode_value is just the hex token
+        # WeChat needs to scan the full URL, not the raw hex string
+        qr_scan_data = qrcode_url if qrcode_url else qrcode_value
+
        print("\n请使用微信扫描以下二维码：")
        if qrcode_url:
            print(qrcode_url)
@@ -960,11 +1018,11 @@ async def qr_login(
            import qrcode

            qr = qrcode.QRCode()
-            qr.add_data(qrcode_url or qrcode_value)
+            qr.add_data(qr_scan_data)
            qr.make(fit=True)
            qr.print_ascii(invert=True)
-        except Exception:
-            print("（终端二维码渲染失败，请直接打开上面的二维码链接）")
+        except Exception as _qr_exc:
+            print(f"（终端二维码渲染失败: {_qr_exc}，请直接打开上面的二维码链接）")

        deadline = time.time() + timeout_seconds
        current_base_url = ILINK_BASE_URL
@@ -1010,8 +1068,17 @@ async def qr_login(
                    )
                    qrcode_value = str(qr_resp.get("qrcode") or "")
                    qrcode_url = str(qr_resp.get("qrcode_img_content") or "")
+                    qr_scan_data = qrcode_url if qrcode_url else qrcode_value
                    if qrcode_url:
                        print(qrcode_url)
+                    try:
+                        import qrcode as _qrcode
+                        qr = _qrcode.QRCode()
+                        qr.add_data(qr_scan_data)
+                        qr.make(fit=True)
+                        qr.print_ascii(invert=True)
+                    except Exception:
+                        pass
                except Exception as exc:
                    logger.error("weixin: QR refresh failed: %s", exc)
                    return None
@@ -1059,7 +1126,8 @@ class WeixinAdapter(BasePlatformAdapter):
        self._hermes_home = hermes_home
        self._token_store = ContextTokenStore(hermes_home)
        self._typing_cache = TypingTicketCache()
-        self._session: Optional[aiohttp.ClientSession] = None
+        self._poll_session: Optional[aiohttp.ClientSession] = None
+        self._send_session: Optional[aiohttp.ClientSession] = None
        self._poll_task: Optional[asyncio.Task] = None
        self._dedup = MessageDeduplicator(ttl_seconds=MESSAGE_DEDUP_TTL_SECONDS)

@@ -1134,14 +1202,17 @@ class WeixinAdapter(BasePlatformAdapter):
        except Exception as exc:
            logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)

-        self._session = aiohttp.ClientSession(trust_env=True)
+        self._poll_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
+        self._send_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
        self._token_store.restore(self._account_id)
        self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
        self._mark_connected()
+        _LIVE_ADAPTERS[self._token] = self
        logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
        return True

    async def disconnect(self) -> None:
+        _LIVE_ADAPTERS.pop(self._token, None)
        self._running = False
        if self._poll_task and not self._poll_task.done():
            self._poll_task.cancel()
@@ -1150,15 +1221,18 @@ class WeixinAdapter(BasePlatformAdapter):
            except asyncio.CancelledError:
                pass
        self._poll_task = None
-        if self._session and not self._session.closed:
-            await self._session.close()
-        self._session = None
+        if self._poll_session and not self._poll_session.closed:
+            await self._poll_session.close()
+        self._poll_session = None
+        if self._send_session and not self._send_session.closed:
+            await self._send_session.close()
+        self._send_session = None
        self._release_platform_lock()
        self._mark_disconnected()
        logger.info("[%s] Disconnected", self.name)

    async def _poll_loop(self) -> None:
-        assert self._session is not None
+        assert self._poll_session is not None
        sync_buf = _load_sync_buf(self._hermes_home, self._account_id)
        timeout_ms = LONG_POLL_TIMEOUT_MS
        consecutive_failures = 0
@@ -1166,7 +1240,7 @@ class WeixinAdapter(BasePlatformAdapter):
        while self._running:
            try:
                response = await _get_updates(
-                    self._session,
+                    self._poll_session,
                    base_url=self._base_url,
                    token=self._token,
                    sync_buf=sync_buf,
@@ -1223,7 +1297,7 @@ class WeixinAdapter(BasePlatformAdapter):
            logger.error("[%s] unhandled inbound error from=%s: %s", self.name, _safe_id(message.get("from_user_id")), exc, exc_info=True)

    async def _process_message(self, message: Dict[str, Any]) -> None:
-        assert self._session is not None
+        assert self._poll_session is not None
        sender_id = str(message.get("from_user_id") or "").strip()
        if not sender_id:
            return
@@ -1316,7 +1390,7 @@ class WeixinAdapter(BasePlatformAdapter):
        media = _media_reference(item, "image_item")
        try:
            data = await _download_and_decrypt_media(
-                self._session,
+                self._poll_session,
                cdn_base_url=self._cdn_base_url,
                encrypted_query_param=media.get("encrypt_query_param"),
                aes_key_b64=(item.get("image_item") or {}).get("aeskey")
@@ -1334,7 +1408,7 @@ class WeixinAdapter(BasePlatformAdapter):
        media = _media_reference(item, "video_item")
        try:
            data = await _download_and_decrypt_media(
-                self._session,
+                self._poll_session,
                cdn_base_url=self._cdn_base_url,
                encrypted_query_param=media.get("encrypt_query_param"),
                aes_key_b64=media.get("aes_key"),
@@ -1353,7 +1427,7 @@ class WeixinAdapter(BasePlatformAdapter):
        mime = _mime_from_filename(filename)
        try:
            data = await _download_and_decrypt_media(
-                self._session,
+                self._poll_session,
                cdn_base_url=self._cdn_base_url,
                encrypted_query_param=media.get("encrypt_query_param"),
                aes_key_b64=media.get("aes_key"),
@@ -1372,7 +1446,7 @@ class WeixinAdapter(BasePlatformAdapter):
            return None
        try:
            data = await _download_and_decrypt_media(
-                self._session,
+                self._poll_session,
                cdn_base_url=self._cdn_base_url,
                encrypted_query_param=media.get("encrypt_query_param"),
                aes_key_b64=media.get("aes_key"),
@@ -1385,13 +1459,13 @@ class WeixinAdapter(BasePlatformAdapter):
            return None

    async def _maybe_fetch_typing_ticket(self, user_id: str, context_token: Optional[str]) -> None:
-        if not self._session or not self._token:
+        if not self._poll_session or not self._token:
            return
        if self._typing_cache.get(user_id):
            return
        try:
            response = await _get_config(
-                self._session,
+                self._poll_session,
                base_url=self._base_url,
                token=self._token,
                user_id=user_id,
@@ -1416,12 +1490,19 @@ class WeixinAdapter(BasePlatformAdapter):
        context_token: Optional[str],
        client_id: str,
    ) -> None:
-        """Send a single text chunk with per-chunk retry and backoff."""
+        """Send a single text chunk with per-chunk retry and backoff.
+
+        On session-expired errors (errcode -14), automatically retries
+        *without* ``context_token`` — iLink accepts tokenless sends as a
+        degraded fallback, which keeps cron-initiated push messages working
+        even when no user message has refreshed the session recently.
+        """
        last_error: Optional[Exception] = None
+        retried_without_token = False
        for attempt in range(self._send_chunk_retries + 1):
            try:
-                await _send_message(
-                    self._session,
+                resp = await _send_message(
+                    self._send_session,
                    base_url=self._base_url,
                    token=self._token,
                    to=chat_id,
@@ -1429,6 +1510,31 @@ class WeixinAdapter(BasePlatformAdapter):
                    context_token=context_token,
                    client_id=client_id,
                )
+                # Check iLink response for session-expired error
+                if resp and isinstance(resp, dict):
+                    ret = resp.get("ret")
+                    errcode = resp.get("errcode")
+                    if (ret is not None and ret not in (0,)) or (errcode is not None and errcode not in (0,)):
+                        is_session_expired = (
+                            ret == SESSION_EXPIRED_ERRCODE
+                            or errcode == SESSION_EXPIRED_ERRCODE
+                        )
+                        # Session expired — strip token and retry once
+                        if is_session_expired and not retried_without_token and context_token:
+                            retried_without_token = True
+                            context_token = None
+                            self._token_store._cache.pop(
+                                self._token_store._key(self._account_id, chat_id), None
+                            )
+                            logger.warning(
+                                "[%s] session expired for %s; retrying without context_token",
+                                self.name, _safe_id(chat_id),
+                            )
+                            continue
+                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
+                        raise RuntimeError(
+                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
+                        )
                return
            except Exception as exc:
                last_error = exc
@@ -1456,12 +1562,48 @@ class WeixinAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        if not self._session or not self._token:
+        if not self._send_session or not self._token:
            return SendResult(success=False, error="Not connected")
        context_token = self._token_store.get(self._account_id, chat_id)
        last_message_id: Optional[str] = None
+
+        # Extract MEDIA: tags and bare local file paths before text delivery.
+        media_files, cleaned_content = self.extract_media(content)
+        _, image_cleaned = self.extract_images(cleaned_content)
+        local_files, final_content = self.extract_local_files(image_cleaned)
+
+        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
+        _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
+        _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
+
+        async def _deliver_media(path: str, is_voice: bool = False) -> None:
+            ext = Path(path).suffix.lower()
+            if is_voice or ext in _AUDIO_EXTS:
+                await self.send_voice(chat_id=chat_id, audio_path=path, metadata=metadata)
+            elif ext in _VIDEO_EXTS:
+                await self.send_video(chat_id=chat_id, video_path=path, metadata=metadata)
+            elif ext in _IMAGE_EXTS:
+                await self.send_image_file(chat_id=chat_id, image_path=path, metadata=metadata)
+            else:
+                await self.send_document(chat_id=chat_id, file_path=path, metadata=metadata)
+
        try:
-            chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
+            # Deliver extracted MEDIA: attachments first.
+            for media_path, is_voice in media_files:
+                try:
+                    await _deliver_media(media_path, is_voice)
+                except Exception as exc:
+                    logger.warning("[%s] media delivery failed for %s: %s", self.name, media_path, exc)
+
+            # Deliver bare local file paths.
+            for file_path in local_files:
+                try:
+                    await _deliver_media(file_path, is_voice=False)
+                except Exception as exc:
+                    logger.warning("[%s] local file delivery failed for %s: %s", self.name, file_path, exc)
+
+            # Deliver text content.
+            chunks = [c for c in self._split_text(self.format_message(final_content)) if c and c.strip()]
            for idx, chunk in enumerate(chunks):
                client_id = f"hermes-weixin-{uuid.uuid4().hex}"
                await self._send_text_chunk(
@@ -1479,14 +1621,14 @@ class WeixinAdapter(BasePlatformAdapter):
            return SendResult(success=False, error=str(exc))

    async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
-        if not self._session or not self._token:
+        if not self._send_session or not self._token:
            return
        typing_ticket = self._typing_cache.get(chat_id)
        if not typing_ticket:
            return
        try:
            await _send_typing(
-                self._session,
+                self._send_session,
                base_url=self._base_url,
                token=self._token,
                to_user_id=chat_id,
@@ -1497,14 +1639,14 @@ class WeixinAdapter(BasePlatformAdapter):
            logger.debug("[%s] typing start failed for %s: %s", self.name, _safe_id(chat_id), exc)

    async def stop_typing(self, chat_id: str) -> None:
-        if not self._session or not self._token:
+        if not self._send_session or not self._token:
            return
        typing_ticket = self._typing_cache.get(chat_id)
        if not typing_ticket:
            return
        try:
            await _send_typing(
-                self._session,
+                self._send_session,
                base_url=self._base_url,
                token=self._token,
                to_user_id=chat_id,
@@ -1542,24 +1684,35 @@ class WeixinAdapter(BasePlatformAdapter):
    async def send_image_file(
        self,
        chat_id: str,
-        path: str,
-        caption: str = "",
+        image_path: str,
+        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs,
    ) -> SendResult:
-        return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata)
+        del reply_to, kwargs
+        return await self.send_document(
+            chat_id=chat_id,
+            file_path=image_path,
+            caption=caption,
+            metadata=metadata,
+        )

    async def send_document(
        self,
        chat_id: str,
        file_path: str,
-        caption: str = "",
+        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
+        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs,
    ) -> SendResult:
-        if not self._session or not self._token:
+        del file_name, reply_to, metadata, kwargs
+        if not self._send_session or not self._token:
            return SendResult(success=False, error="Not connected")
        try:
-            message_id = await self._send_file(chat_id, file_path, caption)
+            message_id = await self._send_file(chat_id, file_path, caption or "")
            return SendResult(success=True, message_id=message_id)
        except Exception as exc:
            logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
@@ -1573,7 +1726,7 @@ class WeixinAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        if not self._session or not self._token:
+        if not self._send_session or not self._token:
            return SendResult(success=False, error="Not connected")
        try:
            message_id = await self._send_file(chat_id, video_path, caption or "")
@@ -1590,7 +1743,24 @@ class WeixinAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
+        if not self._send_session or not self._token:
+            return SendResult(success=False, error="Not connected")
+
+        # Native outbound Weixin voice bubbles are not proven-working in the
+        # upstream reference implementation. Prefer a reliable file attachment
+        # fallback so users at least receive playable audio, even for .silk.
+        fallback_caption = caption or "[voice message as attachment]"
+        try:
+            message_id = await self._send_file(
+                chat_id,
+                audio_path,
+                fallback_caption,
+                force_file_attachment=True,
+            )
+            return SendResult(success=True, message_id=message_id)
+        except Exception as exc:
+            logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
+            return SendResult(success=False, error=str(exc))

    async def _download_remote_media(self, url: str) -> str:
        from tools.url_safety import is_safe_url
@@ -1598,8 +1768,8 @@ class WeixinAdapter(BasePlatformAdapter):
        if not is_safe_url(url):
            raise ValueError(f"Blocked unsafe URL (SSRF protection): {url}")

-        assert self._session is not None
-        async with self._session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
+        assert self._send_session is not None
+        async with self._send_session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
            response.raise_for_status()
            data = await response.read()
            suffix = Path(url.split("?", 1)[0]).suffix or ".bin"
@@ -1607,16 +1777,22 @@ class WeixinAdapter(BasePlatformAdapter):
            handle.write(data)
            return handle.name

-    async def _send_file(self, chat_id: str, path: str, caption: str) -> str:
-        assert self._session is not None and self._token is not None
+    async def _send_file(
+        self,
+        chat_id: str,
+        path: str,
+        caption: str,
+        force_file_attachment: bool = False,
+    ) -> str:
+        assert self._send_session is not None and self._token is not None
        plaintext = Path(path).read_bytes()
-        media_type, item_builder = self._outbound_media_builder(path)
+        media_type, item_builder = self._outbound_media_builder(path, force_file_attachment=force_file_attachment)
        filekey = secrets.token_hex(16)
        aes_key = secrets.token_bytes(16)
        rawsize = len(plaintext)
        rawfilemd5 = hashlib.md5(plaintext).hexdigest()
        upload_response = await _get_upload_url(
-            self._session,
+            self._send_session,
            base_url=self._base_url,
            token=self._token,
            to_user_id=chat_id,
@@ -1642,30 +1818,34 @@ class WeixinAdapter(BasePlatformAdapter):
            raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")

        encrypted_query_param = await _upload_ciphertext(
-            self._session,
+            self._send_session,
            ciphertext=ciphertext,
            upload_url=upload_url,
        )
-
        context_token = self._token_store.get(self._account_id, chat_id)
        # The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
        # Sending base64(raw_bytes) causes images to show as grey boxes on the
        # receiver side because the decryption key doesn't match.
        aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
-        media_item = item_builder(
-            encrypt_query_param=encrypted_query_param,
-            aes_key_for_api=aes_key_for_api,
-            ciphertext_size=len(ciphertext),
-            plaintext_size=rawsize,
-            filename=Path(path).name,
-            rawfilemd5=rawfilemd5,
-        )
+        item_kwargs = {
+            "encrypt_query_param": encrypted_query_param,
+            "aes_key_for_api": aes_key_for_api,
+            "ciphertext_size": len(ciphertext),
+            "plaintext_size": rawsize,
+            "filename": Path(path).name,
+            "rawfilemd5": rawfilemd5,
+        }
+        if media_type == MEDIA_VOICE and path.endswith(".silk"):
+            item_kwargs["encode_type"] = 6
+            item_kwargs["sample_rate"] = 24000
+            item_kwargs["bits_per_sample"] = 16
+        media_item = item_builder(**item_kwargs)

        last_message_id = None
        if caption:
            last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
            await _send_message(
-                self._session,
+                self._send_session,
                base_url=self._base_url,
                token=self._token,
                to=chat_id,
@@ -1676,7 +1856,7 @@ class WeixinAdapter(BasePlatformAdapter):

        last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
        await _api_post(
-            self._session,
+            self._send_session,
            base_url=self._base_url,
            endpoint=EP_SEND_MESSAGE,
            payload={
@@ -1695,7 +1875,7 @@ class WeixinAdapter(BasePlatformAdapter):
        )
        return last_message_id

-    def _outbound_media_builder(self, path: str):
+    def _outbound_media_builder(self, path: str, force_file_attachment: bool = False):
        mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
        if mime.startswith("image/"):
            return MEDIA_IMAGE, lambda **kw: {
@@ -1723,7 +1903,7 @@ class WeixinAdapter(BasePlatformAdapter):
                    "video_md5": kw.get("rawfilemd5", ""),
                },
            }
-        if mime.startswith("audio/") or path.endswith(".silk"):
+        if path.endswith(".silk") and not force_file_attachment:
            return MEDIA_VOICE, lambda **kw: {
                "type": ITEM_VOICE,
                "voice_item": {
@@ -1732,9 +1912,25 @@ class WeixinAdapter(BasePlatformAdapter):
                        "aes_key": kw["aes_key_for_api"],
                        "encrypt_type": 1,
                    },
+                    "encode_type": kw.get("encode_type"),
+                    "bits_per_sample": kw.get("bits_per_sample"),
+                    "sample_rate": kw.get("sample_rate"),
                    "playtime": kw.get("playtime", 0),
                },
            }
+        if mime.startswith("audio/"):
+            return MEDIA_FILE, lambda **kw: {
+                "type": ITEM_FILE,
+                "file_item": {
+                    "media": {
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
+                        "encrypt_type": 1,
+                    },
+                    "file_name": kw["filename"],
+                    "len": str(kw["plaintext_size"]),
+                },
+            }
        return MEDIA_FILE, lambda **kw: {
            "type": ITEM_FILE,
            "file_item": {
@@ -1784,7 +1980,34 @@ async def send_weixin_direct(
    token_store.restore(account_id)
    context_token = token_store.get(account_id, chat_id)

-    async with aiohttp.ClientSession(trust_env=True) as session:
+    live_adapter = _LIVE_ADAPTERS.get(resolved_token)
+    send_session = getattr(live_adapter, '_send_session', None)
+    if live_adapter is not None and send_session is not None and not send_session.closed:
+        last_result: Optional[SendResult] = None
+        cleaned = live_adapter.format_message(message)
+        if cleaned:
+            last_result = await live_adapter.send(chat_id, cleaned)
+            if not last_result.success:
+                return {"error": f"Weixin send failed: {last_result.error}"}
+
+        for media_path, _is_voice in media_files or []:
+            ext = Path(media_path).suffix.lower()
+            if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}:
+                last_result = await live_adapter.send_image_file(chat_id, media_path)
+            else:
+                last_result = await live_adapter.send_document(chat_id, media_path)
+            if not last_result.success:
+                return {"error": f"Weixin media send failed: {last_result.error}"}
+
+        return {
+            "success": True,
+            "platform": "weixin",
+            "chat_id": chat_id,
+            "message_id": last_result.message_id if last_result else None,
+            "context_token_used": bool(context_token),
+        }
+
+    async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
        adapter = WeixinAdapter(
            PlatformConfig(
                enabled=True,
@@ -1797,6 +2020,7 @@ async def send_weixin_direct(
                },
            )
        )
+        adapter._send_session = session
        adapter._session = session
        adapter._token = resolved_token
        adapter._account_id = account_id
@@ -24,11 +24,20 @@ import signal
 import tempfile
 import threading
 import time
+from collections import OrderedDict
 from contextvars import copy_context
 from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List

+# --- Agent cache tuning ---------------------------------------------------
+# Bounds the per-session AIAgent cache to prevent unbounded growth in
+# long-lived gateways (each AIAgent holds LLM clients, tool schemas,
+# memory providers, etc.).  LRU order + idle TTL eviction are enforced
+# from _enforce_agent_cache_cap() and _session_expiry_watcher() below.
+_AGENT_CACHE_MAX_SIZE = 128
+_AGENT_CACHE_IDLE_TTL_SECS = 3600.0  # evict agents idle for >1h
+
 # ---------------------------------------------------------------------------
 # SSL certificate auto-detection for NixOS and other non-standard systems.
 # Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
@@ -622,8 +631,13 @@ class GatewayRunner:
        # system prompt (including memory) every turn — breaking prefix cache
        # and costing ~10x more on providers with prompt caching (Anthropic).
        # Key: session_key, Value: (AIAgent, config_signature_str)
+        #
+        # OrderedDict so _enforce_agent_cache_cap() can pop the least-recently-
+        # used entry (move_to_end() on cache hits, popitem(last=False) for
+        # eviction).  Hard cap via _AGENT_CACHE_MAX_SIZE, idle TTL enforced
+        # from _session_expiry_watcher().
        import threading as _threading
-        self._agent_cache: Dict[str, tuple] = {}
+        self._agent_cache: "OrderedDict[str, tuple]" = OrderedDict()
        self._agent_cache_lock = _threading.Lock()

        # Per-session model overrides from /model command.
@@ -2102,6 +2116,11 @@ class GatewayRunner:
                            _cached_agent = self._running_agents.get(key)
                        if _cached_agent and _cached_agent is not _AGENT_PENDING_SENTINEL:
                            self._cleanup_agent_resources(_cached_agent)
+                        # Drop the cache entry so the AIAgent (and its LLM
+                        # clients, tool schemas, memory provider refs) can
+                        # be garbage-collected.  Otherwise the cache grows
+                        # unbounded across the gateway's lifetime.
+                        self._evict_cached_agent(key)
                        # Mark as flushed and persist to disk so the flag
                        # survives gateway restarts.
                        with self.session_store._lock:
@@ -2145,6 +2164,20 @@ class GatewayRunner:
                        logger.info(
                            "Session expiry done: %d flushed", _flushed,
                        )
+
+                # Sweep agents that have been idle beyond the TTL regardless
+                # of session reset policy.  This catches sessions with very
+                # long / "never" reset windows, whose cached AIAgents would
+                # otherwise pin memory for the gateway's entire lifetime.
+                try:
+                    _idle_evicted = self._sweep_idle_cached_agents()
+                    if _idle_evicted:
+                        logger.info(
+                            "Agent cache idle sweep: evicted %d agent(s)",
+                            _idle_evicted,
+                        )
+                except Exception as _e:
+                    logger.debug("Idle agent sweep failed: %s", _e)
            except Exception as e:
                logger.debug("Session expiry watcher error: %s", e)
            # Sleep in small increments so we can stop quickly
@@ -2618,6 +2651,9 @@ class GatewayRunner:
            Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS",
            Platform.QQBOT: "QQ_ALLOWED_USERS",
        }
+        platform_group_env_map = {
+            Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
+        }
        platform_allow_all_map = {
            Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
            Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
@@ -2642,6 +2678,28 @@ class GatewayRunner:
        if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"):
            return True

+        # Discord bot senders that passed the DISCORD_ALLOW_BOTS platform
+        # filter are already authorized at the platform level — skip the
+        # user allowlist. Without this, bot messages allowed by
+        # DISCORD_ALLOW_BOTS=mentions/all would be rejected here with
+        # "Unauthorized user" (fixes #4466).
+        if source.platform == Platform.DISCORD and getattr(source, "is_bot", False):
+            allow_bots = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip()
+            if allow_bots in ("mentions", "all"):
+                return True
+
+        # Discord role-based access (DISCORD_ALLOWED_ROLES): the adapter's
+        # on_message pre-filter already verified role membership — if the
+        # message reached here, the user passed that check. Authorize
+        # directly to avoid the "no allowlists configured" branch below
+        # rejecting role-only setups where DISCORD_ALLOWED_USERS is empty
+        # (issue #7871).
+        if (
+            source.platform == Platform.DISCORD
+            and os.getenv("DISCORD_ALLOWED_ROLES", "").strip()
+        ):
+            return True
+
        # Check pairing store (always checked, regardless of allowlists)
        platform_name = source.platform.value if source.platform else ""
        if self.pairing_store.is_approved(platform_name, user_id):
@@ -2649,12 +2707,23 @@ class GatewayRunner:

        # Check platform-specific and global allowlists
        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
+        group_allowlist = ""
+        if source.chat_type == "group":
+            group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()

-        if not platform_allowlist and not global_allowlist:
+        if not platform_allowlist and not group_allowlist and not global_allowlist:
            # No allowlists configured -- check global allow-all flag
            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")

+        # Some platforms authorize group traffic by chat ID rather than sender ID.
+        if group_allowlist and source.chat_type == "group" and source.chat_id:
+            allowed_group_ids = {
+                chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
+            }
+            if "*" in allowed_group_ids or source.chat_id in allowed_group_ids:
+                return True
+
        # Check if user is in any allowlist
        allowed_ids = set()
        if platform_allowlist:
@@ -5797,7 +5866,7 @@ class GatewayRunner:
                        pass

                # Send media files
-                for media_path in (media_files or []):
+                for media_path, _is_voice in (media_files or []):
                    try:
                        await adapter.send_document(
                            chat_id=source.chat_id,
@@ -5975,7 +6044,7 @@ class GatewayRunner:
                except Exception:
                    pass

-            for media_path in (media_files or []):
+            for media_path, _is_voice in (media_files or []):
                try:
                    await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
                except Exception:
@@ -7851,6 +7920,153 @@ class GatewayRunner:
            with _lock:
                self._agent_cache.pop(session_key, None)

+    def _release_evicted_agent_soft(self, agent: Any) -> None:
+        """Soft cleanup for cache-evicted agents — preserves session tool state.
+
+        Called from _enforce_agent_cache_cap and _sweep_idle_cached_agents.
+        Distinct from _cleanup_agent_resources (full teardown) because a
+        cache-evicted session may resume at any time — its terminal
+        sandbox, browser daemon, and tracked bg processes must outlive
+        the Python AIAgent instance so the next agent built for the
+        same task_id inherits them.
+        """
+        if agent is None:
+            return
+        try:
+            if hasattr(agent, "release_clients"):
+                agent.release_clients()
+            else:
+                # Older agent instance (shouldn't happen in practice) —
+                # fall back to the legacy full-close path.
+                self._cleanup_agent_resources(agent)
+        except Exception:
+            pass
+
+    def _enforce_agent_cache_cap(self) -> None:
+        """Evict oldest cached agents when cache exceeds _AGENT_CACHE_MAX_SIZE.
+
+        Must be called with _agent_cache_lock held.  Resource cleanup
+        (memory provider shutdown, tool resource close) is scheduled
+        on a daemon thread so the caller doesn't block on slow teardown
+        while holding the cache lock.
+
+        Agents currently in _running_agents are SKIPPED — their clients,
+        terminal sandboxes, background processes, and child subagents
+        are all in active use by the running turn.  Evicting them would
+        tear down those resources mid-turn and crash the request.  If
+        every candidate in the LRU order is active, we simply leave the
+        cache over the cap; it will be re-checked on the next insert.
+        """
+        _cache = getattr(self, "_agent_cache", None)
+        if _cache is None:
+            return
+        # OrderedDict.popitem(last=False) pops oldest; plain dict lacks the
+        # arg so skip enforcement if a test fixture swapped the cache type.
+        if not hasattr(_cache, "move_to_end"):
+            return
+
+        # Snapshot of agent instances that are actively mid-turn.  Use id()
+        # so the lookup is O(1) and doesn't depend on AIAgent.__eq__ (which
+        # MagicMock overrides in tests).
+        running_ids = {
+            id(a)
+            for a in getattr(self, "_running_agents", {}).values()
+            if a is not None and a is not _AGENT_PENDING_SENTINEL
+        }
+
+        # Walk LRU → MRU and evict excess-LRU entries that aren't mid-turn.
+        # We only consider entries in the first (size - cap) LRU positions
+        # as eviction candidates.  If one of those slots is held by an
+        # active agent, we SKIP it without compensating by evicting a
+        # newer entry — that would penalise a freshly-inserted session
+        # (which has no cache history to retain) while protecting an
+        # already-cached long-running one.  The cache may therefore stay
+        # temporarily over cap; it will re-check on the next insert,
+        # after active turns have finished.
+        excess = max(0, len(_cache) - _AGENT_CACHE_MAX_SIZE)
+        evict_plan: List[tuple] = []  # [(key, agent), ...]
+        if excess > 0:
+            ordered_keys = list(_cache.keys())
+            for key in ordered_keys[:excess]:
+                entry = _cache.get(key)
+                agent = entry[0] if isinstance(entry, tuple) and entry else None
+                if agent is not None and id(agent) in running_ids:
+                    continue  # active mid-turn; don't evict, don't substitute
+                evict_plan.append((key, agent))
+
+        for key, _ in evict_plan:
+            _cache.pop(key, None)
+
+        remaining_over_cap = len(_cache) - _AGENT_CACHE_MAX_SIZE
+        if remaining_over_cap > 0:
+            logger.warning(
+                "Agent cache over cap (%d > %d); %d excess slot(s) held by "
+                "mid-turn agents — will re-check on next insert.",
+                len(_cache), _AGENT_CACHE_MAX_SIZE, remaining_over_cap,
+            )
+
+        for key, agent in evict_plan:
+            logger.info(
+                "Agent cache at cap; evicting LRU session=%s (cache_size=%d)",
+                key, len(_cache),
+            )
+            if agent is not None:
+                threading.Thread(
+                    target=self._release_evicted_agent_soft,
+                    args=(agent,),
+                    daemon=True,
+                    name=f"agent-cache-evict-{key[:24]}",
+                ).start()
+
+    def _sweep_idle_cached_agents(self) -> int:
+        """Evict cached agents whose AIAgent has been idle > _AGENT_CACHE_IDLE_TTL_SECS.
+
+        Safe to call from the session expiry watcher without holding the
+        cache lock — acquires it internally.  Returns the number of entries
+        evicted.  Resource cleanup is scheduled on daemon threads.
+
+        Agents currently in _running_agents are SKIPPED for the same reason
+        as _enforce_agent_cache_cap: tearing down an active turn's clients
+        mid-flight would crash the request.
+        """
+        _cache = getattr(self, "_agent_cache", None)
+        _lock = getattr(self, "_agent_cache_lock", None)
+        if _cache is None or _lock is None:
+            return 0
+        now = time.time()
+        to_evict: List[tuple] = []
+        running_ids = {
+            id(a)
+            for a in getattr(self, "_running_agents", {}).values()
+            if a is not None and a is not _AGENT_PENDING_SENTINEL
+        }
+        with _lock:
+            for key, entry in list(_cache.items()):
+                agent = entry[0] if isinstance(entry, tuple) and entry else None
+                if agent is None:
+                    continue
+                if id(agent) in running_ids:
+                    continue  # mid-turn — don't tear it down
+                last_activity = getattr(agent, "_last_activity_ts", None)
+                if last_activity is None:
+                    continue
+                if (now - last_activity) > _AGENT_CACHE_IDLE_TTL_SECS:
+                    to_evict.append((key, agent))
+            for key, _ in to_evict:
+                _cache.pop(key, None)
+        for key, agent in to_evict:
+            logger.info(
+                "Agent cache idle-TTL evict: session=%s (idle=%.0fs)",
+                key, now - getattr(agent, "_last_activity_ts", now),
+            )
+            threading.Thread(
+                target=self._release_evicted_agent_soft,
+                args=(agent,),
+                daemon=True,
+                name=f"agent-cache-idle-{key[:24]}",
+            ).start()
+        return len(to_evict)
+
    # ------------------------------------------------------------------
    # Proxy mode: forward messages to a remote Hermes API server
    # ------------------------------------------------------------------
@@ -8618,6 +8834,13 @@ class GatewayRunner:
                    cached = _cache.get(session_key)
                    if cached and cached[1] == _sig:
                        agent = cached[0]
+                        # Refresh LRU order so the cap enforcement evicts
+                        # truly-oldest entries, not the one we just used.
+                        if hasattr(_cache, "move_to_end"):
+                            try:
+                                _cache.move_to_end(session_key)
+                            except KeyError:
+                                pass
                        # Reset activity timestamp so the inactivity timeout
                        # handler doesn't see stale idle time from the previous
                        # turn and immediately kill this agent.  (#9051)
@@ -8656,6 +8879,7 @@ class GatewayRunner:
                if _cache_lock and _cache is not None:
                    with _cache_lock:
                        _cache[session_key] = (agent, _sig)
+                        self._enforce_agent_cache_cap()
                logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig)

            # Per-message state — callbacks and reasoning config change every
@@ -82,6 +82,7 @@ class SessionSource:
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
+    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
    
    @property
    def description(self) -> str:
@@ -773,6 +773,28 @@ def is_source_suppressed(provider_id: str, source: str) -> bool:
        return False


+def unsuppress_credential_source(provider_id: str, source: str) -> bool:
+    """Clear a suppression marker so the source will be re-seeded on the next load.
+
+    Returns True if a marker was cleared, False if no marker existed.
+    """
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        suppressed = auth_store.get("suppressed_sources")
+        if not isinstance(suppressed, dict):
+            return False
+        provider_list = suppressed.get(provider_id)
+        if not isinstance(provider_list, list) or source not in provider_list:
+            return False
+        provider_list.remove(source)
+        if not provider_list:
+            suppressed.pop(provider_id, None)
+        if not suppressed:
+            auth_store.pop("suppressed_sources", None)
+        _save_auth_store(auth_store)
+        return True
+
+
 def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
    """Return persisted auth state for a provider, or None."""
    auth_store = _load_auth_store()
@@ -3297,6 +3319,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:

        inference_base_url = auth_state["inference_base_url"]

+        # Snapshot the prior active_provider BEFORE _save_provider_state
+        # overwrites it to "nous".  If the user picks "Skip (keep current)"
+        # during model selection below, we restore this so the user's previous
+        # provider (e.g. openrouter) is preserved.
+        with _auth_store_lock():
+            _prior_store = _load_auth_store()
+            prior_active_provider = _prior_store.get("active_provider")
+
        with _auth_store_lock():
            auth_store = _load_auth_store()
            _save_provider_state(auth_store, "nous", auth_state)
@@ -3356,6 +3386,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            print(f"Login succeeded, but could not fetch available models. Reason: {message}")

        # Write provider + model atomically so config is never mismatched.
+        # If no model was selected (user picked "Skip (keep current)",
+        # model list fetch failed, or no curated models were available),
+        # preserve the user's previous provider — don't silently switch
+        # them to Nous with a mismatched model.  The Nous OAuth tokens
+        # stay saved for future use.
+        if not selected_model:
+            # Restore the prior active_provider that _save_provider_state
+            # overwrote to "nous".  config.yaml model.provider is left
+            # untouched, so the user's previous provider is fully preserved.
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                if prior_active_provider:
+                    auth_store["active_provider"] = prior_active_provider
+                else:
+                    auth_store.pop("active_provider", None)
+                _save_auth_store(auth_store)
+            print()
+            print("No provider change. Nous credentials saved for future use.")
+            print("  Run `hermes model` again to switch to Nous Portal.")
+            return
+
        config_path = _update_config_for_provider(
            "nous", inference_base_url, default_model=selected_model,
        )
@@ -233,6 +233,9 @@ def auth_add_command(args) -> None:
        return

    if provider == "openai-codex":
+        # Clear any existing suppression marker so a re-link after `hermes auth
+        # remove openai-codex` works without the new tokens being skipped.
+        auth_mod.unsuppress_credential_source(provider, "device_code")
        creds = auth_mod._codex_device_code_login()
        label = (getattr(args, "label", None) or "").strip() or label_from_token(
            creds["tokens"]["access_token"],
@@ -352,7 +355,34 @@ def auth_remove_command(args) -> None:
    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
    # clear the underlying auth store / credential file so it doesn't get
    # re-seeded on the next load_pool() call.
-    elif removed.source == "device_code" and provider in ("openai-codex", "nous"):
+    elif provider == "openai-codex" and (
+        removed.source == "device_code" or removed.source.endswith(":device_code")
+    ):
+        # Codex tokens live in TWO places: the Hermes auth store and
+        # ~/.codex/auth.json (the Codex CLI shared file).  On every refresh,
+        # refresh_codex_oauth_pure() writes to both.  So clearing only the
+        # Hermes auth store is not enough — _seed_from_singletons() will
+        # auto-import from ~/.codex/auth.json on the next load_pool() and
+        # the removal is instantly undone.  Mark the source as suppressed
+        # so auto-import is skipped; leave ~/.codex/auth.json untouched so
+        # the Codex CLI itself keeps working.
+        from hermes_cli.auth import (
+            _load_auth_store, _save_auth_store, _auth_store_lock,
+            suppress_credential_source,
+        )
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+            providers_dict = auth_store.get("providers")
+            if isinstance(providers_dict, dict) and provider in providers_dict:
+                del providers_dict[provider]
+                _save_auth_store(auth_store)
+                print(f"Cleared {provider} OAuth tokens from auth store")
+        suppress_credential_source(provider, "device_code")
+        print("Suppressed openai-codex device_code source — it will not be re-seeded.")
+        print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
+        print("Run `hermes auth add openai-codex` to re-enable if needed.")
+
+    elif removed.source == "device_code" and provider == "nous":
        from hermes_cli.auth import (
            _load_auth_store, _save_auth_store, _auth_store_lock,
        )
@@ -0,0 +1,294 @@
+"""
+DingTalk Device Flow authorization.
+
+Implements the same 3-step registration flow as dingtalk-openclaw-connector:
+  1. POST /app/registration/init   → get nonce
+  2. POST /app/registration/begin  → get device_code + verification_uri_complete
+  3. POST /app/registration/poll   → poll until SUCCESS → get client_id + client_secret
+
+The verification_uri_complete is rendered as a QR code in the terminal so the
+user can scan it with DingTalk to authorize, yielding AppKey + AppSecret
+automatically.
+"""
+
+from __future__ import annotations
+
+import io
+import os
+import sys
+import time
+import logging
+from typing import Optional, Tuple
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+# ── Configuration ──────────────────────────────────────────────────────────
+
+REGISTRATION_BASE_URL = os.environ.get(
+    "DINGTALK_REGISTRATION_BASE_URL", "https://oapi.dingtalk.com"
+).rstrip("/")
+
+REGISTRATION_SOURCE = os.environ.get("DINGTALK_REGISTRATION_SOURCE", "openClaw")
+
+
+# ── API helpers ────────────────────────────────────────────────────────────
+
+class RegistrationError(Exception):
+    """Raised when a DingTalk registration API call fails."""
+
+
+def _api_post(path: str, payload: dict) -> dict:
+    """POST to the registration API and return the parsed JSON body."""
+    url = f"{REGISTRATION_BASE_URL}{path}"
+    try:
+        resp = requests.post(url, json=payload, timeout=15)
+        resp.raise_for_status()
+        data = resp.json()
+    except requests.RequestException as exc:
+        raise RegistrationError(f"Network error calling {url}: {exc}") from exc
+
+    errcode = data.get("errcode", -1)
+    if errcode != 0:
+        errmsg = data.get("errmsg", "unknown error")
+        raise RegistrationError(f"API error [{path}]: {errmsg} (errcode={errcode})")
+    return data
+
+
+# ── Core flow ──────────────────────────────────────────────────────────────
+
+def begin_registration() -> dict:
+    """Start a device-flow registration.
+
+    Returns a dict with keys:
+        device_code, verification_uri_complete, expires_in, interval
+    """
+    # Step 1: init → nonce
+    init_data = _api_post("/app/registration/init", {"source": REGISTRATION_SOURCE})
+    nonce = str(init_data.get("nonce", "")).strip()
+    if not nonce:
+        raise RegistrationError("init response missing nonce")
+
+    # Step 2: begin → device_code, verification_uri_complete
+    begin_data = _api_post("/app/registration/begin", {"nonce": nonce})
+    device_code = str(begin_data.get("device_code", "")).strip()
+    verification_uri_complete = str(begin_data.get("verification_uri_complete", "")).strip()
+    if not device_code:
+        raise RegistrationError("begin response missing device_code")
+    if not verification_uri_complete:
+        raise RegistrationError("begin response missing verification_uri_complete")
+
+    return {
+        "device_code": device_code,
+        "verification_uri_complete": verification_uri_complete,
+        "expires_in": int(begin_data.get("expires_in", 7200)),
+        "interval": max(int(begin_data.get("interval", 3)), 2),
+    }
+
+
+def poll_registration(device_code: str) -> dict:
+    """Poll the registration status once.
+
+    Returns a dict with keys:  status, client_id?, client_secret?, fail_reason?
+    """
+    data = _api_post("/app/registration/poll", {"device_code": device_code})
+    status_raw = str(data.get("status", "")).strip().upper()
+    if status_raw not in ("WAITING", "SUCCESS", "FAIL", "EXPIRED"):
+        status_raw = "UNKNOWN"
+    return {
+        "status": status_raw,
+        "client_id": str(data.get("client_id", "")).strip() or None,
+        "client_secret": str(data.get("client_secret", "")).strip() or None,
+        "fail_reason": str(data.get("fail_reason", "")).strip() or None,
+    }
+
+
+def wait_for_registration_success(
+    device_code: str,
+    interval: int = 3,
+    expires_in: int = 7200,
+    on_waiting: Optional[callable] = None,
+) -> Tuple[str, str]:
+    """Block until the registration succeeds or times out.
+
+    Returns (client_id, client_secret).
+    """
+    deadline = time.monotonic() + expires_in
+    retry_window = 120  # 2 minutes for transient errors
+    retry_start = 0.0
+
+    while time.monotonic() < deadline:
+        time.sleep(interval)
+        try:
+            result = poll_registration(device_code)
+        except RegistrationError:
+            if retry_start == 0:
+                retry_start = time.monotonic()
+            if time.monotonic() - retry_start < retry_window:
+                continue
+            raise
+
+        status = result["status"]
+        if status == "WAITING":
+            retry_start = 0
+            if on_waiting:
+                on_waiting()
+            continue
+        if status == "SUCCESS":
+            cid = result["client_id"]
+            csecret = result["client_secret"]
+            if not cid or not csecret:
+                raise RegistrationError("authorization succeeded but credentials are missing")
+            return cid, csecret
+        # FAIL / EXPIRED / UNKNOWN
+        if retry_start == 0:
+            retry_start = time.monotonic()
+        if time.monotonic() - retry_start < retry_window:
+            continue
+        reason = result.get("fail_reason") or status
+        raise RegistrationError(f"authorization failed: {reason}")
+
+    raise RegistrationError("authorization timed out, please retry")
+
+
+# ── QR code rendering ─────────────────────────────────────────────────────
+
+def _ensure_qrcode_installed() -> bool:
+    """Try to import qrcode; if missing, auto-install it via pip/uv."""
+    try:
+        import qrcode  # noqa: F401
+        return True
+    except ImportError:
+        pass
+
+    import subprocess
+
+    # Try uv first (Hermes convention), then pip
+    for cmd in (
+        [sys.executable, "-m", "uv", "pip", "install", "qrcode"],
+        [sys.executable, "-m", "pip", "install", "-q", "qrcode"],
+    ):
+        try:
+            subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+            import qrcode  # noqa: F401,F811
+            return True
+        except (subprocess.CalledProcessError, ImportError, FileNotFoundError):
+            continue
+    return False
+
+
+def render_qr_to_terminal(url: str) -> bool:
+    """Render *url* as a compact QR code in the terminal.
+
+    Returns True if the QR code was printed, False if the library is missing.
+    """
+    try:
+        import qrcode
+    except ImportError:
+        return False
+
+    qr = qrcode.QRCode(
+        version=1,
+        error_correction=qrcode.constants.ERROR_CORRECT_L,
+        box_size=1,
+        border=1,
+    )
+    qr.add_data(url)
+    qr.make(fit=True)
+
+    # Use half-block characters for compact rendering (2 rows per character)
+    matrix = qr.get_matrix()
+    rows = len(matrix)
+    lines: list[str] = []
+
+    TOP_HALF = "\u2580"      # ▀
+    BOTTOM_HALF = "\u2584"   # ▄
+    FULL_BLOCK = "\u2588"    # █
+    EMPTY = " "
+
+    for r in range(0, rows, 2):
+        line_chars: list[str] = []
+        for c in range(len(matrix[r])):
+            top = matrix[r][c]
+            bottom = matrix[r + 1][c] if r + 1 < rows else False
+            if top and bottom:
+                line_chars.append(FULL_BLOCK)
+            elif top:
+                line_chars.append(TOP_HALF)
+            elif bottom:
+                line_chars.append(BOTTOM_HALF)
+            else:
+                line_chars.append(EMPTY)
+        lines.append("    " + "".join(line_chars))
+
+    print("\n".join(lines))
+    return True
+
+
+# ── High-level entry point for the setup wizard ───────────────────────────
+
+def dingtalk_qr_auth() -> Optional[Tuple[str, str]]:
+    """Run the interactive QR-code device-flow authorization.
+
+    Returns (client_id, client_secret) on success, or None if the user
+    cancelled or the flow failed.
+    """
+    from hermes_cli.setup import print_info, print_success, print_warning, print_error
+
+    print()
+    print_info("  Initializing DingTalk device authorization...")
+    print_info("  Note: the scan page is branded 'OpenClaw' — DingTalk's")
+    print_info("        ecosystem onboarding bridge. Safe to use.")
+
+    try:
+        reg = begin_registration()
+    except RegistrationError as exc:
+        print_error(f"  Authorization init failed: {exc}")
+        return None
+
+    url = reg["verification_uri_complete"]
+
+    # Ensure qrcode library is available (auto-install if missing)
+    if not _ensure_qrcode_installed():
+        print_warning("  qrcode library install failed, will show link only.")
+
+    print()
+    print_info("  Please scan the QR code below with DingTalk to authorize:")
+    print()
+
+    if not render_qr_to_terminal(url):
+        print_warning(f"  QR code render failed, please open the link below to authorize:")
+
+    print()
+    print_info(f"  Or open this link manually: {url}")
+    print()
+    print_info("  Waiting for QR scan authorization... (timeout: 2 hours)")
+
+    dot_count = 0
+
+    def _on_waiting():
+        nonlocal dot_count
+        dot_count += 1
+        if dot_count % 10 == 0:
+            sys.stdout.write(".")
+            sys.stdout.flush()
+
+    try:
+        client_id, client_secret = wait_for_registration_success(
+            device_code=reg["device_code"],
+            interval=reg["interval"],
+            expires_in=reg["expires_in"],
+            on_waiting=_on_waiting,
+        )
+    except RegistrationError as exc:
+        print()
+        print_error(f"  Authorization failed: {exc}")
+        return None
+
+    print()
+    print_success("  QR scan authorization successful!")
+    print_success(f"  Client ID:     {client_id}")
+    print_success(f"  Client Secret: {client_secret[:8]}{'*' * (len(client_secret) - 8)}")
+
+    return client_id, client_secret
@@ -2211,9 +2211,62 @@ def _setup_sms():


 def _setup_dingtalk():
-    """Configure DingTalk via the standard platform setup."""
+    """Configure DingTalk — QR scan (recommended) or manual credential entry."""
+    from hermes_cli.setup import (
+        prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
+    )
+
    dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
-    _setup_standard_platform(dingtalk_platform)
+    emoji = dingtalk_platform["emoji"]
+    label = dingtalk_platform["label"]
+
+    print()
+    print(color(f"  ─── {emoji} {label} Setup ───", Colors.CYAN))
+
+    existing = get_env_value("DINGTALK_CLIENT_ID")
+    if existing:
+        print()
+        print_success(f"{label} is already configured (Client ID: {existing}).")
+        if not prompt_yes_no(f"  Reconfigure {label}?", False):
+            return
+
+    print()
+    method = prompt_choice(
+        "  Choose setup method",
+        [
+            "QR Code Scan (Recommended, auto-obtain Client ID and Client Secret)",
+            "Manual Input (Client ID and Client Secret)",
+        ],
+        default=0,
+    )
+
+    if method == 0:
+        # ── QR-code device-flow authorization ──
+        try:
+            from hermes_cli.dingtalk_auth import dingtalk_qr_auth
+        except ImportError as exc:
+            print_warning(f"  QR auth module failed to load ({exc}), falling back to manual input.")
+            _setup_standard_platform(dingtalk_platform)
+            return
+
+        result = dingtalk_qr_auth()
+        if result is None:
+            print_warning("  QR auth incomplete, falling back to manual input.")
+            _setup_standard_platform(dingtalk_platform)
+            return
+
+        client_id, client_secret = result
+        save_env_value("DINGTALK_CLIENT_ID", client_id)
+        save_env_value("DINGTALK_CLIENT_SECRET", client_secret)
+        save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
+        print()
+        print_success(f"{emoji} {label} configured via QR scan!")
+    else:
+        # ── Manual entry ──
+        _setup_standard_platform(dingtalk_platform)
+        # Also enable allow-all by default for convenience
+        if get_env_value("DINGTALK_CLIENT_ID"):
+            save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")


 def _setup_wecom():
@@ -2749,6 +2802,8 @@ def gateway_setup():
            _setup_signal()
        elif platform["key"] == "weixin":
            _setup_weixin()
+        elif platform["key"] == "dingtalk":
+            _setup_dingtalk()
        elif platform["key"] == "feishu":
            _setup_feishu()
        else:
@@ -2472,10 +2472,10 @@ def _model_flow_kimi(config, current_model=""):

    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
-        # Coding Plan models (kimi-for-coding first)
+        # Coding Plan models (kimi-k2.5 first)
        model_list = [
-            "kimi-for-coding",
            "kimi-k2.5",
+            "kimi-for-coding",
            "kimi-k2-thinking",
            "kimi-k2-thinking-turbo",
        ]
@@ -5600,6 +5600,25 @@ Examples:
    skills_uninstall = skills_subparsers.add_parser("uninstall", help="Remove a hub-installed skill")
    skills_uninstall.add_argument("name", help="Skill name to remove")

+    skills_reset = skills_subparsers.add_parser(
+        "reset",
+        help="Reset a bundled skill — clears 'user-modified' tracking so updates work again",
+        description=(
+            "Clear a bundled skill's entry from the sync manifest (~/.hermes/skills/.bundled_manifest) "
+            "so future 'hermes update' runs stop marking it as user-modified. Pass --restore to also "
+            "replace the current copy with the bundled version."
+        ),
+    )
+    skills_reset.add_argument("name", help="Skill name to reset (e.g. google-workspace)")
+    skills_reset.add_argument(
+        "--restore", action="store_true",
+        help="Also delete the current copy and re-copy the bundled version",
+    )
+    skills_reset.add_argument(
+        "--yes", "-y", action="store_true",
+        help="Skip confirmation prompt when using --restore",
+    )
+
    skills_publish = skills_subparsers.add_parser("publish", help="Publish a skill to a registry")
    skills_publish.add_argument("skill_path", help="Path to skill directory")
    skills_publish.add_argument("--to", default="github", choices=["github", "clawhub"], help="Target registry")
@@ -5904,6 +5923,12 @@ Examples:
    mcp_cfg_p = mcp_sub.add_parser("configure", aliases=["config"], help="Toggle tool selection")
    mcp_cfg_p.add_argument("name", help="Server name to configure")

+    mcp_login_p = mcp_sub.add_parser(
+        "login",
+        help="Force re-authentication for an OAuth-based MCP server",
+    )
+    mcp_login_p.add_argument("name", help="Server name to re-authenticate")
+
    def cmd_mcp(args):
        from hermes_cli.mcp_config import mcp_command
        mcp_command(args)
@@ -279,8 +279,8 @@ def cmd_mcp_add(args):
        _info(f"Starting OAuth flow for '{name}'...")
        oauth_ok = False
        try:
-            from tools.mcp_oauth import build_oauth_auth
-            oauth_auth = build_oauth_auth(name, url)
+            from tools.mcp_oauth_manager import get_manager
+            oauth_auth = get_manager().get_or_build_provider(name, url, None)
            if oauth_auth:
                server_config["auth"] = "oauth"
                _success("OAuth configured (tokens will be acquired on first connection)")
@@ -428,10 +428,12 @@ def cmd_mcp_remove(args):
    _remove_mcp_server(name)
    _success(f"Removed '{name}' from config")

-    # Clean up OAuth tokens if they exist
+    # Clean up OAuth tokens if they exist — route through MCPOAuthManager so
+    # any provider instance cached in the current process (e.g. from an
+    # earlier `hermes mcp test` in the same session) is evicted too.
    try:
-        from tools.mcp_oauth import remove_oauth_tokens
-        remove_oauth_tokens(name)
+        from tools.mcp_oauth_manager import get_manager
+        get_manager().remove(name)
        _success("Cleaned up OAuth tokens")
    except Exception:
        pass
@@ -577,6 +579,63 @@ def _interpolate_value(value: str) -> str:
    return re.sub(r"\$\{(\w+)\}", _replace, value)


+# ─── hermes mcp login ────────────────────────────────────────────────────────
+
+def cmd_mcp_login(args):
+    """Force re-authentication for an OAuth-based MCP server.
+
+    Deletes cached tokens (both on disk and in the running process's
+    MCPOAuthManager cache) and triggers a fresh OAuth flow via the
+    existing probe path.
+
+    Use this when:
+      - Tokens are stuck in a bad state (server revoked, refresh token
+        consumed by an external process, etc.)
+      - You want to re-authenticate to change scopes or account
+      - A tool call returned ``needs_reauth: true``
+    """
+    name = args.name
+    servers = _get_mcp_servers()
+
+    if name not in servers:
+        _error(f"Server '{name}' not found in config.")
+        if servers:
+            _info(f"Available servers: {', '.join(servers)}")
+        return
+
+    server_config = servers[name]
+    url = server_config.get("url")
+    if not url:
+        _error(f"Server '{name}' has no URL — not an OAuth-capable server")
+        return
+    if server_config.get("auth") != "oauth":
+        _error(f"Server '{name}' is not configured for OAuth (auth={server_config.get('auth')})")
+        _info("Use `hermes mcp remove` + `hermes mcp add` to reconfigure auth.")
+        return
+
+    # Wipe both disk and in-memory cache so the next probe forces a fresh
+    # OAuth flow.
+    try:
+        from tools.mcp_oauth_manager import get_manager
+        mgr = get_manager()
+        mgr.remove(name)
+    except Exception as exc:
+        _warning(f"Could not clear existing OAuth state: {exc}")
+
+    print()
+    _info(f"Starting OAuth flow for '{name}'...")
+
+    # Probe triggers the OAuth flow (browser redirect + callback capture).
+    try:
+        tools = _probe_single_server(name, server_config)
+        if tools:
+            _success(f"Authenticated — {len(tools)} tool(s) available")
+        else:
+            _success("Authenticated (server reported no tools)")
+    except Exception as exc:
+        _error(f"Authentication failed: {exc}")
+
+
 # ─── hermes mcp configure ────────────────────────────────────────────────────

 def cmd_mcp_configure(args):
@@ -696,6 +755,7 @@ def mcp_command(args):
        "test": cmd_mcp_test,
        "configure": cmd_mcp_configure,
        "config": cmd_mcp_configure,
+        "login": cmd_mcp_login,
    }

    handler = handlers.get(action)
@@ -713,4 +773,5 @@ def mcp_command(args):
        _info("hermes mcp list                               List servers")
        _info("hermes mcp test <name>                        Test connection")
        _info("hermes mcp configure <name>                   Toggle tools")
+        _info("hermes mcp login <name>                       Re-authenticate OAuth")
        print()
@@ -374,7 +374,26 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
            return bare
        return _dots_to_hyphens(bare)

-    # --- Copilot: strip matching provider prefix, keep dots ---
+    # --- Copilot / Copilot ACP: delegate to the Copilot-specific
+    #     normalizer.  It knows about the alias table (vendor-prefix
+    #     stripping for Anthropic/OpenAI, dash-to-dot repair for Claude)
+    #     and live-catalog lookups.  Without this, vendor-prefixed or
+    #     dash-notation Claude IDs survive to the Copilot API and hit
+    #     HTTP 400 "model_not_supported".  See issue #6879.
+    if provider in {"copilot", "copilot-acp"}:
+        try:
+            from hermes_cli.models import normalize_copilot_model_id
+
+            normalized = normalize_copilot_model_id(name)
+            if normalized:
+                return normalized
+        except Exception:
+            # Fall through to the generic strip-vendor behaviour below
+            # if the Copilot-specific path is unavailable for any reason.
+            pass
+
+    # --- Copilot / Copilot ACP / openai-codex fallback:
+    #     strip matching provider prefix, keep dots ---
    if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
        stripped = _strip_matching_provider_prefix(name, provider)
        if stripped == name and name.startswith("openai/"):
@@ -727,6 +727,22 @@ def switch_model(
    if not api_mode:
        api_mode = determine_api_mode(target_provider, base_url)

+    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
+    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
+    # trailing /v1 so the SDK constructs the correct path (e.g.
+    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
+    # Mirrors the same logic in hermes_cli.runtime_provider.resolve_runtime_provider;
+    # without it, /model switches into an anthropic_messages-routed OpenCode
+    # model (e.g. `/model minimax-m2.7` on opencode-go, `/model claude-sonnet-4-6`
+    # on opencode-zen) hit a double /v1 and returned OpenCode's website 404 page.
+    if (
+        api_mode == "anthropic_messages"
+        and target_provider in {"opencode-zen", "opencode-go"}
+        and isinstance(base_url, str)
+        and base_url
+    ):
+        base_url = re.sub(r"/v1/?$", "", base_url)
+
    # --- Get capabilities (legacy) ---
    capabilities = get_model_capabilities(target_provider, new_model)

@@ -26,7 +26,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("anthropic/claude-opus-4.7",       "recommended"),
+    ("moonshotai/kimi-k2.5",            "recommended"),
+    ("anthropic/claude-opus-4.7",       ""),
    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
    ("qwen/qwen3.6-plus",               ""),
@@ -49,7 +50,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("z-ai/glm-5.1",                    ""),
    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
-    ("moonshotai/kimi-k2.5",            ""),
    ("x-ai/grok-4.20",                  ""),
    ("nvidia/nemotron-3-super-120b-a12b",      ""),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
@@ -75,7 +75,9 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
+        "moonshotai/kimi-k2.5",
        "xiaomi/mimo-v2-pro",
+        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
@@ -95,7 +97,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "z-ai/glm-5.1",
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
-        "moonshotai/kimi-k2.5",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-super-120b-a12b:free",
@@ -155,8 +156,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "grok-4-1-fast-reasoning",
    ],
    "kimi-coding": [
-        "kimi-for-coding",
        "kimi-k2.5",
+        "kimi-for-coding",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
@@ -211,6 +212,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "trinity-mini",
    ],
    "opencode-zen": [
+        "kimi-k2.5",
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-codex",
@@ -242,16 +244,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-5",
        "glm-4.7",
        "glm-4.6",
-        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2",
        "qwen3-coder",
        "big-pickle",
    ],
    "opencode-go": [
+        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
-        "kimi-k2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
@@ -284,21 +285,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
+        "kimi-k2.5",
        "qwen3.5-plus",
        "qwen3-coder-plus",
        "qwen3-coder-next",
        # Third-party models available on coding-intl
        "glm-5",
        "glm-4.7",
-        "kimi-k2.5",
        "MiniMax-M2.5",
    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    "huggingface": [
+        "moonshotai/Kimi-K2.5",
        "Qwen/Qwen3.5-397B-A17B",
        "Qwen/Qwen3.5-35B-A3B",
        "deepseek-ai/DeepSeek-V3.2",
-        "moonshotai/Kimi-K2.5",
        "MiniMaxAI/MiniMax-M2.5",
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
@@ -1487,6 +1488,19 @@ _COPILOT_MODEL_ALIASES = {
    "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
    "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
    "anthropic/claude-haiku-4.5": "claude-haiku-4.5",
+    # Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use
+    # hyphens (anthropic native format), but Copilot's API only accepts
+    # dot-notation.  Accept both so users who configure copilot + a
+    # default hyphenated Claude model don't hit HTTP 400
+    # "model_not_supported".  See issue #6879.
+    "claude-opus-4-6": "claude-opus-4.6",
+    "claude-sonnet-4-6": "claude-sonnet-4.6",
+    "claude-sonnet-4-5": "claude-sonnet-4.5",
+    "claude-haiku-4-5": "claude-haiku-4.5",
+    "anthropic/claude-opus-4-6": "claude-opus-4.6",
+    "anthropic/claude-sonnet-4-6": "claude-sonnet-4.6",
+    "anthropic/claude-sonnet-4-5": "claude-sonnet-4.5",
+    "anthropic/claude-haiku-4-5": "claude-haiku-4.5",
 }


@@ -684,6 +684,51 @@ def do_uninstall(name: str, console: Optional[Console] = None,
        c.print(f"[bold red]Error:[/] {msg}\n")


+def do_reset(name: str, restore: bool = False,
+             console: Optional[Console] = None,
+             skip_confirm: bool = False,
+             invalidate_cache: bool = True) -> None:
+    """Reset a bundled skill's manifest tracking (+ optionally restore from bundled)."""
+    from tools.skills_sync import reset_bundled_skill
+
+    c = console or _console
+
+    if not skip_confirm and restore:
+        c.print(f"\n[bold]Restore '{name}' from bundled source?[/]")
+        c.print("[dim]This will DELETE your current copy and re-copy the bundled version.[/]")
+        try:
+            answer = input("Confirm [y/N]: ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            answer = "n"
+        if answer not in ("y", "yes"):
+            c.print("[dim]Cancelled.[/]\n")
+            return
+
+    result = reset_bundled_skill(name, restore=restore)
+
+    if not result["ok"]:
+        c.print(f"[bold red]Error:[/] {result['message']}\n")
+        return
+
+    c.print(f"[bold green]{result['message']}[/]")
+    synced = result.get("synced") or {}
+    if synced.get("copied"):
+        c.print(f"[dim]Copied: {', '.join(synced['copied'])}[/]")
+    if synced.get("updated"):
+        c.print(f"[dim]Updated: {', '.join(synced['updated'])}[/]")
+    c.print()
+
+    if invalidate_cache:
+        try:
+            from agent.prompt_builder import clear_skills_system_prompt_cache
+            clear_skills_system_prompt_cache(clear_snapshot=True)
+        except Exception:
+            pass
+    else:
+        c.print("[dim]Change will take effect in your next session.[/]")
+        c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n")
+
+
 def do_tap(action: str, repo: str = "", console: Optional[Console] = None) -> None:
    """Manage taps (custom GitHub repo sources)."""
    from tools.skills_hub import TapsManager
@@ -1007,6 +1052,9 @@ def skills_command(args) -> None:
        do_audit(name=getattr(args, "name", None))
    elif action == "uninstall":
        do_uninstall(args.name)
+    elif action == "reset":
+        do_reset(args.name, restore=getattr(args, "restore", False),
+                 skip_confirm=getattr(args, "yes", False))
    elif action == "publish":
        do_publish(
            args.skill_path,
@@ -1029,7 +1077,7 @@ def skills_command(args) -> None:
            return
        do_tap(tap_action, repo=repo)
    else:
-        _console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|publish|snapshot|tap]\n")
+        _console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|reset|publish|snapshot|tap]\n")
        _console.print("Run 'hermes skills <command> --help' for details.\n")


@@ -1175,6 +1223,19 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        do_uninstall(args[0], console=c, skip_confirm=skip_confirm,
                     invalidate_cache=invalidate_cache)

+    elif action == "reset":
+        if not args:
+            c.print("[bold red]Usage:[/] /skills reset <name> [--restore] [--now]\n")
+            c.print("[dim]Clears the bundled-skills manifest entry so future updates stop marking it as user-modified.[/]")
+            c.print("[dim]Pass --restore to also replace the current copy with the bundled version.[/]\n")
+            return
+        name = args[0]
+        restore = "--restore" in args
+        invalidate_cache = "--now" in args
+        # Slash commands can't prompt — --restore in slash mode is implicit consent.
+        do_reset(name, restore=restore, console=c, skip_confirm=True,
+                 invalidate_cache=invalidate_cache)
+
    elif action == "publish":
        if not args:
            c.print("[bold red]Usage:[/] /skills publish <skill-path> [--to github] [--repo owner/repo]\n")
@@ -1231,6 +1292,7 @@ def _print_skills_help(console: Console) -> None:
        "  [cyan]update[/] [name]               Update hub skills with upstream changes\n"
        "  [cyan]audit[/] [name]                Re-scan hub skills for security\n"
        "  [cyan]uninstall[/] <name>            Remove a hub-installed skill\n"
+        "  [cyan]reset[/] <name> [--restore]    Reset bundled-skill tracking (fix 'user-modified' flag)\n"
        "  [cyan]publish[/] <path> --repo <r>   Publish a skill to GitHub via PR\n"
        "  [cyan]snapshot[/] export|import      Export/import skill configurations\n"
        "  [cyan]tap[/] list|add|remove         Manage skill sources\n",
@@ -258,14 +258,16 @@ TOOL_CATEGORIES = {
                "requires_nous_auth": True,
                "managed_nous_feature": "image_gen",
                "override_env_vars": ["FAL_KEY"],
+                "imagegen_backend": "fal",
            },
            {
                "name": "FAL.ai",
                "badge": "paid",
-                "tag": "FLUX 2 Pro with auto-upscaling",
+                "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
                "env_vars": [
                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
                ],
+                "imagegen_backend": "fal",
            },
        ],
    },
@@ -510,7 +512,7 @@ def _get_platform_tools(
    """Resolve which individual toolset names are enabled for a platform."""
    from toolsets import resolve_toolset

-    platform_toolsets = config.get("platform_toolsets", {})
+    platform_toolsets = config.get("platform_toolsets") or {}
    toolset_names = platform_toolsets.get(platform)

    if toolset_names is None or not isinstance(toolset_names, list):
@@ -950,6 +952,106 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
    return 0


+# ─── Image Generation Model Pickers ───────────────────────────────────────────
+#
+# IMAGEGEN_BACKENDS is a per-backend catalog. Each entry exposes:
+#   - config_key:        top-level config.yaml key for this backend's settings
+#   - model_catalog_fn:  returns an OrderedDict-like {model_id: metadata}
+#   - default_model:     fallback when nothing is configured
+#
+# This prepares for future imagegen backends (Replicate, Stability, etc.):
+# each new backend registers its own entry; the FAL provider entry in
+# TOOL_CATEGORIES tags itself with `imagegen_backend: "fal"` to select the
+# right catalog at picker time.
+
+
+def _fal_model_catalog():
+    """Lazy-load the FAL model catalog from the tool module."""
+    from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL
+    return FAL_MODELS, DEFAULT_MODEL
+
+
+IMAGEGEN_BACKENDS = {
+    "fal": {
+        "display": "FAL.ai",
+        "config_key": "image_gen",
+        "catalog_fn": _fal_model_catalog,
+    },
+}
+
+
+def _format_imagegen_model_row(model_id: str, meta: dict, widths: dict) -> str:
+    """Format a single picker row with column-aligned speed / strengths / price."""
+    return (
+        f"{model_id:<{widths['model']}}  "
+        f"{meta.get('speed', ''):<{widths['speed']}}  "
+        f"{meta.get('strengths', ''):<{widths['strengths']}}  "
+        f"{meta.get('price', '')}"
+    )
+
+
+def _configure_imagegen_model(backend_name: str, config: dict) -> None:
+    """Prompt the user to pick a model for the given imagegen backend.
+
+    Writes selection to ``config[backend_config_key]["model"]``. Safe to
+    call even when stdin is not a TTY — curses_radiolist falls back to
+    keeping the current selection.
+    """
+    backend = IMAGEGEN_BACKENDS.get(backend_name)
+    if not backend:
+        return
+
+    catalog, default_model = backend["catalog_fn"]()
+    if not catalog:
+        return
+
+    cfg_key = backend["config_key"]
+    cur_cfg = config.setdefault(cfg_key, {})
+    if not isinstance(cur_cfg, dict):
+        cur_cfg = {}
+        config[cfg_key] = cur_cfg
+    current_model = cur_cfg.get("model") or default_model
+    if current_model not in catalog:
+        current_model = default_model
+
+    model_ids = list(catalog.keys())
+    # Put current model at the top so the cursor lands on it by default.
+    ordered = [current_model] + [m for m in model_ids if m != current_model]
+
+    # Column widths
+    widths = {
+        "model": max(len(m) for m in model_ids),
+        "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
+        "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
+    }
+
+    print()
+    header = (
+        f"  {'Model':<{widths['model']}}  "
+        f"{'Speed':<{widths['speed']}}  "
+        f"{'Strengths':<{widths['strengths']}}  "
+        f"Price"
+    )
+    print(color(header, Colors.CYAN))
+
+    rows = []
+    for mid in ordered:
+        row = _format_imagegen_model_row(mid, catalog[mid], widths)
+        if mid == current_model:
+            row += "  ← currently in use"
+        rows.append(row)
+
+    idx = _prompt_choice(
+        f"  Choose {backend['display']} model:",
+        rows,
+        default=0,
+    )
+
+    chosen = ordered[idx]
+    cur_cfg["model"] = chosen
+    _print_success(f"  Model set to: {chosen}")
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1006,6 +1108,10 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        # Imagegen backends prompt for model selection after backend pick.
+        backend = provider.get("imagegen_backend")
+        if backend:
+            _configure_imagegen_model(backend, config)
        return

    # Prompt for each required env var
@@ -1040,6 +1146,10 @@ def _configure_provider(provider: dict, config: dict):

    if all_configured:
        _print_success(f"  {provider['name']} configured!")
+        # Imagegen backends prompt for model selection after env vars are in.
+        backend = provider.get("imagegen_backend")
+        if backend:
+            _configure_imagegen_model(backend, config)


 def _configure_simple_requirements(ts_key: str):
@@ -1211,6 +1321,10 @@ def _reconfigure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        # Imagegen backends prompt for model selection on reconfig too.
+        backend = provider.get("imagegen_backend")
+        if backend:
+            _configure_imagegen_model(backend, config)
        return

    for var in env_vars:
@@ -1228,6 +1342,11 @@ def _reconfigure_provider(provider: dict, config: dict):
        else:
            _print_info("    Kept current")

+    # Imagegen backends prompt for model selection on reconfig too.
+    backend = provider.get("imagegen_backend")
+    if backend:
+        _configure_imagegen_model(backend, config)
+

 def _reconfigure_simple_requirements(ts_key: str):
    """Reconfigure simple env var requirements."""
@@ -0,0 +1,361 @@
+---
+name: concept-diagrams
+description: Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language with 9 semantic color ramps, sentence-case typography, and automatic dark mode. Best suited for educational and non-software visuals — physics setups, chemistry mechanisms, math curves, physical objects (aircraft, turbines, smartphones, mechanical watches), anatomy, floor plans, cross-sections, narrative journeys (lifecycle of X, process of Y), hub-spoke system integrations (smart city, IoT), and exploded layer views. If a more specialized skill exists for the subject (dedicated software/cloud architecture, hand-drawn sketches, animated explainers, etc.), prefer that — otherwise this skill can also serve as a general-purpose SVG diagram fallback with a clean educational look. Ships with 15 example diagrams.
+version: 0.1.0
+author: v1k22 (original PR), ported into hermes-agent
+license: MIT
+dependencies: []
+metadata:
+  hermes:
+    tags: [diagrams, svg, visualization, education, physics, chemistry, engineering]
+    related_skills: [architecture-diagram, excalidraw, generative-widgets]
+---
+
+# Concept Diagrams
+
+Generate production-quality SVG diagrams with a unified flat, minimal design system. Output is a single self-contained HTML file that renders identically in any modern browser, with automatic light/dark mode.
+
+## Scope
+
+**Best suited for:**
+- Physics setups, chemistry mechanisms, math curves, biology
+- Physical objects (aircraft, turbines, smartphones, mechanical watches, cells)
+- Anatomy, cross-sections, exploded layer views
+- Floor plans, architectural conversions
+- Narrative journeys (lifecycle of X, process of Y)
+- Hub-spoke system integrations (smart city, IoT networks, electricity grids)
+- Educational / textbook-style visuals in any domain
+- Quantitative charts (grouped bars, energy profiles)
+
+**Look elsewhere first for:**
+- Dedicated software / cloud infrastructure architecture with a dark tech aesthetic (consider `architecture-diagram` if available)
+- Hand-drawn whiteboard sketches (consider `excalidraw` if available)
+- Animated explainers or video output (consider an animation skill)
+
+If a more specialized skill is available for the subject, prefer that. If none fits, this skill can serve as a general-purpose SVG diagram fallback — the output will carry the clean educational aesthetic described below, which is a reasonable default for almost any subject.
+
+## Workflow
+
+1. Decide on the diagram type (see Diagram Types below).
+2. Lay out components using the Design System rules.
+3. Write the full HTML page using `templates/template.html` as the wrapper — paste your SVG where the template says `<!-- PASTE SVG HERE -->`.
+4. Save as a standalone `.html` file (for example `~/my-diagram.html` or `./my-diagram.html`).
+5. User opens it directly in a browser — no server, no dependencies.
+
+Optional: if the user wants a browsable gallery of multiple diagrams, see "Local Preview Server" at the bottom.
+
+Load the HTML template:
+```
+skill_view(name="concept-diagrams", file_path="templates/template.html")
+```
+
+The template embeds the full CSS design system (`c-*` color classes, text classes, light/dark variables, arrow marker styles). The SVG you generate relies on these classes being present on the hosting page.
+
+---
+
+## Design System
+
+### Philosophy
+
+- **Flat**: no gradients, drop shadows, blur, glow, or neon effects.
+- **Minimal**: show the essential. No decorative icons inside boxes.
+- **Consistent**: same colors, spacing, typography, and stroke widths across every diagram.
+- **Dark-mode ready**: all colors auto-adapt via CSS classes — no per-mode SVG.
+
+### Color Palette
+
+9 color ramps, each with 7 stops. Put the class name on a `<g>` or shape element; the template CSS handles both modes.
+
+| Class      | 50 (lightest) | 100     | 200     | 400     | 600     | 800     | 900 (darkest) |
+|------------|---------------|---------|---------|---------|---------|---------|---------------|
+| `c-purple` | #EEEDFE | #CECBF6 | #AFA9EC | #7F77DD | #534AB7 | #3C3489 | #26215C |
+| `c-teal`   | #E1F5EE | #9FE1CB | #5DCAA5 | #1D9E75 | #0F6E56 | #085041 | #04342C |
+| `c-coral`  | #FAECE7 | #F5C4B3 | #F0997B | #D85A30 | #993C1D | #712B13 | #4A1B0C |
+| `c-pink`   | #FBEAF0 | #F4C0D1 | #ED93B1 | #D4537E | #993556 | #72243E | #4B1528 |
+| `c-gray`   | #F1EFE8 | #D3D1C7 | #B4B2A9 | #888780 | #5F5E5A | #444441 | #2C2C2A |
+| `c-blue`   | #E6F1FB | #B5D4F4 | #85B7EB | #378ADD | #185FA5 | #0C447C | #042C53 |
+| `c-green`  | #EAF3DE | #C0DD97 | #97C459 | #639922 | #3B6D11 | #27500A | #173404 |
+| `c-amber`  | #FAEEDA | #FAC775 | #EF9F27 | #BA7517 | #854F0B | #633806 | #412402 |
+| `c-red`    | #FCEBEB | #F7C1C1 | #F09595 | #E24B4A | #A32D2D | #791F1F | #501313 |
+
+#### Color Assignment Rules
+
+Color encodes **meaning**, not sequence. Never cycle through colors like a rainbow.
+
+- Group nodes by **category** — all nodes of the same type share one color.
+- Use `c-gray` for neutral/structural nodes (start, end, generic steps, users).
+- Use **2-3 colors per diagram**, not 6+.
+- Prefer `c-purple`, `c-teal`, `c-coral`, `c-pink` for general categories.
+- Reserve `c-blue`, `c-green`, `c-amber`, `c-red` for semantic meaning (info, success, warning, error).
+
+Light/dark stop mapping (handled by the template CSS — just use the class):
+- Light mode: 50 fill + 600 stroke + 800 title / 600 subtitle
+- Dark mode:  800 fill + 200 stroke + 100 title / 200 subtitle
+
+### Typography
+
+Only two font sizes. No exceptions.
+
+| Class | Size | Weight | Use |
+|-------|------|--------|-----|
+| `th`  | 14px | 500    | Node titles, region labels |
+| `ts`  | 12px | 400    | Subtitles, descriptions, arrow labels |
+| `t`   | 14px | 400    | General text |
+
+- **Sentence case always.** Never Title Case, never ALL CAPS.
+- Every `<text>` MUST carry a class (`t`, `ts`, or `th`). No unclassed text.
+- `dominant-baseline="central"` on all text inside boxes.
+- `text-anchor="middle"` for centered text in boxes.
+
+**Width estimation (approx):**
+- 14px weight 500: ~8px per character
+- 12px weight 400: ~6.5px per character
+- Always verify: `box_width >= (char_count × px_per_char) + 48` (24px padding each side)
+
+### Spacing & Layout
+
+- **ViewBox**: `viewBox="0 0 680 H"` where H = content height + 40px buffer.
+- **Safe area**: x=40 to x=640, y=40 to y=(H-40).
+- **Between boxes**: 60px minimum gap.
+- **Inside boxes**: 24px horizontal padding, 12px vertical padding.
+- **Arrowhead gap**: 10px between arrowhead and box edge.
+- **Single-line box**: 44px height.
+- **Two-line box**: 56px height, 18px between title and subtitle baselines.
+- **Container padding**: 20px minimum inside every container.
+- **Max nesting**: 2-3 levels deep. Deeper gets unreadable at 680px width.
+
+### Stroke & Shape
+
+- **Stroke width**: 0.5px on all node borders. Not 1px, not 2px.
+- **Rect rounding**: `rx="8"` for nodes, `rx="12"` for inner containers, `rx="16"` to `rx="20"` for outer containers.
+- **Connector paths**: MUST have `fill="none"`. SVG defaults to `fill: black` otherwise.
+
+### Arrow Marker
+
+Include this `<defs>` block at the start of **every** SVG:
+
+```xml
+<defs>
+  <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+          markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+    <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+          stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+  </marker>
+</defs>
+```
+
+Use `marker-end="url(#arrow)"` on lines. The arrowhead inherits the line color via `context-stroke`.
+
+### CSS Classes (Provided by the Template)
+
+The template page provides:
+
+- Text: `.t`, `.ts`, `.th`
+- Neutral: `.box`, `.arr`, `.leader`, `.node`
+- Color ramps: `.c-purple`, `.c-teal`, `.c-coral`, `.c-pink`, `.c-gray`, `.c-blue`, `.c-green`, `.c-amber`, `.c-red` (all with automatic light/dark mode)
+
+You do **not** need to redefine these — just apply them in your SVG. The template file contains the full CSS definitions.
+
+---
+
+## SVG Boilerplate
+
+Every SVG inside the template page starts with this exact structure:
+
+```xml
+<svg width="100%" viewBox="0 0 680 {HEIGHT}" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+  </defs>
+
+  <!-- Diagram content here -->
+
+</svg>
+```
+
+Replace `{HEIGHT}` with the actual computed height (last element bottom + 40px).
+
+### Node Patterns
+
+**Single-line node (44px):**
+```xml
+<g class="node c-blue">
+  <rect x="100" y="20" width="180" height="44" rx="8" stroke-width="0.5"/>
+  <text class="th" x="190" y="42" text-anchor="middle" dominant-baseline="central">Service name</text>
+</g>
+```
+
+**Two-line node (56px):**
+```xml
+<g class="node c-teal">
+  <rect x="100" y="20" width="200" height="56" rx="8" stroke-width="0.5"/>
+  <text class="th" x="200" y="38" text-anchor="middle" dominant-baseline="central">Service name</text>
+  <text class="ts" x="200" y="56" text-anchor="middle" dominant-baseline="central">Short description</text>
+</g>
+```
+
+**Connector (no label):**
+```xml
+<line x1="200" y1="76" x2="200" y2="120" class="arr" marker-end="url(#arrow)"/>
+```
+
+**Container (dashed or solid):**
+```xml
+<g class="c-purple">
+  <rect x="40" y="92" width="600" height="300" rx="16" stroke-width="0.5"/>
+  <text class="th" x="66" y="116">Container label</text>
+  <text class="ts" x="66" y="134">Subtitle info</text>
+</g>
+```
+
+---
+
+## Diagram Types
+
+Choose the layout that fits the subject:
+
+1. **Flowchart** — CI/CD pipelines, request lifecycles, approval workflows, data processing. Single-direction flow (top-down or left-right). Max 4-5 nodes per row.
+2. **Structural / Containment** — Cloud infrastructure nesting, system architecture with layers. Large outer containers with inner regions. Dashed rects for logical groupings.
+3. **API / Endpoint Map** — REST routes, GraphQL schemas. Tree from root, branching to resource groups, each containing endpoint nodes.
+4. **Microservice Topology** — Service mesh, event-driven systems. Services as nodes, arrows for communication patterns, message queues between.
+5. **Data Flow** — ETL pipelines, streaming architectures. Left-to-right flow from sources through processing to sinks.
+6. **Physical / Structural** — Vehicles, buildings, hardware, anatomy. Use shapes that match the physical form — `<path>` for curved bodies, `<polygon>` for tapered shapes, `<ellipse>`/`<circle>` for cylindrical parts, nested `<rect>` for compartments. See `references/physical-shape-cookbook.md`.
+7. **Infrastructure / Systems Integration** — Smart cities, IoT networks, multi-domain systems. Hub-spoke layout with central platform connecting subsystems. Semantic line styles (`.data-line`, `.power-line`, `.water-pipe`, `.road`). See `references/infrastructure-patterns.md`.
+8. **UI / Dashboard Mockups** — Admin panels, monitoring dashboards. Screen frame with nested chart/gauge/indicator elements. See `references/dashboard-patterns.md`.
+
+For physical, infrastructure, and dashboard diagrams, load the matching reference file before generating — each one provides ready-made CSS classes and shape primitives.
+
+---
+
+## Validation Checklist
+
+Before finalizing any SVG, verify ALL of the following:
+
+1. Every `<text>` has class `t`, `ts`, or `th`.
+2. Every `<text>` inside a box has `dominant-baseline="central"`.
+3. Every connector `<path>` or `<line>` used as arrow has `fill="none"`.
+4. No arrow line crosses through an unrelated box.
+5. `box_width >= (longest_label_chars × 8) + 48` for 14px text.
+6. `box_width >= (longest_label_chars × 6.5) + 48` for 12px text.
+7. ViewBox height = bottom-most element + 40px.
+8. All content stays within x=40 to x=640.
+9. Color classes (`c-*`) are on `<g>` or shape elements, never on `<path>` connectors.
+10. Arrow `<defs>` block is present.
+11. No gradients, shadows, blur, or glow effects.
+12. Stroke width is 0.5px on all node borders.
+
+---
+
+## Output & Preview
+
+### Default: standalone HTML file
+
+Write a single `.html` file the user can open directly. No server, no dependencies, works offline. Pattern:
+
+```python
+# 1. Load the template
+template = skill_view("concept-diagrams", "templates/template.html")
+
+# 2. Fill in title, subtitle, and paste your SVG
+html = template.replace(
+    "<!-- DIAGRAM TITLE HERE -->", "SN2 reaction mechanism"
+).replace(
+    "<!-- OPTIONAL SUBTITLE HERE -->", "Bimolecular nucleophilic substitution"
+).replace(
+    "<!-- PASTE SVG HERE -->", svg_content
+)
+
+# 3. Write to a user-chosen path (or ./ by default)
+write_file("./sn2-mechanism.html", html)
+```
+
+Tell the user how to open it:
+
+```
+# macOS
+open ./sn2-mechanism.html
+# Linux
+xdg-open ./sn2-mechanism.html
+```
+
+### Optional: local preview server (multi-diagram gallery)
+
+Only use this when the user explicitly wants a browsable gallery of multiple diagrams.
+
+**Rules:**
+- Bind to `127.0.0.1` only. Never `0.0.0.0`. Exposing diagrams on all network interfaces is a security hazard on shared networks.
+- Pick a free port (do NOT hard-code one) and tell the user the chosen URL.
+- The server is optional and opt-in — prefer the standalone HTML file first.
+
+Recommended pattern (lets the OS pick a free ephemeral port):
+
+```bash
+# Put each diagram in its own folder under .diagrams/
+mkdir -p .diagrams/sn2-mechanism
+# ...write .diagrams/sn2-mechanism/index.html...
+
+# Serve on loopback only, free port
+cd .diagrams && python3 -c "
+import http.server, socketserver
+with socketserver.TCPServer(('127.0.0.1', 0), http.server.SimpleHTTPRequestHandler) as s:
+    print(f'Serving at http://127.0.0.1:{s.server_address[1]}/')
+    s.serve_forever()
+" &
+```
+
+If the user insists on a fixed port, use `127.0.0.1:<port>` — still never `0.0.0.0`. Document how to stop the server (`kill %1` or `pkill -f "http.server"`).
+
+---
+
+## Examples Reference
+
+The `examples/` directory ships 15 complete, tested diagrams. Browse them for working patterns before writing a new diagram of a similar type:
+
+| File | Type | Demonstrates |
+|------|------|--------------|
+| `hospital-emergency-department-flow.md` | Flowchart | Priority routing with semantic colors |
+| `feature-film-production-pipeline.md` | Flowchart | Phased workflow, horizontal sub-flows |
+| `automated-password-reset-flow.md` | Flowchart | Auth flow with error branches |
+| `autonomous-llm-research-agent-flow.md` | Flowchart | Loop-back arrows, decision branches |
+| `place-order-uml-sequence.md` | Sequence | UML sequence diagram style |
+| `commercial-aircraft-structure.md` | Physical | Paths, polygons, ellipses for realistic shapes |
+| `wind-turbine-structure.md` | Physical cross-section | Underground/above-ground separation, color coding |
+| `smartphone-layer-anatomy.md` | Exploded view | Alternating left/right labels, layered components |
+| `apartment-floor-plan-conversion.md` | Floor plan | Walls, doors, proposed changes in dotted red |
+| `banana-journey-tree-to-smoothie.md` | Narrative journey | Winding path, progressive state changes |
+| `cpu-ooo-microarchitecture.md` | Hardware pipeline | Fan-out, memory hierarchy sidebar |
+| `sn2-reaction-mechanism.md` | Chemistry | Molecules, curved arrows, energy profile |
+| `smart-city-infrastructure.md` | Hub-spoke | Semantic line styles per system |
+| `electricity-grid-flow.md` | Multi-stage flow | Voltage hierarchy, flow markers |
+| `ml-benchmark-grouped-bar-chart.md` | Chart | Grouped bars, dual axis |
+
+Load any example with:
+```
+skill_view(name="concept-diagrams", file_path="examples/<filename>")
+```
+
+---
+
+## Quick Reference: What to Use When
+
+| User says | Diagram type | Suggested colors |
+|-----------|--------------|------------------|
+| "show the pipeline" | Flowchart | gray start/end, purple steps, red errors, teal deploy |
+| "draw the data flow" | Data pipeline (left-right) | gray sources, purple processing, teal sinks |
+| "visualize the system" | Structural (containment) | purple container, teal services, coral data |
+| "map the endpoints" | API tree | purple root, one ramp per resource group |
+| "show the services" | Microservice topology | gray ingress, teal services, purple bus, coral workers |
+| "draw the aircraft/vehicle" | Physical | paths, polygons, ellipses for realistic shapes |
+| "smart city / IoT" | Hub-spoke integration | semantic line styles per subsystem |
+| "show the dashboard" | UI mockup | dark screen, chart colors: teal, purple, coral for alerts |
+| "power grid / electricity" | Multi-stage flow | voltage hierarchy (HV/MV/LV line weights) |
+| "wind turbine / turbine" | Physical cross-section | foundation + tower cutaway + nacelle color-coded |
+| "journey of X / lifecycle" | Narrative journey | winding path, progressive state changes |
+| "layers of X / exploded" | Exploded layer view | vertical stack, alternating labels |
+| "CPU / pipeline" | Hardware pipeline | vertical stages, fan-out to execution ports |
+| "floor plan / apartment" | Floor plan | walls, doors, proposed changes in dotted red |
+| "reaction mechanism" | Chemistry | atoms, bonds, curved arrows, transition state, energy profile |
@@ -0,0 +1,244 @@
+# Apartment Floor Plan: 3 BHK to 4 BHK Conversion
+
+An architectural floor plan showing a 1,500 sq ft apartment with proposed modifications to convert from 3 BHK to 4 BHK. Demonstrates architectural drawing conventions, room layouts, proposed changes with dotted lines, and area comparison tables.
+
+## Key Patterns Used
+
+- **Architectural floor plan**: Top-down view with walls, doors, windows
+- **Proposed modifications**: Dotted red lines for new walls
+- **Room color coding**: Light fills to distinguish room types
+- **Circulation paths**: Arrows showing new access routes
+- **Data table**: Before/after area comparison with highlighting
+- **Architectural symbols**: North arrow, scale bar, door swings
+
+## Diagram Type
+
+This is an **architectural floor plan** with:
+- **Plan view**: Top-down orthographic projection
+- **Overlay technique**: Existing structure + proposed changes
+- **Quantitative data**: Area measurements and comparison table
+
+## Architectural Drawing Elements
+
+### Wall Styles
+
+```xml
+<!-- Outer walls (thick) -->
+<line class="wall" x1="0" y1="0" x2="560" y2="0"/>
+
+<!-- Internal walls (thinner) -->
+<line class="wall-thin" x1="180" y1="0" x2="180" y2="140"/>
+
+<!-- Proposed new walls (dotted red) -->
+<line class="proposed-wall" x1="125" y1="170" x2="125" y2="330"/>
+```
+
+```css
+.wall { stroke: var(--text-primary); stroke-width: 6; fill: none; stroke-linecap: square; }
+.wall-thin { stroke: var(--text-primary); stroke-width: 3; fill: none; }
+.proposed-wall { stroke: #A32D2D; stroke-width: 4; fill: none; stroke-dasharray: 8 4; }
+```
+
+### Door Symbols
+
+```xml
+<!-- Door opening with swing arc -->
+<rect x="150" y="137" width="25" height="6" fill="var(--bg-primary)"/>
+<path class="door" d="M150,140 L150,165"/>
+<path class="door-swing" d="M150,140 A25,25 0 0,0 175,140"/>
+
+<!-- Sliding door (balcony) -->
+<rect x="60" y="327" width="60" height="6" fill="var(--bg-primary)" stroke="var(--text-secondary)" stroke-width="1"/>
+<line x1="60" y1="330" x2="90" y2="330" stroke="var(--text-secondary)" stroke-width="2"/>
+<line x1="90" y1="330" x2="120" y2="330" stroke="var(--text-secondary)" stroke-width="2" stroke-dasharray="3 3"/>
+
+<!-- Proposed door (dotted) -->
+<rect x="143" y="292" width="22" height="6" fill="var(--bg-primary)" stroke="#A32D2D" stroke-width="1" stroke-dasharray="3 2"/>
+<path d="M165,295 A22,22 0 0,0 165,273" stroke="#A32D2D" stroke-width="1" stroke-dasharray="3 2" fill="none"/>
+```
+
+```css
+.door { stroke: var(--text-secondary); stroke-width: 1.5; fill: none; }
+.door-swing { stroke: var(--text-tertiary); stroke-width: 1; fill: none; stroke-dasharray: 3 2; }
+```
+
+### Window Symbols
+
+```xml
+<!-- Window with glass indication -->
+<rect class="window" x="-3" y="30" width="6" height="50"/>
+<line class="window-glass" x1="0" y1="35" x2="0" y2="75"/>
+
+<!-- Horizontal window (top wall) -->
+<rect class="window" x="220" y="-3" width="60" height="6"/>
+<line class="window-glass" x1="225" y1="0" x2="275" y2="0"/>
+```
+
+```css
+.window { stroke: var(--text-primary); stroke-width: 1; fill: var(--bg-primary); }
+.window-glass { stroke: #378ADD; stroke-width: 2; fill: none; }
+```
+
+### Room Fills
+
+```xml
+<!-- Different colors for room types -->
+<rect class="room-master" x="3" y="3" width="174" height="134" rx="2"/>
+<rect class="room-bed2" x="183" y="3" width="134" height="104" rx="2"/>
+<rect class="room-living" x="3" y="173" width="554" height="154" rx="2"/>
+<rect class="room-kitchen" x="443" y="3" width="114" height="104" rx="2"/>
+<rect class="room-bath" x="183" y="113" width="54" height="54" rx="2"/>
+
+<!-- Proposed new room (highlighted) -->
+<rect class="room-new" x="3" y="223" width="120" height="104"/>
+```
+
+```css
+.room-master { fill: rgba(206, 203, 246, 0.3); }  /* purple tint */
+.room-bed2 { fill: rgba(159, 225, 203, 0.3); }    /* teal tint */
+.room-bed3 { fill: rgba(250, 199, 117, 0.3); }    /* amber tint */
+.room-living { fill: rgba(245, 196, 179, 0.3); }  /* coral tint */
+.room-kitchen { fill: rgba(237, 147, 177, 0.3); } /* pink tint */
+.room-bath { fill: rgba(133, 183, 235, 0.3); }    /* blue tint */
+.room-new { fill: rgba(163, 45, 45, 0.15); }      /* red tint for proposed */
+```
+
+### Support Fixtures
+
+```xml
+<!-- Kitchen counter hint -->
+<rect x="450" y="15" width="50" height="25" fill="none" stroke="var(--text-tertiary)" stroke-width="0.5" rx="2"/>
+<text class="tx" x="475" y="30" text-anchor="middle">Counter</text>
+
+<!-- Balcony (dashed outline) -->
+<rect class="balcony-fill" x="3" y="333" width="200" height="50"/>
+```
+
+```css
+.balcony { fill: none; stroke: var(--text-secondary); stroke-width: 2; stroke-dasharray: 6 3; }
+.balcony-fill { fill: rgba(93, 202, 165, 0.1); }
+```
+
+### Room Labels
+
+```xml
+<!-- Room name and area -->
+<text class="room-label" x="90" y="65" text-anchor="middle">MASTER</text>
+<text class="room-label" x="90" y="78" text-anchor="middle">BEDROOM</text>
+<text class="area-label" x="90" y="95" text-anchor="middle">195 sq ft</text>
+
+<!-- Proposed room (in red) -->
+<text class="room-label" x="63" y="268" text-anchor="middle" fill="#A32D2D">BEDROOM 4</text>
+<text class="tx" x="63" y="282" text-anchor="middle" fill="#A32D2D">(NEW)</text>
+```
+
+```css
+.room-label { font-family: system-ui; font-size: 11px; fill: var(--text-primary); font-weight: 500; }
+.area-label { font-family: system-ui; font-size: 9px; fill: var(--text-tertiary); }
+```
+
+### Circulation Arrow
+
+```xml
+<defs>
+  <marker id="circ-arrow" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+    <path d="M0,0 L10,5 L0,10 Z" class="circulation-fill"/>
+  </marker>
+</defs>
+
+<path class="circulation" d="M300,250 L200,250 L145,250 L145,280" marker-end="url(#circ-arrow)"/>
+<text class="tx" x="250" y="242" fill="#3B6D11" font-weight="500">New corridor access</text>
+```
+
+```css
+.circulation { stroke: #3B6D11; stroke-width: 2; fill: none; }
+.circulation-fill { fill: #3B6D11; }
+```
+
+### North Arrow and Scale Bar
+
+```xml
+<!-- North arrow -->
+<g transform="translate(520, 260)">
+  <circle cx="0" cy="0" r="20" fill="none" stroke="var(--text-tertiary)" stroke-width="0.5"/>
+  <polygon points="0,-18 -5,5 0,0 5,5" fill="var(--text-primary)"/>
+  <text class="tx" x="0" y="-22" text-anchor="middle">N</text>
+</g>
+
+<!-- Scale bar -->
+<g transform="translate(420, 300)">
+  <line x1="0" y1="0" x2="100" y2="0" stroke="var(--text-primary)" stroke-width="2"/>
+  <line x1="0" y1="-5" x2="0" y2="5" stroke="var(--text-primary)" stroke-width="1"/>
+  <line x1="50" y1="-3" x2="50" y2="3" stroke="var(--text-primary)" stroke-width="1"/>
+  <line x1="100" y1="-5" x2="100" y2="5" stroke="var(--text-primary)" stroke-width="1"/>
+  <text class="tx" x="0" y="15" text-anchor="middle">0</text>
+  <text class="tx" x="50" y="15" text-anchor="middle">5'</text>
+  <text class="tx" x="100" y="15" text-anchor="middle">10'</text>
+</g>
+```
+
+## Area Comparison Table
+
+### Table Structure
+
+```xml
+<!-- Header row -->
+<rect class="table-header" x="0" y="0" width="180" height="28" rx="4 4 0 0"/>
+<text class="ts" x="90" y="18" text-anchor="middle" font-weight="500">Room</text>
+
+<!-- Normal row -->
+<rect class="table-row" x="0" y="28" width="180" height="24"/>
+<text class="tx" x="10" y="44">Master Bedroom</text>
+<text class="tx" x="230" y="44" text-anchor="middle">195</text>
+
+<!-- Alternating row -->
+<rect class="table-row-alt" x="0" y="52" width="180" height="24"/>
+
+<!-- Highlighted row (for changes) -->
+<rect class="table-highlight" x="0" y="100" width="180" height="24"/>
+<text class="tx" x="10" y="116" fill="#A32D2D" font-weight="500">Bedroom 4 (NEW)</text>
+<text class="tx" x="430" y="116" text-anchor="middle" fill="#3B6D11">+100</text>
+
+<!-- Total row -->
+<rect x="0" y="268" width="180" height="28" fill="var(--bg-secondary)" stroke="var(--border)" stroke-width="1"/>
+<text class="ts" x="10" y="286" font-weight="500">TOTAL CARPET AREA</text>
+```
+
+```css
+.table-header { fill: var(--bg-secondary); }
+.table-row { fill: var(--bg-primary); stroke: var(--border); stroke-width: 0.5; }
+.table-row-alt { fill: var(--bg-tertiary); stroke: var(--border); stroke-width: 0.5; }
+.table-highlight { fill: rgba(163, 45, 45, 0.1); stroke: #A32D2D; stroke-width: 0.5; }
+```
+
+## Layout Notes
+
+- **ViewBox**: 800×780 (portrait for floor plan + table)
+- **Scale**: 10px = 1 foot (apartment ~50ft × 33ft)
+- **Floor plan origin**: Offset at (50, 60) for margins
+- **Wall thickness**: 6px outer, 3px inner (represents ~6" walls)
+- **Room labels**: Centered in each room with area below
+- **Table placement**: Below floor plan with full width
+
+## Color Coding
+
+| Element | Color | Usage |
+|---------|-------|-------|
+| Proposed walls | Red (#A32D2D) dotted | New construction |
+| New room fill | Red 15% opacity | Bedroom 4 area |
+| Circulation | Green (#3B6D11) | New access path |
+| Window glass | Blue (#378ADD) | Glass indication |
+| Bedrooms | Purple/Teal/Amber tints | Room differentiation |
+| Wet areas | Blue tint | Bathrooms |
+| Living | Coral tint | Common areas |
+
+## When to Use This Pattern
+
+Use this diagram style for:
+- Apartment/house floor plans
+- Office layout planning
+- Renovation proposals showing before/after
+- Space planning with area calculations
+- Real estate marketing materials
+- Interior design presentations
+- Building permit documentation
@@ -0,0 +1,276 @@
+# Automated Password Reset Flow
+
+A two-section flowchart tracing the full user journey for a web application password reset: the initial request phase (forgot password → email check → token generation) and the reset-form phase (link click → new password entry → token/password validation). Demonstrates multi-exit decision diamonds, a three-column branching layout, a loop-back path, and a cross-section separator arrow.
+
+## Key Patterns Used
+
+- **Three-column layout**: Left column (error/terminal branches at cx=115), center column (main happy path at cx=340), right column (expired-token branch at cx=552) — allows side branches to live at the same y-level as center nodes without overlap
+- **Decision diamonds with `<polygon>`**: Each decision uses a `<g class="decision">` wrapper containing a `<polygon>` and centered `<text>`; the diamond points are computed as `cx±hw, cy±hh` (hw=100, hh=28)
+- **Pill-shaped terminals**: Start and end nodes use `rx=22` on their `<rect>` to signal entry/exit points; all mid-flow process nodes use `rx=8`
+- **Three-branch decision paths**: Each diamond has a "Yes" branch (down, short `<line>`) and a "No" branch (`<path>` going horizontal then vertical to a side column)
+- **Loop-back path**: Mismatch error node loops back to the password-entry node via a routing corridor at x=215 — a 5-px gap between the left column (right edge x=210) and center column (left edge x=220); the path exits the bottom of the error node, drops below it, travels right to x=215, then goes up to the target node's center y, then right 5 px into the node's left edge
+- **Section separator**: A dashed horizontal `<line>` at y=452 splits the two phases; the connecting arrow crosses it with a faded label ("user receives email") to preserve flow continuity
+- **Italic annotation**: The exact UX copy for the generic message ("If that email exists…") is shown as a faded italic `ts` text block below the left-branch terminal node
+- **Legend row**: Five inline swatches (gray, purple, teal, red, amber diamond) at the bottom explain the color-to-role mapping
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 960" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+  </defs>
+
+  <!--
+    Column layout (680px viewBox, safe area x=40–640):
+      Left  col : x=20,  w=190, cx=115  (error / terminal branches)
+      Center col: x=220, w=240, cx=340  (main happy path)
+      Right  col: x=465, w=175, cx=552  (expired-token branch)
+      Loop corridor at x=215 (5-px gap between left and center cols)
+  -->
+
+  <!-- ═══ SECTION 1 — Forgot password request ═══ -->
+  <text class="ts" x="40" y="38" opacity=".45">Section 1 — Forgot password request</text>
+
+  <!-- START terminal (pill rx=22 signals start/end) -->
+  <g class="c-gray">
+    <rect x="220" y="46" width="240" height="44" rx="22"/>
+    <text class="th" x="340" y="68" text-anchor="middle" dominant-baseline="central">User: &quot;Forgot password&quot;</text>
+  </g>
+
+  <line x1="340" y1="90" x2="340" y2="108" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- N2 · Enter email -->
+  <g class="c-gray">
+    <rect x="220" y="108" width="240" height="44" rx="8"/>
+    <text class="th" x="340" y="130" text-anchor="middle" dominant-baseline="central">Enter email address</text>
+  </g>
+
+  <line x1="340" y1="152" x2="340" y2="172" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- D1 · Email in system?  diamond: center=(340,200) hw=100 hh=28 -->
+  <g class="decision">
+    <polygon points="340,172 440,200 340,228 240,200"/>
+    <text class="th" x="340" y="200" text-anchor="middle" dominant-baseline="central">Email in system?</text>
+  </g>
+
+  <!-- D1 "No" → left column -->
+  <path d="M 240,200 L 115,200 L 115,248" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="178" y="193" text-anchor="middle" opacity=".75">No</text>
+
+  <!-- D1 "Yes" → continue down -->
+  <line x1="340" y1="228" x2="340" y2="248" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="348" y="242" text-anchor="start" opacity=".75">Yes</text>
+
+  <!-- ── Left branch (D1 = No): generic security message → end ── -->
+
+  <!-- L1 · Generic message (security: never confirm email existence) -->
+  <g class="c-gray">
+    <rect x="20" y="248" width="190" height="56" rx="8"/>
+    <text class="th" x="115" y="269" text-anchor="middle" dominant-baseline="central">Generic message shown</text>
+    <text class="ts" x="115" y="287" text-anchor="middle" dominant-baseline="central">Email sent if found</text>
+  </g>
+
+  <line x1="115" y1="304" x2="115" y2="324" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- L2 · End terminal (left) -->
+  <g class="c-gray">
+    <rect x="20" y="324" width="190" height="44" rx="22"/>
+    <text class="th" x="115" y="346" text-anchor="middle" dominant-baseline="central">Request handled</text>
+  </g>
+
+  <!-- Italic annotation: actual UX copy shown below the end node -->
+  <text class="ts" x="20" y="384" opacity=".45" font-style="italic">&quot;If that email exists, a reset</text>
+  <text class="ts" x="20" y="398" opacity=".45" font-style="italic">link has been sent.&quot;</text>
+
+  <!-- ── Center Yes branch: system generates & sends token ── -->
+
+  <!-- N3 · Generate unique token -->
+  <g class="c-purple">
+    <rect x="220" y="248" width="240" height="56" rx="8"/>
+    <text class="th" x="340" y="269" text-anchor="middle" dominant-baseline="central">Generate unique token</text>
+    <text class="ts" x="340" y="287" text-anchor="middle" dominant-baseline="central">Time-limited, cryptographic</text>
+  </g>
+
+  <line x1="340" y1="304" x2="340" y2="324" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- N4 · Store token + user ID -->
+  <g class="c-purple">
+    <rect x="220" y="324" width="240" height="44" rx="8"/>
+    <text class="th" x="340" y="346" text-anchor="middle" dominant-baseline="central">Store token + user ID</text>
+  </g>
+
+  <line x1="340" y1="368" x2="340" y2="388" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- N5 · Send reset email -->
+  <g class="c-teal">
+    <rect x="220" y="388" width="240" height="44" rx="8"/>
+    <text class="th" x="340" y="410" text-anchor="middle" dominant-baseline="central">Send reset link via email</text>
+  </g>
+
+  <!-- ═══ Section separator ═══ -->
+  <line x1="40" y1="452" x2="640" y2="452"
+        stroke="var(--border)" stroke-width="1" stroke-dasharray="8 5"/>
+
+  <!-- Arrow crossing separator (with inline label) -->
+  <line x1="340" y1="432" x2="340" y2="472" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="348" y="448" text-anchor="start" opacity=".55">user receives email</text>
+
+  <text class="ts" x="40" y="464" opacity=".45">Section 2 — Password reset form</text>
+
+  <!-- ═══ SECTION 2 — Password reset form ═══ -->
+
+  <!-- N6 · User clicks reset link -->
+  <g class="c-gray">
+    <rect x="220" y="480" width="240" height="44" rx="8"/>
+    <text class="th" x="340" y="502" text-anchor="middle" dominant-baseline="central">User clicks reset link</text>
+  </g>
+
+  <line x1="340" y1="524" x2="340" y2="544" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- N7 · Enter new password ×2 -->
+  <g class="c-gray">
+    <rect x="220" y="544" width="240" height="56" rx="8"/>
+    <text class="th" x="340" y="565" text-anchor="middle" dominant-baseline="central">Enter new password ×2</text>
+    <text class="ts" x="340" y="583" text-anchor="middle" dominant-baseline="central">Confirm both passwords match</text>
+  </g>
+
+  <line x1="340" y1="600" x2="340" y2="620" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- D2 · Token expired?  diamond: center=(340,648) hw=100 hh=28 -->
+  <g class="decision">
+    <polygon points="340,620 440,648 340,676 240,648"/>
+    <text class="th" x="340" y="648" text-anchor="middle" dominant-baseline="central">Token expired?</text>
+  </g>
+
+  <!-- D2 "Yes" → right column (expired-token branch) -->
+  <path d="M 440,648 L 552,648 L 552,692" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="496" y="641" text-anchor="middle" opacity=".75">Yes</text>
+
+  <!-- D2 "No" → down to password-match check -->
+  <line x1="340" y1="676" x2="340" y2="714" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="348" y="698" text-anchor="start" opacity=".75">No</text>
+
+  <!-- ── Right branch (D2 = Yes): token expired → dead end ── -->
+
+  <!-- R1 · Token expired error -->
+  <g class="c-red">
+    <rect x="465" y="692" width="175" height="56" rx="8"/>
+    <text class="th" x="552" y="713" text-anchor="middle" dominant-baseline="central">Token expired</text>
+    <text class="ts" x="552" y="731" text-anchor="middle" dominant-baseline="central">Show expiry error</text>
+  </g>
+
+  <line x1="552" y1="748" x2="552" y2="768" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- R2 · End terminal (right) -->
+  <g class="c-gray">
+    <rect x="465" y="768" width="175" height="44" rx="22"/>
+    <text class="th" x="552" y="790" text-anchor="middle" dominant-baseline="central">End — request again</text>
+  </g>
+
+  <!-- D3 · Passwords match?  diamond: center=(340,742) hw=100 hh=28 -->
+  <g class="decision">
+    <polygon points="340,714 440,742 340,770 240,742"/>
+    <text class="th" x="340" y="742" text-anchor="middle" dominant-baseline="central">Passwords match?</text>
+  </g>
+
+  <!-- D3 "No" → left column (mismatch branch) -->
+  <path d="M 240,742 L 115,742 L 115,786" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="178" y="735" text-anchor="middle" opacity=".75">No</text>
+
+  <!-- D3 "Yes" → down to reset -->
+  <line x1="340" y1="770" x2="340" y2="790" class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="348" y="783" text-anchor="start" opacity=".75">Yes</text>
+
+  <!-- ── Left branch (D3 = No): passwords don't match → loop back ── -->
+
+  <!-- L3 · Password mismatch error -->
+  <g class="c-red">
+    <rect x="20" y="786" width="190" height="56" rx="8"/>
+    <text class="th" x="115" y="807" text-anchor="middle" dominant-baseline="central">Password mismatch</text>
+    <text class="ts" x="115" y="825" text-anchor="middle" dominant-baseline="central">Passwords do not match</text>
+  </g>
+
+  <!-- Loop-back arrow: exits L3 bottom → drops to y=862 →
+       travels right to corridor x=215 → climbs to N7 center y=572 →
+       enters N7 left edge at (220, 572) pointing right -->
+  <path d="M 115,842 L 115,862 L 215,862 L 215,572 L 220,572"
+        class="arr" marker-end="url(#arrow)"/>
+  <text class="ts" x="224" y="538" text-anchor="start" opacity=".6">retry</text>
+
+  <!-- ── Center Yes branch (D3 = Yes): reset password & invalidate token ── -->
+
+  <!-- N8 · Reset password -->
+  <g class="c-teal">
+    <rect x="220" y="790" width="240" height="56" rx="8"/>
+    <text class="th" x="340" y="811" text-anchor="middle" dominant-baseline="central">Reset password</text>
+    <text class="ts" x="340" y="829" text-anchor="middle" dominant-baseline="central">Invalidate used token</text>
+  </g>
+
+  <line x1="340" y1="846" x2="340" y2="866" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- N9 · Success terminal -->
+  <g class="c-green">
+    <rect x="220" y="866" width="240" height="44" rx="22"/>
+    <text class="th" x="340" y="888" text-anchor="middle" dominant-baseline="central">Password reset complete</text>
+  </g>
+
+  <!-- ═══ Legend ═══ -->
+  <text class="ts" x="40" y="930" opacity=".4">Legend —</text>
+  <rect x="108" y="920" width="13" height="13" rx="2" fill="#F1EFE8" stroke="#5F5E5A" stroke-width="0.5"/>
+  <text class="ts" x="126" y="930" opacity=".7">User action</text>
+  <rect x="210" y="920" width="13" height="13" rx="2" fill="#EEEDFE" stroke="#534AB7" stroke-width="0.5"/>
+  <text class="ts" x="228" y="930" opacity=".7">System process</text>
+  <rect x="334" y="920" width="13" height="13" rx="2" fill="#E1F5EE" stroke="#0F6E56" stroke-width="0.5"/>
+  <text class="ts" x="352" y="930" opacity=".7">Email / success</text>
+  <rect x="455" y="920" width="13" height="13" rx="2" fill="#FCEBEB" stroke="#A32D2D" stroke-width="0.5"/>
+  <text class="ts" x="473" y="930" opacity=".7">Error state</text>
+  <polygon points="556,926 566,932 556,938 546,932" fill="#FAEEDA" stroke="#854F0B" stroke-width="0.5"/>
+  <text class="ts" x="572" y="932" opacity=".7">Decision</text>
+
+</svg>
+```
+
+## Custom CSS
+
+Add these classes to the hosting page `<style>` block (in addition to the standard skill CSS):
+
+```css
+/* Decision diamond — amber fill, same palette as c-amber */
+.decision > polygon { fill: #FAEEDA; stroke: #854F0B; stroke-width: 0.5; }
+.decision > .th     { fill: #633806; }
+
+@media (prefers-color-scheme: dark) {
+  .decision > polygon { fill: #633806; stroke: #EF9F27; }
+  .decision > .th     { fill: #FAC775; }
+}
+```
+
+## Color Assignments
+
+| Element | Color | Reason |
+|---------|-------|--------|
+| Start / end terminals | `c-gray` | Neutral entry and exit points |
+| User actions (enter email, click link, enter password) | `c-gray` | User-facing steps with no system processing |
+| Generic message + request-handled terminal | `c-gray` | Intentionally neutral — the security message must not reveal data |
+| Generate & store token | `c-purple` | Backend system operations |
+| Send reset email | `c-teal` | Positive external action (outbound communication) |
+| Token expired error | `c-red` | Failure / blocking error state |
+| Password mismatch error | `c-red` | Validation failure |
+| Reset password + success | `c-teal` / `c-green` | Positive outcome: teal for the action, green pill for the terminal |
+| Decision diamonds | `c-amber` (custom `.decision`) | Warning / branch point — matches amber semantic meaning |
+
+## Layout Notes
+
+- **ViewBox**: 680×960 — tall flowchart with two phases
+- **Three-column structure**: Left (cx=115), center (cx=340), right (cx=552) — each branch stays within its column; only `<path>` arrows cross column boundaries
+- **Diamond formula**: `<polygon points="cx,cy-hh cx+hw,cy cx,cy+hh cx-hw,cy"/>` with hw=100, hh=28 gives a 200×56px diamond that sits flush with the center column (x=220–460)
+- **Branch routing pattern**: "No" paths use `<path d="M left_point,cy L side_cx,cy L side_cx,node_top">` — one horizontal segment + one vertical segment, no curves needed
+- **Loop corridor**: The 5-px gap at x=210–220 between left and center columns provides a clean vertical channel for the loop-back path without any node overlap; the path exits node bottom, drops 20px, goes right to x=215, climbs to target y, enters from left
+- **Section separator**: A dashed `<line>` at y=452 with `stroke-dasharray="8 5"` provides a visual phase break; the single connecting arrow crosses it at center, with a faded label on the arrow
+- **Pill terminals**: `rx=22` (half the 44px node height) produces a perfect capsule/pill shape — use this consistently for all start/end terminals
+- **Error annotation**: The exact UX copy is rendered as faded (`opacity=".45"`) italic `ts` text below the relevant node, keeping it informative without cluttering the flow
@@ -0,0 +1,240 @@
+# Autonomous LLM Research Agent Flow
+
+A multi-section flowchart showing Karpathy's autoresearch framework: human-agent handoff, the autonomous experiment loop with keep/discard decision branching, and the modifiable training pipeline. Demonstrates loop-back arrows, convergent decision paths, and semantic color coding for outcomes.
+
+## Key Patterns Used
+
+- **Three-section layout**: Setup row, main loop container, and detail container — each visually distinct
+- **Neutral dashed containers**: Loop and training pipeline use `var(--bg-secondary)` fill with dashed borders to recede behind colored content nodes
+- **Decision branching with convergence**: "val_bpb improved?" splits into Keep (green) and Discard (red), then both converge back to "Log to results.tsv"
+- **Loop-back arrow**: Dashed path with rounded corners on the right side of the container showing infinite repetition
+- **Semantic color for outcomes**: Green = improvement (keep), Red = no improvement (discard) — not arbitrary decoration
+- **Highlighted key step**: "Run training" uses `c-coral` to visually distinguish the most important step from other `c-teal` actions
+- **Horizontal pipeline flow**: Training details section uses left-to-right arrow-connected nodes (GPT → MuonAdamW → Evaluation)
+- **Footer metadata**: Fixed constraints shown as subtle centered text below the pipeline nodes
+- **Legend row**: Color key at the bottom explaining what each color means
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 920" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+  </defs>
+
+  <!-- ========================================== -->
+  <!-- SECTION 1: SETUP (Human → program.md → AI) -->
+  <!-- ========================================== -->
+
+  <text class="ts" x="40" y="30" text-anchor="start" opacity=".5">One-time setup</text>
+
+  <!-- Human -->
+  <g class="node c-gray">
+    <rect x="60" y="42" width="140" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="130" y="62" text-anchor="middle" dominant-baseline="central">Human</text>
+    <text class="ts" x="130" y="82" text-anchor="middle" dominant-baseline="central">Researcher</text>
+  </g>
+
+  <!-- Arrow: Human → program.md -->
+  <line x1="200" y1="70" x2="250" y2="70" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- program.md -->
+  <g class="node c-gray">
+    <rect x="250" y="42" width="180" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="340" y="62" text-anchor="middle" dominant-baseline="central">program.md</text>
+    <text class="ts" x="340" y="82" text-anchor="middle" dominant-baseline="central">Agent instructions</text>
+  </g>
+
+  <!-- Arrow: program.md → AI Agent -->
+  <line x1="430" y1="70" x2="470" y2="70" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- AI Agent -->
+  <g class="node c-purple">
+    <rect x="470" y="42" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="550" y="62" text-anchor="middle" dominant-baseline="central">AI agent</text>
+    <text class="ts" x="550" y="82" text-anchor="middle" dominant-baseline="central">Claude / Codex</text>
+  </g>
+
+  <!-- Arrow: Setup row → Loop (from program.md center down) -->
+  <line x1="340" y1="98" x2="340" y2="142" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- ========================================== -->
+  <!-- SECTION 2: AUTONOMOUS EXPERIMENT LOOP      -->
+  <!-- ========================================== -->
+
+  <!-- Loop container (neutral dashed) -->
+  <g>
+    <rect x="40" y="142" width="600" height="528" rx="16"
+          stroke-width="1" stroke-dasharray="6 4"
+          fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="170">Autonomous experiment loop</text>
+    <text class="ts" x="66" y="188">~12 experiments/hour — runs until manually stopped</text>
+  </g>
+
+  <!-- Step 1: Read code + past results -->
+  <g class="node c-teal">
+    <rect x="170" y="208" width="280" height="44" rx="8" stroke-width="0.5"/>
+    <text class="th" x="310" y="230" text-anchor="middle" dominant-baseline="central">Read code + past results</text>
+  </g>
+
+  <!-- Arrow: S1 → S2 -->
+  <line x1="310" y1="252" x2="310" y2="274" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Step 2: Propose + edit train.py -->
+  <g class="node c-teal">
+    <rect x="170" y="274" width="280" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="310" y="294" text-anchor="middle" dominant-baseline="central">Propose + edit train.py</text>
+    <text class="ts" x="310" y="314" text-anchor="middle" dominant-baseline="central">Arch, optimizer, hyperparameters</text>
+  </g>
+
+  <!-- Arrow: S2 → S3 -->
+  <line x1="310" y1="330" x2="310" y2="352" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Step 3: Run training (highlighted — key step) -->
+  <g class="node c-coral">
+    <rect x="170" y="352" width="280" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="310" y="372" text-anchor="middle" dominant-baseline="central">Run training</text>
+    <text class="ts" x="310" y="392" text-anchor="middle" dominant-baseline="central">uv run train.py (5 min budget)</text>
+  </g>
+
+  <!-- Arrow: S3 → S4 -->
+  <line x1="310" y1="408" x2="310" y2="430" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Step 4: Decision — val_bpb improved? -->
+  <g class="node c-gray">
+    <rect x="170" y="430" width="280" height="44" rx="8" stroke-width="0.5"/>
+    <text class="th" x="310" y="452" text-anchor="middle" dominant-baseline="central">val_bpb improved?</text>
+  </g>
+
+  <!-- Decision arrows to Keep / Discard -->
+  <line x1="240" y1="474" x2="175" y2="508" class="arr" marker-end="url(#arrow)"/>
+  <line x1="380" y1="474" x2="445" y2="508" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Decision labels -->
+  <text class="ts" x="195" y="496" opacity=".6">yes</text>
+  <text class="ts" x="416" y="496" opacity=".6">no</text>
+
+  <!-- Keep — advance branch -->
+  <g class="node c-green">
+    <rect x="70" y="508" width="210" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="175" y="528" text-anchor="middle" dominant-baseline="central">Keep</text>
+    <text class="ts" x="175" y="548" text-anchor="middle" dominant-baseline="central">Advance git branch</text>
+  </g>
+
+  <!-- Discard — git reset -->
+  <g class="node c-red">
+    <rect x="340" y="508" width="210" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="445" y="528" text-anchor="middle" dominant-baseline="central">Discard</text>
+    <text class="ts" x="445" y="548" text-anchor="middle" dominant-baseline="central">Git reset to previous</text>
+  </g>
+
+  <!-- Converge arrows: Keep → Log, Discard → Log -->
+  <line x1="175" y1="564" x2="250" y2="590" class="arr" marker-end="url(#arrow)"/>
+  <line x1="445" y1="564" x2="370" y2="590" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Step 6: Log to results.tsv -->
+  <g class="node c-teal">
+    <rect x="170" y="590" width="280" height="44" rx="8" stroke-width="0.5"/>
+    <text class="th" x="310" y="612" text-anchor="middle" dominant-baseline="central">Log to results.tsv</text>
+  </g>
+
+  <!-- Loop-back arrow (dashed, right side) -->
+  <path d="M 450 612 L 564 612 Q 576 612 576 600 L 576 242 Q 576 230 564 230 L 450 230"
+        fill="none" class="arr" stroke-dasharray="4 3" marker-end="url(#arrow)"/>
+
+  <!-- ========================================== -->
+  <!-- SECTION 3: TRAINING PIPELINE DETAILS       -->
+  <!-- ========================================== -->
+
+  <!-- Connection arrow: Loop → Training details -->
+  <line x1="310" y1="670" x2="310" y2="710" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Training container (neutral dashed) -->
+  <g>
+    <rect x="40" y="710" width="600" height="170" rx="16"
+          stroke-width="1" stroke-dasharray="6 4"
+          fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="738">train.py — modifiable training pipeline</text>
+    <text class="ts" x="66" y="756">Runs during each training step — single GPU, single file</text>
+  </g>
+
+  <!-- GPT model -->
+  <g class="node c-coral">
+    <rect x="70" y="774" width="155" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="147" y="794" text-anchor="middle" dominant-baseline="central">GPT model</text>
+    <text class="ts" x="147" y="814" text-anchor="middle" dominant-baseline="central">RoPE, FlashAttn3</text>
+  </g>
+
+  <!-- Arrow: GPT → MuonAdamW -->
+  <line x1="225" y1="802" x2="260" y2="802" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- MuonAdamW optimizer -->
+  <g class="node c-coral">
+    <rect x="260" y="774" width="155" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="337" y="794" text-anchor="middle" dominant-baseline="central">MuonAdamW</text>
+    <text class="ts" x="337" y="814" text-anchor="middle" dominant-baseline="central">Hybrid optimizer</text>
+  </g>
+
+  <!-- Arrow: MuonAdamW → Evaluation -->
+  <line x1="415" y1="802" x2="450" y2="802" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Evaluation -->
+  <g class="node c-amber">
+    <rect x="450" y="774" width="155" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="527" y="794" text-anchor="middle" dominant-baseline="central">Evaluation</text>
+    <text class="ts" x="527" y="814" text-anchor="middle" dominant-baseline="central">val_bpb metric</text>
+  </g>
+
+  <!-- Footer: fixed constraints -->
+  <text class="ts" x="340" y="856" text-anchor="middle" opacity=".5">climbmix-400b data · 8K BPE vocab · 300s budget · 2048 context</text>
+
+  <!-- ========================================== -->
+  <!-- LEGEND                                     -->
+  <!-- ========================================== -->
+
+  <g class="c-teal"><rect x="40" y="890" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="62" y="902">Agent actions</text>
+
+  <g class="c-coral"><rect x="170" y="890" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="192" y="902">Training run</text>
+
+  <g class="c-green"><rect x="300" y="890" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="322" y="902">Improvement</text>
+
+  <g class="c-red"><rect x="430" y="890" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="452" y="902">No improvement</text>
+
+</svg>
+```
+
+## Color Assignments
+
+| Element | Color | Reason |
+|---------|-------|--------|
+| Human, program.md | `c-gray` | Neutral setup / input nodes |
+| AI agent | `c-purple` | The active intelligent actor |
+| Loop action steps | `c-teal` | Agent's analytical/editing actions |
+| Run training | `c-coral` | Highlighted key step — the 5-min training run |
+| Decision check | `c-gray` | Neutral evaluation checkpoint |
+| Keep (improved) | `c-green` | Semantic success — val_bpb decreased |
+| Discard (not improved) | `c-red` | Semantic failure — no improvement |
+| Training pipeline nodes | `c-coral` | Training infrastructure components |
+| Evaluation node | `c-amber` | Distinct from training — measurement/metric role |
+| Containers | Neutral (dashed) | Subtle grouping that recedes behind content |
+
+## Layout Notes
+
+- **ViewBox**: 680×920 (standard width, tall for 3 sections)
+- **Three sections**: Setup row (y=30–98), loop container (y=142–670), training details (y=710–880)
+- **Container style**: Dashed border (`stroke-dasharray="6 4"`), neutral fill (`var(--bg-secondary)`), `stroke-width="1"` — not colored, so inner nodes pop
+- **Loop-back arrow**: Dashed `<path>` with quadratic curves (`Q`) at corners for smooth rounded turns, running up the right side of the loop container from "Log" back to "Read code"
+- **Decision pattern**: Single question node ("val_bpb improved?") with diagonal arrows to Keep/Discard, then convergent diagonal arrows back to "Log to results.tsv"
+- **Decision labels**: "yes"/"no" labels placed along the diagonal arrows with `opacity=".6"` to stay subtle
+- **Key step highlight**: "Run training" uses `c-coral` while surrounding steps use `c-teal`, drawing the eye to the most important step
+- **Horizontal sub-flow**: Training pipeline uses left-to-right arrow-connected nodes (GPT model → MuonAdamW → Evaluation)
+- **Footer metadata**: Fixed constraints (data, vocab, budget, context) shown as a single centered `ts` text line with `opacity=".5"`
+- **Legend**: Four color swatches at the bottom explaining the semantic meaning of each color used
@@ -0,0 +1,161 @@
+# Journey of a Banana: From Tree to Smoothie
+
+A narrative journey diagram following a single banana across 3,000 miles and 3 weeks, from harvest in Costa Rica to a smoothie in the consumer's kitchen. Demonstrates storytelling through visualization, winding path layout, and progressive state changes.
+
+## Key Patterns Used
+
+- **Winding journey path**: S-curve connecting all stages visually
+- **Location markers**: Country flags and place names for geographic context
+- **Progressive state changes**: Banana color changes (green → yellow → brown → frozen → smoothie)
+- **Narrative details**: Fun elements like spider check, stickers, price tags
+- **Timeline**: Bottom timeline showing duration of journey
+- **Environmental context**: Ocean waves, gas clouds, store awning
+
+## New Shape Techniques
+
+### Banana (curved fruit shape)
+```xml
+<!-- Green banana -->
+<path class="banana-green" d="M 5 0 Q 0 10 3 20 Q 6 25 10 20 Q 13 10 8 0 Z"/>
+
+<!-- Yellow banana -->
+<path class="banana-yellow" d="M 0 5 Q -6 18 0 32 Q 7 40 15 30 Q 20 15 12 5 Z"/>
+
+<!-- Brown overripe banana with spots -->
+<path class="banana-brown" d="M 0 5 Q -5 15 0 28 Q 6 35 14 26 Q 18 14 12 5 Z"/>
+<circle class="banana-spots" cx="5" cy="15" r="1.5"/>
+<circle class="banana-spots" cx="9" cy="20" r="1"/>
+```
+
+### Banana Tree
+```xml
+<!-- Trunk -->
+<rect class="tree-trunk" x="55" y="50" width="15" height="60" rx="3"/>
+<!-- Leaves (rotated ellipses) -->
+<ellipse class="tree-leaf" cx="62" cy="45" rx="40" ry="15" transform="rotate(-20, 62, 45)"/>
+<ellipse class="tree-leaf" cx="62" cy="50" rx="35" ry="12" transform="rotate(25, 62, 50)"/>
+<!-- Banana bunch hanging -->
+<g transform="translate(40, 55)">
+  <path class="banana-green" d="M 5 0 Q 0 10 3 20 Q 6 25 10 20 Q 13 10 8 0 Z"/>
+  <path class="banana-green" d="M 12 2 Q 8 12 11 22 Q 14 27 18 22 Q 21 12 16 2 Z"/>
+  <rect class="stem" x="8" y="-5" width="12" height="8" rx="2"/>
+</g>
+```
+
+### Cargo Ship
+```xml
+<!-- Ocean waves -->
+<path class="ocean" d="M 0 90 Q 30 85 60 90 Q 90 95 120 90 Q 150 85 180 90 L 180 110 L 0 110 Z" opacity="0.5"/>
+<!-- Hull -->
+<path class="ship-hull" d="M 20 90 L 30 60 L 160 60 L 170 90 Q 150 95 95 95 Q 40 95 20 90 Z"/>
+<!-- Deck -->
+<rect class="ship-deck" x="40" y="45" width="110" height="18" rx="2"/>
+<!-- Reefer containers -->
+<rect class="container" x="45" y="25" width="30" height="22" rx="2"/>
+<!-- Refrigeration symbol -->
+<text x="60" y="40" text-anchor="middle" fill="#185FA5" style="font-size:10px">❄</text>
+<!-- Smoke stack -->
+<rect x="145" y="35" width="8" height="15" fill="#444441"/>
+```
+
+### Inspector Figure
+```xml
+<!-- Body -->
+<rect class="inspector" x="10" y="20" width="25" height="35" rx="3"/>
+<!-- Head -->
+<circle class="inspector" cx="22" cy="12" r="10"/>
+<!-- Hat -->
+<rect x="12" y="2" width="20" height="6" rx="2" fill="#534AB7"/>
+<!-- Clipboard -->
+<rect class="clipboard" x="38" y="28" width="15" height="20" rx="2"/>
+<line x1="42" y1="34" x2="50" y2="34" stroke="#888780" stroke-width="1"/>
+```
+
+### Spider with "No" Symbol
+```xml
+<circle cx="15" cy="15" r="18" fill="none" stroke="#A32D2D" stroke-width="2"/>
+<line x1="3" y1="3" x2="27" y2="27" stroke="#A32D2D" stroke-width="2"/>
+<!-- Spider body -->
+<ellipse class="spider" cx="15" cy="15" rx="4" ry="5"/>
+<ellipse class="spider" cx="15" cy="10" rx="3" ry="3"/>
+<!-- Legs -->
+<line x1="12" y1="14" x2="5" y2="10" stroke="#2C2C2A" stroke-width="1"/>
+<line x1="18" y1="14" x2="25" y2="10" stroke="#2C2C2A" stroke-width="1"/>
+```
+
+### Blender with Smoothie
+```xml
+<!-- Blender jar -->
+<path class="blender" d="M 5 5 L 0 45 L 35 45 L 30 5 Z"/>
+<!-- Smoothie inside (wavy top) -->
+<path class="smoothie" d="M 3 20 L 0 45 L 35 45 L 32 20 Q 25 18 17 22 Q 10 18 3 20 Z"/>
+<!-- Blender base -->
+<rect class="blender" x="-2" y="45" width="40" height="12" rx="3"/>
+<!-- Lid -->
+<rect x="8" y="0" width="20" height="8" rx="2" fill="#AFA9EC" stroke="#534AB7"/>
+<!-- Banana chunks floating -->
+<ellipse cx="12" cy="32" rx="4" ry="2" fill="#FAC775"/>
+```
+
+### Winding Journey Path
+```xml
+<path class="journey-path" d="
+  M 80 100 
+  L 200 100 
+  Q 280 100 280 150 
+  L 280 180
+  Q 280 220 320 220
+  L 520 220
+  Q 560 220 560 260
+  L 560 320
+  Q 560 360 520 360
+  L 280 360
+  ...
+"/>
+```
+
+## CSS Classes
+
+```css
+/* Journey */
+.journey-path { stroke: #D3D1C7; stroke-width: 3; fill: none; stroke-linecap: round; }
+
+/* Banana ripeness stages */
+.banana-green { fill: #97C459; stroke: #3B6D11; stroke-width: 0.5; }
+.banana-yellow { fill: #FAC775; stroke: #BA7517; stroke-width: 0.5; }
+.banana-brown { fill: #854F0B; stroke: #633806; stroke-width: 0.5; }
+.banana-spots { fill: #633806; }
+
+/* Environment elements */
+.tree-trunk { fill: #854F0B; stroke: #633806; stroke-width: 1; }
+.tree-leaf { fill: #97C459; stroke: #3B6D11; stroke-width: 0.5; }
+.ocean { fill: #85B7EB; }
+.ship-hull { fill: #5F5E5A; stroke: #444441; stroke-width: 1; }
+.container { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+.gas-cloud { fill: #C0DD97; stroke: #97C459; stroke-width: 0.5; opacity: 0.6; }
+
+/* Buildings */
+.packhouse { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1; }
+.warehouse { fill: #FAEEDA; stroke: #854F0B; stroke-width: 1; }
+.store { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 1; }
+
+/* Kitchen */
+.counter { fill: #FAECE7; stroke: #993C1D; stroke-width: 1; }
+.blender { fill: #EEEDFE; stroke: #534AB7; stroke-width: 1; }
+.smoothie { fill: #FAC775; }
+.freezer { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+
+/* Details */
+.sticker { fill: #378ADD; stroke: #185FA5; stroke-width: 0.3; }
+.spider { fill: #2C2C2A; stroke: #1a1a18; stroke-width: 0.3; }
+```
+
+## Layout Notes
+
+- **ViewBox**: 850×680 (tall for winding path)
+- **Path style**: S-curve winding path connects all 7 stages
+- **Location labels**: Country flags + place names anchor geographic context
+- **State progression**: Same object (banana) shown in different states throughout
+- **Timeline**: Horizontal timeline at bottom shows journey duration
+- **Narrative elements**: Fun details (spider, stickers, price tags) add storytelling value
+- **Environmental context**: Ocean waves, gas clouds, awnings create sense of place
@@ -0,0 +1,209 @@
+# Commercial Aircraft Structure
+
+A physical/structural diagram showing an aircraft side profile using appropriate SVG shapes beyond rectangles - paths, polygons, ellipses for realistic representation.
+
+## Key Patterns Used
+
+- **Path elements**: Curved fuselage body with nose cone using quadratic bezier curves
+- **Polygon elements**: Tapered wing shape, triangular stabilizers, control surfaces
+- **Ellipse elements**: Engines (cylinders), wheels (circles)
+- **Line elements**: Landing gear struts, leader lines for labels
+- **Dashed strokes**: Interior sections (fuel tank), movable control surfaces (rudder, elevator)
+- **Layered composition**: Cabin sections drawn inside the fuselage shape
+- **Leader lines with labels**: Connect labels to components they describe
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 400" xmlns="http://www.w3.org/2000/svg">
+
+  <!-- FUSELAGE - main body cylinder with nose cone -->
+  <path class="fuselage" d="
+    M 80 180
+    Q 40 180 40 200
+    Q 40 220 80 220
+    L 560 220
+    Q 580 220 580 200
+    Q 580 180 560 180
+    Z
+  "/>
+  
+  <!-- Nose cone -->
+  <path class="fuselage" d="
+    M 80 180
+    Q 50 180 35 200
+    Q 50 220 80 220
+  " fill="none" stroke-width="1"/>
+
+  <!-- COCKPIT windows -->
+  <path class="cockpit" d="
+    M 45 190
+    L 75 185
+    L 75 200
+    L 50 200
+    Z
+  "/>
+  <line x1="55" y1="188" x2="55" y2="200" stroke="#534AB7" stroke-width="0.5"/>
+  <line x1="65" y1="186" x2="65" y2="200" stroke="#534AB7" stroke-width="0.5"/>
+
+  <!-- CABIN SECTIONS (inside fuselage) -->
+  <!-- First class -->
+  <rect class="first-class" x="85" y="183" width="50" height="34" rx="2"/>
+  <text class="tl" x="110" y="203" text-anchor="middle">First</text>
+  
+  <!-- Business class -->
+  <rect class="business-class" x="140" y="183" width="80" height="34" rx="2"/>
+  <text class="tl" x="180" y="203" text-anchor="middle">Business</text>
+  
+  <!-- Economy class -->
+  <rect class="economy-class" x="225" y="183" width="200" height="34" rx="2"/>
+  <text class="tl" x="325" y="203" text-anchor="middle">Economy</text>
+
+  <!-- CARGO HOLD (lower section indication) -->
+  <line x1="85" y1="217" x2="520" y2="217" class="leader"/>
+  <text class="tl" x="300" y="228" text-anchor="middle" opacity=".6">Cargo hold below deck</text>
+
+  <!-- WING - main wing shape -->
+  <polygon class="wing" points="
+    200,220
+    120,300
+    130,305
+    160,305
+    340,235
+    340,220
+  "/>
+  
+  <!-- Wing fuel tank (dashed interior) -->
+  <polygon class="fuel-tank" points="
+    210,225
+    150,280
+    160,283
+    180,283
+    310,232
+    310,225
+  "/>
+  <text class="tl" x="220" y="260" opacity=".7">Fuel</text>
+
+  <!-- Flaps (trailing edge) -->
+  <polygon class="flap" points="
+    130,300
+    120,305
+    160,310
+    165,305
+  "/>
+  <text class="tl" x="143" y="320">Flaps</text>
+
+  <!-- ENGINE under wing -->
+  <ellipse class="engine" cx="175" cy="285" rx="25" ry="12"/>
+  <ellipse cx="155" cy="285" rx="8" ry="10" fill="none" stroke="#993C1D" stroke-width="0.5"/>
+  <!-- Engine pylon -->
+  <line x1="175" y1="273" x2="190" y2="245" stroke="#5F5E5A" stroke-width="2"/>
+  <text class="tl" x="175" y="308" text-anchor="middle">Engine</text>
+
+  <!-- TAIL SECTION -->
+  <!-- Vertical stabilizer -->
+  <polygon class="tail-v" points="
+    520,180
+    560,100
+    580,100
+    580,180
+  "/>
+  <text class="tl" x="565" y="150" text-anchor="middle">Vertical</text>
+  <text class="tl" x="565" y="162" text-anchor="middle">stabilizer</text>
+  
+  <!-- Rudder -->
+  <polygon points="575,105 590,105 590,178 580,178" fill="none" stroke="#185FA5" stroke-width="0.5" stroke-dasharray="3 2"/>
+  <text class="tl" x="595" y="145" opacity=".6">Rudder</text>
+
+  <!-- Horizontal stabilizer -->
+  <polygon class="tail-h" points="
+    500,195
+    460,175
+    465,170
+    580,170
+    580,180
+    520,195
+  "/>
+  <text class="tl" x="510" y="166">Horizontal stabilizer</text>
+  
+  <!-- Elevator -->
+  <polygon points="462,174 450,168 455,163 467,169" fill="none" stroke="#185FA5" stroke-width="0.5" stroke-dasharray="3 2"/>
+  <text class="tl" x="440" y="158" opacity=".6">Elevator</text>
+
+  <!-- LANDING GEAR -->
+  <!-- Nose gear -->
+  <line class="gear" x1="100" y1="220" x2="100" y2="260" stroke-width="3"/>
+  <ellipse class="wheel" cx="100" cy="268" rx="8" ry="10"/>
+  <text class="tl" x="100" y="290" text-anchor="middle">Nose gear</text>
+
+  <!-- Main gear (under wing/fuselage junction) -->
+  <line class="gear" x1="280" y1="220" x2="280" y2="270" stroke-width="4"/>
+  <line class="gear" x1="268" y1="265" x2="292" y2="265" stroke-width="3"/>
+  <ellipse class="wheel" cx="268" cy="278" rx="10" ry="12"/>
+  <ellipse class="wheel" cx="292" cy="278" rx="10" ry="12"/>
+  <text class="tl" x="280" y="302" text-anchor="middle">Main gear</text>
+
+  <!-- LABELS with leader lines -->
+  <!-- Cockpit label -->
+  <line class="leader" x1="60" y1="175" x2="60" y2="140"/>
+  <text class="ts" x="60" y="132" text-anchor="middle">Cockpit</text>
+
+  <!-- Wing label -->
+  <line class="leader" x1="250" y1="250" x2="290" y2="330"/>
+  <text class="ts" x="290" y="345" text-anchor="middle">Wing structure</text>
+  <text class="tl" x="290" y="358" text-anchor="middle">Spars, ribs, skin</text>
+
+  <!-- Fuselage label -->
+  <line class="leader" x1="400" y1="180" x2="400" y2="140"/>
+  <text class="ts" x="400" y="132" text-anchor="middle">Fuselage</text>
+  <text class="tl" x="400" y="145" text-anchor="middle">Pressure vessel</text>
+
+</svg>
+```
+
+## CSS Classes for Physical Diagrams
+
+When creating physical/structural diagrams, define semantic classes for each component type:
+
+```css
+/* Structure shapes */
+.fuselage { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1; }
+.wing { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+.tail-v { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+.tail-h { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+
+/* Interior sections */
+.cockpit { fill: #EEEDFE; stroke: #534AB7; stroke-width: 1; }
+.first-class { fill: #FBEAF0; stroke: #993556; stroke-width: 0.5; }
+.business-class { fill: #FAECE7; stroke: #993C1D; stroke-width: 0.5; }
+.economy-class { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 0.5; }
+.cargo { fill: #D3D1C7; stroke: #5F5E5A; stroke-width: 0.5; }
+
+/* Systems */
+.engine { fill: #FAECE7; stroke: #993C1D; stroke-width: 1; }
+.fuel-tank { fill: #FAEEDA; stroke: #854F0B; stroke-width: 0.5; stroke-dasharray: 3 2; }
+.flap { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 0.5; }
+
+/* Mechanical */
+.gear { fill: #444441; stroke: #2C2C2A; stroke-width: 0.5; }
+.wheel { fill: #2C2C2A; stroke: #1a1a18; stroke-width: 0.5; }
+```
+
+## Shape Selection Guide
+
+| Physical form | SVG element | Example |
+|---------------|-------------|---------|
+| Curved body | `<path>` with Q (quadratic) or C (cubic) curves | Fuselage, nose cone |
+| Tapered/angular | `<polygon>` | Wings, stabilizers |
+| Cylindrical | `<ellipse>` | Engines, wheels, tanks |
+| Linear structure | `<line>` | Struts, pylons, gear legs |
+| Internal sections | `<rect>` inside parent shape | Cabin classes |
+| Dashed boundaries | `stroke-dasharray` on any shape | Fuel tanks, control surfaces |
+
+## Layout Notes
+
+- **ViewBox**: 680×400 (wider aspect ratio suits side profile)
+- **Layering**: Draw outer structures first, then interior details on top
+- **Leader lines**: Use `.leader` class (dashed) to connect labels to components
+- **Text sizes**: Use `.tl` (10px) for component labels, `.ts` (12px) for section labels
+- **Semantic colors**: Group by system (structure=blue, propulsion=coral, fuel=amber, etc.)
@@ -0,0 +1,236 @@
+# Out-of-Order CPU Core Microarchitecture
+
+A structural diagram showing the internal pipeline stages of a modern superscalar out-of-order CPU core. Demonstrates multi-stage vertical flow with parallel paths, fan-out patterns for execution ports, and a separate memory hierarchy sidebar.
+
+## Key Patterns Used
+
+- **Multi-stage vertical flow**: Six pipeline stages (Front End → Rename → Schedule → Execute → Retire)
+- **Parallel decode paths**: Main decode and µop cache bypass (dashed line for cache hit)
+- **Container grouping**: Logical stages grouped in colored containers
+- **Fan-out pattern**: Single scheduler dispatching to 6 execution ports
+- **Sidebar layout**: Memory hierarchy placed in separate column on right
+- **Stage labels**: Left-aligned labels indicating pipeline phase
+- **Color-coded semantics**: Different colors for each functional unit category
+
+## Diagram Type
+
+This is a **hybrid structural/flow** diagram:
+- **Flow aspect**: Instructions move top-to-bottom through pipeline stages
+- **Structural aspect**: Components are grouped by function (rename unit, execution cluster)
+- **Sidebar**: Memory hierarchy is architecturally separate but connected via data paths
+
+## Pipeline Stage Breakdown
+
+### Front End (Purple)
+```xml
+<!-- Fetch Unit -->
+<g class="node c-purple">
+  <rect x="40" y="70" width="140" height="56" rx="8" stroke-width="0.5"/>
+  <text class="th" x="110" y="90" text-anchor="middle" dominant-baseline="central">Fetch unit</text>
+  <text class="ts" x="110" y="110" text-anchor="middle" dominant-baseline="central">6-wide, 32B/cycle</text>
+</g>
+
+<!-- Branch Predictor (subordinate) -->
+<g class="node c-purple">
+  <rect x="40" y="140" width="140" height="44" rx="8" stroke-width="0.5"/>
+  <text class="th" x="110" y="162" text-anchor="middle" dominant-baseline="central">Branch predictor</text>
+</g>
+
+<!-- Decode -->
+<g class="node c-purple">
+  <rect x="230" y="70" width="160" height="56" rx="8" stroke-width="0.5"/>
+  <text class="th" x="310" y="90" text-anchor="middle" dominant-baseline="central">Decode</text>
+  <text class="ts" x="310" y="110" text-anchor="middle" dominant-baseline="central">x86 → µops, 6-wide</text>
+</g>
+```
+
+### µop Cache Bypass Path (Teal)
+The µop cache (Decoded Stream Buffer) provides an alternate path that bypasses the complex decoder:
+
+```xml
+<!-- µop Cache parallel to decode -->
+<g class="node c-teal">
+  <rect x="230" y="150" width="160" height="50" rx="8" stroke-width="0.5"/>
+  <text class="th" x="310" y="168" text-anchor="middle" dominant-baseline="central">µop cache (DSB)</text>
+  <text class="ts" x="310" y="186" text-anchor="middle" dominant-baseline="central">4K entries, 8-wide</text>
+</g>
+
+<!-- Dashed bypass path indicating cache hit -->
+<path d="M180 110 L205 110 L205 175 L230 175" fill="none" class="arr" 
+      stroke-dasharray="4 3" marker-end="url(#arrow)"/>
+<text class="tx" x="164" y="148" opacity=".6">hit</text>
+```
+
+### Rename/Allocate Container (Coral)
+Groups related rename components in a container:
+
+```xml
+<!-- Outer container -->
+<g class="c-coral">
+  <rect x="40" y="250" width="530" height="130" rx="12" stroke-width="0.5"/>
+  <text class="th" x="60" y="274">Rename / allocate</text>
+  <text class="ts" x="60" y="292">Map architectural → physical registers</text>
+</g>
+
+<!-- Inner components -->
+<g class="node c-coral">
+  <rect x="60" y="310" width="180" height="56" rx="8" stroke-width="0.5"/>
+  <text class="th" x="150" y="330" text-anchor="middle" dominant-baseline="central">Register alias table</text>
+  <text class="ts" x="150" y="350" text-anchor="middle" dominant-baseline="central">180 physical regs</text>
+</g>
+```
+
+### Scheduler Fan-Out Pattern (Amber → Teal)
+Single unified scheduler dispatching to multiple execution ports:
+
+```xml
+<!-- Unified Scheduler -->
+<g class="node c-amber">
+  <rect x="140" y="420" width="330" height="50" rx="8" stroke-width="0.5"/>
+  <text class="th" x="305" y="438" text-anchor="middle" dominant-baseline="central">Unified scheduler</text>
+  <text class="ts" x="305" y="456" text-anchor="middle" dominant-baseline="central">97 entries, out-of-order dispatch</text>
+</g>
+
+<!-- Fan-out arrows to 6 ports -->
+<line x1="170" y1="470" x2="90" y2="540" class="arr" marker-end="url(#arrow)"/>
+<line x1="215" y1="470" x2="170" y2="540" class="arr" marker-end="url(#arrow)"/>
+<line x1="265" y1="470" x2="250" y2="540" class="arr" marker-end="url(#arrow)"/>
+<line x1="305" y1="470" x2="330" y2="540" class="arr" marker-end="url(#arrow)"/>
+<line x1="355" y1="470" x2="410" y2="540" class="arr" marker-end="url(#arrow)"/>
+<line x1="420" y1="470" x2="490" y2="540" class="arr" marker-end="url(#arrow)"/>
+```
+
+### Execution Port Box Pattern
+Compact boxes showing port number and capabilities:
+
+```xml
+<!-- Execution port with multi-line capability -->
+<g class="node c-teal">
+  <rect x="55" y="540" width="70" height="64" rx="6" stroke-width="0.5"/>
+  <text class="th" x="90" y="560" text-anchor="middle" dominant-baseline="central">Port 0</text>
+  <text class="tx" x="90" y="576" text-anchor="middle" dominant-baseline="central">ALU</text>
+  <text class="tx" x="90" y="590" text-anchor="middle" dominant-baseline="central">DIV</text>
+</g>
+```
+
+### Reorder Buffer (Pink)
+Wide horizontal bar at bottom showing retirement:
+
+```xml
+<g class="c-pink">
+  <rect x="40" y="670" width="530" height="40" rx="10" stroke-width="0.5"/>
+  <text class="th" x="305" y="694" text-anchor="middle" dominant-baseline="central">Reorder buffer (ROB) — 512 entries, 8-wide retire</text>
+</g>
+```
+
+### Memory Hierarchy Sidebar (Blue)
+Separate column showing cache levels:
+
+```xml
+<!-- Container -->
+<g class="c-blue">
+  <rect x="600" y="30" width="190" height="360" rx="16" stroke-width="0.5"/>
+  <text class="th" x="695" y="54" text-anchor="middle">Memory hierarchy</text>
+</g>
+
+<!-- Cache levels stacked vertically -->
+<g class="node c-blue">
+  <rect x="620" y="70" width="150" height="50" rx="8" stroke-width="0.5"/>
+  <text class="th" x="695" y="88" text-anchor="middle" dominant-baseline="central">L1-I cache</text>
+  <text class="ts" x="695" y="106" text-anchor="middle" dominant-baseline="central">32 KB, 8-way</text>
+</g>
+<!-- Additional levels follow same pattern -->
+```
+
+## Connection Patterns
+
+### Instruction Fetch Path
+Horizontal arrow from L1-I cache to fetch unit:
+```xml
+<path d="M620 95 L200 95" fill="none" class="arr" marker-end="url(#arrow)"/>
+<text class="tx" x="410" y="88" text-anchor="middle" opacity=".6">instruction fetch</text>
+```
+
+### Load/Store Path
+Complex path from execution ports to L1-D cache:
+```xml
+<path d="M250 604 L250 640 L580 640 L580 160 L620 160" fill="none" class="arr" marker-end="url(#arrow)"/>
+<text class="tx" x="415" y="652" text-anchor="middle" opacity=".6">load / store</text>
+```
+
+### Commit Path (dashed)
+Dashed line showing write-back from ROB to register file:
+```xml
+<path d="M550 690 L580 690 L580 445 L595 445" fill="none" class="arr" stroke-dasharray="4 3"/>
+<text class="tx" x="590" y="578" opacity=".6" transform="rotate(-90 590 578)">commit</text>
+```
+
+### Path Merge (Decode + µop Cache)
+Two paths converging before rename:
+```xml
+<line x1="390" y1="98" x2="430" y2="98" class="arr"/>
+<line x1="390" y1="175" x2="430" y2="175" class="arr"/>
+<path d="M430 98 L430 175" fill="none" stroke="var(--text-secondary)" stroke-width="1.5"/>
+<line x1="430" y1="136" x2="470" y2="136" class="arr" marker-end="url(#arrow)"/>
+```
+
+## Text Classes
+
+This diagram uses an additional text class for very small labels:
+
+```css
+.tx { font-family: system-ui, -apple-system, sans-serif; font-size: 10px; fill: var(--text-secondary); }
+```
+
+Used for:
+- Execution port capability labels (ALU, Branch, Load, etc.)
+- Connection labels (instruction fetch, load/store, commit)
+- DRAM latency annotation
+
+## Color Semantic Mapping
+
+| Color | Stage | Components |
+|-------|-------|------------|
+| `c-purple` | Front end | Fetch, Branch predictor, Decode |
+| `c-teal` | Execution | µop cache, Execution ports |
+| `c-coral` | Rename | RAT, Physical RF, Free list |
+| `c-amber` | Schedule | Unified scheduler |
+| `c-pink` | Retire | Reorder buffer |
+| `c-blue` | Memory | L1-I, L1-D, L2, DRAM |
+| `c-gray` | External | Off-chip DRAM |
+
+## Layout Notes
+
+- **ViewBox**: 820×720 (taller than wide for vertical pipeline flow)
+- **Main pipeline**: x=40 to x=570 (530px width)
+- **Memory sidebar**: x=600 to x=790 (190px width)
+- **Stage labels**: x=30, left-aligned, 50% opacity
+- **Vertical spacing**: ~80-100px between major stages
+- **Container padding**: 20px inside containers
+- **Port spacing**: 80px between execution port centers
+- **Legend**: Bottom-right of memory sidebar, explains color coding
+
+## Architectural Details Shown
+
+| Component | Specification | Notes |
+|-----------|---------------|-------|
+| Fetch | 6-wide, 32B/cycle | Typical modern Intel/AMD |
+| Decode | 6-wide, x86→µops | Complex decoder |
+| µop Cache | 4K entries, 8-wide | Bypass for hot code |
+| RAT | 180 physical regs | Supports deep OoO |
+| Scheduler | 97 entries | Unified RS |
+| Execution | 6 ports | ALU×2, Load, Store×2, Vector |
+| ROB | 512 entries, 8-wide | In-order retirement |
+| L1-I | 32 KB, 8-way | Instruction cache |
+| L1-D | 48 KB, 12-way | Data cache |
+| L2 | 1.25 MB, 20-way | Unified |
+| DRAM | DDR5-6400, ~80ns | Off-chip |
+
+## When to Use This Pattern
+
+Use this diagram style for:
+- CPU/GPU microarchitecture visualization
+- Compiler pipeline stages
+- Network packet processing pipelines
+- Any system with parallel execution units fed by a scheduler
+- Hardware designs with multiple functional units
@@ -0,0 +1,182 @@
+# Electricity Grid: Generation to Consumption
+
+A left-to-right flow diagram showing electricity from multiple generation sources through transmission and distribution networks to end consumers. Demonstrates multi-stage flow layout, voltage level visual hierarchy, and smart grid data overlay.
+
+## Key Patterns Used
+
+- **Multi-stage horizontal flow**: Four distinct columns (Generation → Transmission → Distribution → Consumption)
+- **Stage dividers**: Vertical dashed lines separating each phase
+- **Voltage level hierarchy**: Different line weights/colors for HV, MV, LV
+- **Smart grid data overlay**: Dashed data flow lines from control center
+- **Capacity labels**: Power ratings on generation sources
+- **Multiple source convergence**: Four generators feeding into single transmission grid
+
+## New Shape Techniques
+
+### Nuclear Plant (cooling tower + reactor)
+```xml
+<!-- Cooling tower (hyperbolic curve) -->
+<path class="nuclear-tower" d="M 25 80 Q 15 60 20 40 Q 25 20 40 15 Q 55 20 60 40 Q 65 60 55 80 Z"/>
+<!-- Steam clouds -->
+<ellipse class="nuclear-steam" cx="40" cy="8" rx="12" ry="6"/>
+<!-- Reactor dome -->
+<rect class="nuclear-building" x="65" y="45" width="40" height="35" rx="3"/>
+<ellipse class="nuclear-building" cx="85" cy="45" rx="20" ry="8"/>
+```
+
+### Gas Peaker Plant (with flames)
+```xml
+<rect class="gas-plant" x="0" y="25" width="70" height="40" rx="3"/>
+<!-- Smokestacks -->
+<rect class="gas-stack" x="15" y="5" width="8" height="25" rx="1"/>
+<!-- Flame -->
+<path class="gas-flame" d="M 19 5 Q 17 0 19 -3 Q 21 0 19 5"/>
+<!-- Turbine housing -->
+<ellipse class="gas-plant" cx="55" cy="45" rx="12" ry="8"/>
+```
+
+### Transmission Pylon with Insulators
+```xml
+<!-- Tapered tower -->
+<polygon class="pylon" points="20,0 25,0 30,80 15,80"/>
+<!-- Cross arms -->
+<line class="pylon-arm" x1="5" y1="10" x2="40" y2="10"/>
+<line class="pylon-arm" x1="8" y1="25" x2="37" y2="25"/>
+<!-- Insulators (where lines attach) -->
+<circle class="insulator" cx="8" cy="10" r="3"/>
+<circle class="insulator" cx="37" cy="10" r="3"/>
+```
+
+### Transformer Symbol
+```xml
+<!-- Two coils with core -->
+<circle class="transformer-coil" cx="25" cy="25" r="12"/>
+<circle class="transformer-coil" cx="55" cy="25" r="12"/>
+<rect class="transformer-core" x="35" y="15" width="10" height="20" rx="2"/>
+<!-- Busbars -->
+<line x1="0" y1="15" x2="-10" y2="15" stroke="#EF9F27" stroke-width="3"/>
+```
+
+### Pole-mounted Transformer
+```xml
+<rect class="pole" x="18" y="0" width="4" height="60"/>
+<line x1="10" y1="8" x2="30" y2="8" stroke="#854F0B" stroke-width="2"/>
+<rect class="dist-transformer" x="8" y="15" width="24" height="18" rx="2"/>
+<line class="lv-line" x1="20" y1="33" x2="20" y2="60"/>
+```
+
+### House with Roof
+```xml
+<rect class="home" x="0" y="25" width="35" height="30" rx="2"/>
+<polygon class="home-roof" points="0,25 17,8 35,25"/>
+<!-- Door -->
+<rect x="8" y="35" width="8" height="15" fill="#085041"/>
+<!-- Window -->
+<rect x="22" y="32" width="8" height="8" fill="#9FE1CB"/>
+```
+
+### Factory Building
+```xml
+<rect class="factory" x="0" y="15" width="90" height="50" rx="3"/>
+<!-- Smokestacks -->
+<rect class="factory-stack" x="15" y="0" width="10" height="20"/>
+<!-- Windows row -->
+<rect x="10" y="30" width="15" height="12" fill="#F5C4B3"/>
+<rect x="30" y="30" width="15" height="12" fill="#F5C4B3"/>
+<!-- Loading dock -->
+<rect x="55" y="50" width="30" height="15" fill="#993C1D"/>
+```
+
+### EV Charger with Car
+```xml
+<!-- Charging station -->
+<rect class="ev-charger" x="20" y="0" width="25" height="45" rx="3"/>
+<rect x="24" y="5" width="17" height="12" rx="1" fill="#3C3489"/>
+<!-- Cable -->
+<path d="M 32 20 Q 32 35 45 40" stroke="#534AB7" stroke-width="2" fill="none"/>
+<circle cx="45" cy="40" r="4" fill="#534AB7"/>
+<!-- Status light -->
+<circle cx="32" cy="38" r="3" fill="#97C459"/>
+
+<!-- EV Car -->
+<path class="ev-car" d="M 5 20 L 5 12 Q 5 5 15 5 L 45 5 Q 55 5 55 12 L 55 20 Z"/>
+<!-- Windows -->
+<rect x="10" y="8" width="15" height="8" rx="2" fill="#534AB7"/>
+<!-- Wheels -->
+<circle cx="15" cy="22" r="5" fill="#2C2C2A"/>
+<!-- Charging bolt icon -->
+<path d="M 28 12 L 32 8 L 30 11 L 34 11 L 30 16 L 32 13 Z" fill="#97C459"/>
+```
+
+## Voltage Level Line Styles
+
+```css
+/* High voltage (transmission) - thick, bright */
+.hv-line { stroke: #EF9F27; stroke-width: 2.5; fill: none; }
+
+/* Medium voltage (distribution) - medium */
+.mv-line { stroke: #BA7517; stroke-width: 2; fill: none; }
+
+/* Low voltage (consumer) - thin, darker */
+.lv-line { stroke: #854F0B; stroke-width: 1.5; fill: none; }
+
+/* Smart grid data - dashed purple */
+.data-flow { stroke: #7F77DD; stroke-width: 1; fill: none; stroke-dasharray: 3 2; opacity: 0.7; }
+```
+
+## Flow Arrow Marker
+
+```xml
+<defs>
+  <marker id="flow-arrow" viewBox="0 0 10 10" refX="9" refY="5" 
+          markerWidth="6" markerHeight="6" orient="auto">
+    <path d="M0,0 L10,5 L0,10 Z" fill="#EF9F27"/>
+  </marker>
+</defs>
+<!-- Usage -->
+<line x1="140" y1="105" x2="210" y2="105" class="hv-line" marker-end="url(#flow-arrow)"/>
+```
+
+## CSS Classes
+
+```css
+/* Generation */
+.nuclear-tower { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.nuclear-building { fill: #EEEDFE; stroke: #534AB7; stroke-width: 1; }
+.solar-panel { fill: #3C3489; stroke: #534AB7; stroke-width: 0.5; }
+.wind-tower { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.wind-blade { fill: #F1EFE8; stroke: #888780; stroke-width: 0.5; }
+.gas-plant { fill: #FAECE7; stroke: #993C1D; stroke-width: 1; }
+.gas-flame { fill: #EF9F27; }
+
+/* Transmission */
+.pylon { fill: #5F5E5A; stroke: #444441; stroke-width: 0.5; }
+.insulator { fill: #FAEEDA; stroke: #854F0B; stroke-width: 0.5; }
+.substation { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+.transformer-coil { fill: none; stroke: #185FA5; stroke-width: 1.5; }
+
+/* Distribution */
+.pole { fill: #854F0B; stroke: #633806; stroke-width: 0.5; }
+.dist-transformer { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 1; }
+
+/* Consumption */
+.home { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 1; }
+.home-roof { fill: #0F6E56; stroke: #085041; stroke-width: 0.5; }
+.factory { fill: #FAECE7; stroke: #993C1D; stroke-width: 1; }
+.ev-charger { fill: #EEEDFE; stroke: #534AB7; stroke-width: 1; }
+.ev-car { fill: #3C3489; stroke: #534AB7; stroke-width: 0.5; }
+
+/* Smart grid */
+.smart-grid { fill: #EEEDFE; stroke: #534AB7; stroke-width: 1.5; }
+```
+
+## Layout Notes
+
+- **ViewBox**: 820×520 (wide for 4-column layout)
+- **Column widths**: ~200px per stage
+- **Stage dividers**: Vertical dashed lines at x=200, 420, 620
+- **Stage labels**: Top of diagram, uppercase for emphasis
+- **Flow direction**: Left-to-right with arrows showing power flow
+- **Data overlay**: Smart grid data lines use different style (dashed purple) to distinguish from power lines
+- **Capacity labels**: Show MW ratings on generators for context
+- **Voltage labels**: Show transformation ratios at substations
@@ -0,0 +1,172 @@
+# Feature Film Production Pipeline
+
+A phased workflow showing the five stages of filmmaking, using containers with inner nodes and horizontal sub-flows within a phase.
+
+## Key Patterns Used
+
+- **Phase containers**: Large rounded rectangles with neutral background and dashed borders
+- **Inner task nodes**: Smaller colored nodes inside containers for sub-tasks
+- **Horizontal flow within container**: Post-production shows sequential pipeline with arrows (Editing → Color → VFX → Sound → Score)
+- **Consistent phase spacing**: ~30px gap between phase containers
+- **Phase labels with subtitles**: Each container has title + description
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 780" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+  </defs>
+
+  <!-- Phase 1: Development -->
+  <g>
+    <rect x="40" y="30" width="600" height="110" rx="16" stroke-width="1" stroke-dasharray="6 4" fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="56">Development</text>
+    <text class="ts" x="66" y="74">Concept to greenlight</text>
+  </g>
+  <g class="node c-purple">
+    <rect x="70" y="90" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="150" y="108" text-anchor="middle" dominant-baseline="central">Script / screenplay</text>
+  </g>
+  <g class="node c-purple">
+    <rect x="260" y="90" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="340" y="108" text-anchor="middle" dominant-baseline="central">Financing / budget</text>
+  </g>
+  <g class="node c-purple">
+    <rect x="450" y="90" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="530" y="108" text-anchor="middle" dominant-baseline="central">Casting leads</text>
+  </g>
+
+  <!-- Arrow to Phase 2 -->
+  <line x1="340" y1="140" x2="340" y2="170" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Phase 2: Pre-production -->
+  <g>
+    <rect x="40" y="170" width="600" height="110" rx="16" stroke-width="1" stroke-dasharray="6 4" fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="196">Pre-production</text>
+    <text class="ts" x="66" y="214">Planning and preparation</text>
+  </g>
+  <g class="node c-teal">
+    <rect x="70" y="230" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="150" y="248" text-anchor="middle" dominant-baseline="central">Storyboards</text>
+  </g>
+  <g class="node c-teal">
+    <rect x="260" y="230" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="340" y="248" text-anchor="middle" dominant-baseline="central">Location scouting</text>
+  </g>
+  <g class="node c-teal">
+    <rect x="450" y="230" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="530" y="248" text-anchor="middle" dominant-baseline="central">Crew hiring</text>
+  </g>
+
+  <!-- Arrow to Phase 3 -->
+  <line x1="340" y1="280" x2="340" y2="310" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Phase 3: Production -->
+  <g>
+    <rect x="40" y="310" width="600" height="110" rx="16" stroke-width="1" stroke-dasharray="6 4" fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="336">Production</text>
+    <text class="ts" x="66" y="354">Principal photography</text>
+  </g>
+  <g class="node c-coral">
+    <rect x="70" y="370" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="150" y="388" text-anchor="middle" dominant-baseline="central">Filming / shooting</text>
+  </g>
+  <g class="node c-coral">
+    <rect x="260" y="370" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="340" y="388" text-anchor="middle" dominant-baseline="central">Production sound</text>
+  </g>
+  <g class="node c-coral">
+    <rect x="450" y="370" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="530" y="388" text-anchor="middle" dominant-baseline="central">VFX plates</text>
+  </g>
+
+  <!-- Arrow to Phase 4 -->
+  <line x1="340" y1="420" x2="340" y2="450" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Phase 4: Post-production -->
+  <g>
+    <rect x="40" y="450" width="600" height="150" rx="16" stroke-width="1" stroke-dasharray="6 4" fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="476">Post-production</text>
+    <text class="ts" x="66" y="494">Assembly and finishing</text>
+  </g>
+  <g class="node c-amber">
+    <rect x="70" y="510" width="110" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="125" y="528" text-anchor="middle" dominant-baseline="central">Editing</text>
+  </g>
+  <g class="node c-amber">
+    <rect x="195" y="510" width="110" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="250" y="528" text-anchor="middle" dominant-baseline="central">Color grade</text>
+  </g>
+  <g class="node c-amber">
+    <rect x="320" y="510" width="90" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="365" y="528" text-anchor="middle" dominant-baseline="central">VFX</text>
+  </g>
+  <g class="node c-amber">
+    <rect x="425" y="510" width="100" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="475" y="528" text-anchor="middle" dominant-baseline="central">Sound mix</text>
+  </g>
+  <g class="node c-amber">
+    <rect x="540" y="510" width="80" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="580" y="528" text-anchor="middle" dominant-baseline="central">Score</text>
+  </g>
+  <!-- Flow arrows within post -->
+  <line x1="180" y1="528" x2="195" y2="528" class="arr" marker-end="url(#arrow)"/>
+  <line x1="305" y1="528" x2="320" y2="528" class="arr" marker-end="url(#arrow)"/>
+  <line x1="410" y1="528" x2="425" y2="528" class="arr" marker-end="url(#arrow)"/>
+  <line x1="525" y1="528" x2="540" y2="528" class="arr" marker-end="url(#arrow)"/>
+  <!-- Final delivery label -->
+  <g class="node c-amber">
+    <rect x="240" y="556" width="200" height="32" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="340" y="572" text-anchor="middle" dominant-baseline="central">Final master / DCP</text>
+  </g>
+  <line x1="340" y1="546" x2="340" y2="556" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Arrow to Phase 5 -->
+  <line x1="340" y1="600" x2="340" y2="630" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Phase 5: Distribution -->
+  <g>
+    <rect x="40" y="630" width="600" height="110" rx="16" stroke-width="1" stroke-dasharray="6 4" fill="var(--bg-secondary)" stroke="var(--border)"/>
+    <text class="th" x="66" y="656">Distribution</text>
+    <text class="ts" x="66" y="674">Release and exhibition</text>
+  </g>
+  <g class="node c-blue">
+    <rect x="70" y="690" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="150" y="708" text-anchor="middle" dominant-baseline="central">Film festivals</text>
+  </g>
+  <g class="node c-blue">
+    <rect x="260" y="690" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="340" y="708" text-anchor="middle" dominant-baseline="central">Theatrical release</text>
+  </g>
+  <g class="node c-blue">
+    <rect x="450" y="690" width="160" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="530" y="708" text-anchor="middle" dominant-baseline="central">Streaming / VOD</text>
+  </g>
+</svg>
+```
+
+## Color Assignments
+
+| Element | Color | Reason |
+|---------|-------|--------|
+| Phase containers | Neutral (dashed) | Subtle grouping, doesn't compete with content |
+| Development tasks | `c-purple` | Creative/concept work |
+| Pre-production tasks | `c-teal` | Planning and preparation |
+| Production tasks | `c-coral` | Active filming (main event) |
+| Post-production tasks | `c-amber` | Processing/refinement |
+| Distribution tasks | `c-blue` | Outward delivery/release |
+
+## Layout Notes
+
+- **ViewBox**: 680×780 (standard width, tall for 5 phases)
+- **Container style**: Dashed border (`stroke-dasharray="6 4"`), neutral fill (`var(--bg-secondary)`), `stroke-width="1"`
+- **Container height**: 110px for 3-node phases, 150px for post-production (more complex)
+- **Inner node dimensions**: 160×36px for standard tasks, variable width for post-production sequential flow
+- **Phase gap**: 30px between containers
+- **Horizontal sub-flow**: Post-production uses tightly packed nodes with arrows between them to show sequence
+- **Convergence node**: "Final master / DCP" sits below the horizontal flow, collecting all post outputs
@@ -0,0 +1,165 @@
+# Hospital Emergency Department Flow
+
+A multi-path flowchart showing patient journey through an emergency department with priority-based routing using semantic colors (red=critical, amber=urgent, green=stable).
+
+## Key Patterns Used
+
+- **Semantic color coding**: Red/amber/green for priority levels (not arbitrary decoration)
+- **Stage labels**: Left-aligned faded labels marking workflow phases
+- **Convergent paths**: Multiple entry points merging, then branching, then converging again
+- **Nested containers**: Diagnostics grouped in a container with inner nodes
+- **Legend**: Color key at bottom explaining priority levels
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 620" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+  </defs>
+
+  <!-- Stage labels -->
+  <text class="ts" x="40" y="68" text-anchor="start" opacity=".5">Arrival</text>
+  <text class="ts" x="40" y="168" text-anchor="start" opacity=".5">Assessment</text>
+  <text class="ts" x="40" y="288" text-anchor="start" opacity=".5">Priority routing</text>
+  <text class="ts" x="40" y="418" text-anchor="start" opacity=".5">Diagnostics</text>
+  <text class="ts" x="40" y="518" text-anchor="start" opacity=".5">Outcome</text>
+
+  <!-- Arrival: Ambulance -->
+  <g class="node c-gray">
+    <rect x="140" y="40" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="220" y="60" text-anchor="middle" dominant-baseline="central">Ambulance</text>
+    <text class="ts" x="220" y="80" text-anchor="middle" dominant-baseline="central">Emergency transport</text>
+  </g>
+
+  <!-- Arrival: Walk-in -->
+  <g class="node c-gray">
+    <rect x="380" y="40" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="460" y="60" text-anchor="middle" dominant-baseline="central">Walk-in</text>
+    <text class="ts" x="460" y="80" text-anchor="middle" dominant-baseline="central">Self-arrival</text>
+  </g>
+
+  <!-- Arrows to Triage -->
+  <line x1="220" y1="96" x2="300" y2="140" class="arr" marker-end="url(#arrow)"/>
+  <line x1="460" y1="96" x2="380" y2="140" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Triage -->
+  <g class="node c-purple">
+    <rect x="240" y="140" width="200" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="340" y="160" text-anchor="middle" dominant-baseline="central">Triage</text>
+    <text class="ts" x="340" y="180" text-anchor="middle" dominant-baseline="central">Nurse assessment, vitals</text>
+  </g>
+
+  <!-- Arrows from Triage to Priority -->
+  <line x1="280" y1="196" x2="140" y2="260" class="arr" marker-end="url(#arrow)"/>
+  <line x1="340" y1="196" x2="340" y2="260" class="arr" marker-end="url(#arrow)"/>
+  <line x1="400" y1="196" x2="540" y2="260" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Priority: Red - Trauma -->
+  <g class="node c-red">
+    <rect x="60" y="260" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="140" y="280" text-anchor="middle" dominant-baseline="central">Trauma bay</text>
+    <text class="ts" x="140" y="300" text-anchor="middle" dominant-baseline="central">Priority: critical</text>
+  </g>
+
+  <!-- Priority: Yellow - Exam rooms -->
+  <g class="node c-amber">
+    <rect x="260" y="260" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="340" y="280" text-anchor="middle" dominant-baseline="central">Exam rooms</text>
+    <text class="ts" x="340" y="300" text-anchor="middle" dominant-baseline="central">Priority: urgent</text>
+  </g>
+
+  <!-- Priority: Green - Waiting -->
+  <g class="node c-green">
+    <rect x="460" y="260" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="540" y="280" text-anchor="middle" dominant-baseline="central">Waiting area</text>
+    <text class="ts" x="540" y="300" text-anchor="middle" dominant-baseline="central">Priority: stable</text>
+  </g>
+
+  <!-- Arrows to Diagnostics -->
+  <line x1="140" y1="316" x2="220" y2="390" class="arr" marker-end="url(#arrow)"/>
+  <line x1="340" y1="316" x2="340" y2="390" class="arr" marker-end="url(#arrow)"/>
+  <line x1="540" y1="316" x2="460" y2="390" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Diagnostics container -->
+  <g class="c-teal">
+    <rect x="140" y="390" width="400" height="56" rx="12" stroke-width="0.5"/>
+  </g>
+
+  <!-- Labs -->
+  <g class="node c-teal">
+    <rect x="160" y="400" width="110" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="215" y="418" text-anchor="middle" dominant-baseline="central">Labs</text>
+  </g>
+
+  <!-- Imaging -->
+  <g class="node c-teal">
+    <rect x="285" y="400" width="110" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="340" y="418" text-anchor="middle" dominant-baseline="central">Imaging</text>
+  </g>
+
+  <!-- Diagnosis -->
+  <g class="node c-teal">
+    <rect x="410" y="400" width="110" height="36" rx="6" stroke-width="0.5"/>
+    <text class="ts" x="465" y="418" text-anchor="middle" dominant-baseline="central">Diagnosis</text>
+  </g>
+
+  <!-- Arrows to Outcomes -->
+  <line x1="215" y1="446" x2="160" y2="490" class="arr" marker-end="url(#arrow)"/>
+  <line x1="340" y1="446" x2="340" y2="490" class="arr" marker-end="url(#arrow)"/>
+  <line x1="465" y1="446" x2="520" y2="490" class="arr" marker-end="url(#arrow)"/>
+
+  <!-- Outcome: Admission -->
+  <g class="node c-coral">
+    <rect x="80" y="490" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="160" y="510" text-anchor="middle" dominant-baseline="central">Admission</text>
+    <text class="ts" x="160" y="530" text-anchor="middle" dominant-baseline="central">Inpatient ward</text>
+  </g>
+
+  <!-- Outcome: Surgery -->
+  <g class="node c-coral">
+    <rect x="260" y="490" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="340" y="510" text-anchor="middle" dominant-baseline="central">Surgery</text>
+    <text class="ts" x="340" y="530" text-anchor="middle" dominant-baseline="central">Operating room</text>
+  </g>
+
+  <!-- Outcome: Discharge -->
+  <g class="node c-coral">
+    <rect x="440" y="490" width="160" height="56" rx="8" stroke-width="0.5"/>
+    <text class="th" x="520" y="510" text-anchor="middle" dominant-baseline="central">Discharge</text>
+    <text class="ts" x="520" y="530" text-anchor="middle" dominant-baseline="central">Home with instructions</text>
+  </g>
+
+  <!-- Legend -->
+  <text class="ts" x="140" y="580" opacity=".5">Priority levels</text>
+  <g class="c-red"><rect x="140" y="592" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="162" y="604">Critical</text>
+  <g class="c-amber"><rect x="240" y="592" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="262" y="604">Urgent</text>
+  <g class="c-green"><rect x="340" y="592" width="14" height="14" rx="3" stroke-width="0.5"/></g>
+  <text class="ts" x="362" y="604">Stable</text>
+</svg>
+```
+
+## Color Assignments
+
+| Element | Color | Reason |
+|---------|-------|--------|
+| Entry points (Ambulance, Walk-in) | `c-gray` | Neutral starting points |
+| Triage | `c-purple` | Processing/assessment step |
+| Trauma bay | `c-red` | Critical priority (semantic) |
+| Exam rooms | `c-amber` | Urgent priority (semantic) |
+| Waiting area | `c-green` | Stable priority (semantic) |
+| Diagnostics | `c-teal` | Clinical services category |
+| Outcomes | `c-coral` | Final disposition category |
+
+## Layout Notes
+
+- **ViewBox**: 680×620 (standard width, extended height for 5 stages)
+- **Stage spacing**: ~110-130px between stage rows
+- **Diagonal arrows**: Connect nodes across columns naturally
+- **Container with inner nodes**: Diagnostics uses outer `c-teal` rect with inner node rects
@@ -0,0 +1,114 @@
+# ML Benchmark Grouped Bar Chart with Dual Axis
+
+A quantitative data visualization comparing LLM inference speed across quantization levels with dual Y-axes, threshold markers, and an inset accuracy table.
+
+## Key Patterns Used
+
+- **Grouped bars**: Min/max range pairs per category using semantic color pairs (lighter=min, darker=max)
+- **Dual Y-axis**: Left axis for primary metric (tok/s), right axis for secondary metric (VRAM GB)
+- **Overlay line graph**: `<polyline>` with labeled dots showing VRAM usage across categories
+- **Threshold marker**: Dashed red horizontal line indicating hardware limit (24 GB GPU)
+- **Zone annotations**: Subtle text labels above/below threshold for context
+- **Inset data table**: Alternating row fills below chart with quantitative accuracy data
+- **Semantic color coding**: Each quantization level gets its own color from the skill palette (red=OOM, amber=slow, teal=sweet spot, blue=fast)
+
+## Diagram Type
+
+This is a **quantitative data chart** with:
+- **Grouped vertical bars**: Range bars showing min–max performance per category
+- **Secondary axis line**: VRAM usage overlaid as a connected scatter plot
+- **Threshold annotation**: Hardware constraint line
+- **Inset table**: Supporting accuracy metrics
+
+## Chart Layout Formula
+
+```
+Chart area:  x=90–590, y=70–410 (500px wide, 340px tall)
+Left Y-axis: Primary metric (tok/s)
+             y = 410 − (val / max_val) × 340
+Right Y-axis: Secondary metric (VRAM GB)
+              Same formula, different scale labels
+Groups:       Divide width by number of categories
+Bars:         Each group → min bar (34px) + 8px gap + max bar (34px)
+Line overlay: <polyline> connecting data points across group centers
+Threshold:    Horizontal dashed line at critical value
+Table:        Below chart, alternating row fills
+```
+
+## Data Mapped
+
+| Quantization | Model Size | Speed (tok/s) | VRAM (GB) | MMLU Pro | Status |
+|-------------|-----------|---------------|-----------|----------|--------|
+| FP16 | 62 GB | 0.5–2 | 62 | 75.2 | OOM / unusable |
+| Q8_0 | 32 GB | 3–5 | 32 | 75.0 | Partial offload |
+| Q4_K_M | 16.8 GB | 8–12 | 16.8 | 73.1 | Fits in VRAM ✓ |
+| IQ3_M | 12 GB | 12–15 | 12 | 70.5 | Full GPU speed |
+
+## Bar CSS Classes
+
+```css
+/* Light mode */
+.bar-fp16-min { fill: #FCEBEB; stroke: #A32D2D; stroke-width: 0.75; }
+.bar-fp16-max { fill: #F7C1C1; stroke: #A32D2D; stroke-width: 0.75; }
+.bar-q8-min   { fill: #FAEEDA; stroke: #854F0B; stroke-width: 0.75; }
+.bar-q8-max   { fill: #FAC775; stroke: #854F0B; stroke-width: 0.75; }
+.bar-q4-min   { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 0.75; }
+.bar-q4-max   { fill: #9FE1CB; stroke: #0F6E56; stroke-width: 0.75; }
+.bar-iq3-min  { fill: #E6F1FB; stroke: #185FA5; stroke-width: 0.75; }
+.bar-iq3-max  { fill: #B5D4F4; stroke: #185FA5; stroke-width: 0.75; }
+
+/* Dark mode */
+@media (prefers-color-scheme: dark) {
+  .bar-fp16-min { fill: #501313; stroke: #F09595; }
+  .bar-fp16-max { fill: #791F1F; stroke: #F09595; }
+  .bar-q8-min   { fill: #412402; stroke: #EF9F27; }
+  .bar-q8-max   { fill: #633806; stroke: #EF9F27; }
+  .bar-q4-min   { fill: #04342C; stroke: #5DCAA5; }
+  .bar-q4-max   { fill: #085041; stroke: #5DCAA5; }
+  .bar-iq3-min  { fill: #042C53; stroke: #85B7EB; }
+  .bar-iq3-max  { fill: #0C447C; stroke: #85B7EB; }
+}
+```
+
+## Overlay Line CSS
+
+```css
+.vram-line { stroke: #534AB7; stroke-width: 2.5; fill: none; }
+.vram-dot  { fill: #534AB7; stroke: var(--bg-primary); stroke-width: 2; }
+.vram-label { font-family: system-ui, sans-serif; font-size: 10px; fill: #534AB7; font-weight: 500; }
+```
+
+## Threshold CSS
+
+```css
+.threshold { stroke: #A32D2D; stroke-width: 1; stroke-dasharray: 6 3; fill: none; }
+.threshold-label { font-family: system-ui, sans-serif; font-size: 10px; fill: #A32D2D; font-weight: 500; }
+```
+
+## Table CSS
+
+```css
+.tbl-header { fill: var(--bg-secondary); stroke: var(--border); stroke-width: 0.5; }
+.tbl-row    { fill: transparent; stroke: var(--border); stroke-width: 0.25; }
+.tbl-alt    { fill: var(--bg-secondary); stroke: var(--border); stroke-width: 0.25; }
+```
+
+## Layout Notes
+
+- **ViewBox**: 680×660 (portrait, chart + legend + table)
+- **Chart area**: y=70–410, x=90–590
+- **Legend row**: y=458–470
+- **Inset table**: y=490–620
+- **Bar width**: 34px each, 8px gap between min/max pair
+- **Group spacing**: 125px center-to-center
+- **Dot halo**: White circle (r=6) behind colored dot (r=5) for legibility over bars/grid
+
+## When to Use This Pattern
+
+Use this diagram style for:
+- Model benchmark comparisons across quantization levels
+- Performance vs. resource usage tradeoff analysis
+- Any multi-metric comparison with a hardware/software constraint
+- GPU/TPU/accelerator benchmarking dashboards
+- Accuracy vs. speed Pareto frontiers
+- Hardware requirement sizing charts
@@ -0,0 +1,325 @@
+# Place Order — UML Sequence Diagram
+
+A UML sequence diagram for the 'Place Order' use case in an e-commerce system. Six lifelines (:Customer, :ShoppingCart, :OrderController, :PaymentGateway, :InventorySystem, :EmailService) interact across 14 numbered messages. An **alt** combined fragment (amber) covers the three conditional outcomes — payment authorized, payment failed, and item unavailable. A **par** combined fragment (teal) nested inside the success branch shows concurrent email confirmation and stock-level update. Demonstrates activation bars, two distinct arrowhead types, UML pentagon fragment tags, and guard conditions.
+
+## Key Patterns Used
+
+- **6 lifelines at equal spacing**: Lifeline centers placed at x=90, 190, 290, 390, 490, 590 (100px apart) so the first box left-edge lands at x=40 and the last right-edge lands at x=640 — exactly filling the safe area
+- **Two-row actor headers**: Each lifeline box shows `":"` (small, tertiary color) on one line and the class name (slightly larger, bold) on a second line, matching the UML anonymous-instance notation `:ClassName`
+- **Two separate arrowhead markers**: `#arr-call` is a filled triangle (`<polygon>`) for synchronous calls; `#arr-ret` is an open chevron (`fill="none"`) for dashed return messages — both use `context-stroke` to inherit line color
+- **Activation bars**: Narrow 8px-wide rectangles (`class="activation"`) layered on top of lifeline stems to show object execution periods; OrderController's bar spans the entire interaction; shorter bars mark PaymentGateway, InventorySystem, and EmailService during their active windows
+- **Combined fragment pentagon tag**: Each `alt` / `par` frame uses a `<polygon>` dog-eared label shape in the top-left corner — points follow the pattern `(x,y) (x+w,y) (x+w+6,y+6) (x+w+6,y+18) (x,y+18)` creating the characteristic UML notch
+- **Nested par inside alt**: The `par` rect (teal) sits inside branch 1 of the `alt` rect (amber); inner rect uses inset x/y (+15/+2) so both borders remain visible and distinguishable
+- **Guard conditions**: Italic text in `[square brackets]` placed immediately after each alt frame divider line, or just inside the top frame for branch 1 — rendered with a dedicated `guard-lbl` class (italic, amber color)
+- **Alt branch dividers**: Solid horizontal lines (`.frag-alt-div`) span the full alt rect width to separate the three branches; par branch separator uses a dashed line (`.frag-par-div`) per UML spec
+- **Lifeline end caps**: Short 14px horizontal tick marks at y=590 (bottom of all lifeline stems) to formally terminate each lifeline
+- **Message sequence annotation**: A faint counter row below the legend (①–③ / ④–⑩ / ⑪–⑫ / ⑬–⑭) explains the four message groups without adding noise to the diagram body
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 648" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <!-- Open chevron arrowhead — return messages -->
+    <marker id="arr-ret" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+
+    <!-- Filled triangle arrowhead — synchronous calls -->
+    <marker id="arr-call" viewBox="0 0 10 10" refX="9" refY="5"
+            markerWidth="7" markerHeight="7" orient="auto">
+      <polygon points="0,1 10,5 0,9" fill="context-stroke"/>
+    </marker>
+  </defs>
+
+  <!--
+    Lifeline centres (x):
+      L1 :Customer        →  90
+      L2 :ShoppingCart    → 190
+      L3 :OrderController → 290
+      L4 :PaymentGateway  → 390
+      L5 :InventorySystem → 490
+      L6 :EmailService    → 590
+    Actor boxes: x = cx−50, y=20, w=100, h=56, rx=6
+    Lifelines:   x = cx,    y1=76, y2=590
+  -->
+
+  <!-- ── 1. LIFELINE DASHED STEMS (drawn first, behind everything) ── -->
+  <line x1="90"  y1="76" x2="90"  y2="590" class="lifeline"/>
+  <line x1="190" y1="76" x2="190" y2="590" class="lifeline"/>
+  <line x1="290" y1="76" x2="290" y2="590" class="lifeline"/>
+  <line x1="390" y1="76" x2="390" y2="590" class="lifeline"/>
+  <line x1="490" y1="76" x2="490" y2="590" class="lifeline"/>
+  <line x1="590" y1="76" x2="590" y2="590" class="lifeline"/>
+
+  <!-- ── 2. ACTOR HEADER BOXES ── -->
+
+  <!-- :Customer -->
+  <rect x="40"  y="20" width="100" height="56" rx="6" class="actor"/>
+  <text class="actor-colon" x="90"  y="40" text-anchor="middle" dominant-baseline="central">:</text>
+  <text class="actor-name"  x="90"  y="58" text-anchor="middle" dominant-baseline="central">Customer</text>
+
+  <!-- :ShoppingCart -->
+  <rect x="140" y="20" width="100" height="56" rx="6" class="actor"/>
+  <text class="actor-colon" x="190" y="37" text-anchor="middle" dominant-baseline="central">:</text>
+  <text class="actor-name"  x="190" y="55" text-anchor="middle" dominant-baseline="central">ShoppingCart</text>
+
+  <!-- :OrderController -->
+  <rect x="240" y="20" width="100" height="56" rx="6" class="actor"/>
+  <text class="actor-colon" x="290" y="37" text-anchor="middle" dominant-baseline="central">:</text>
+  <text class="actor-name"  x="290" y="55" text-anchor="middle" dominant-baseline="central">OrderController</text>
+
+  <!-- :PaymentGateway -->
+  <rect x="340" y="20" width="100" height="56" rx="6" class="actor"/>
+  <text class="actor-colon" x="390" y="37" text-anchor="middle" dominant-baseline="central">:</text>
+  <text class="actor-name"  x="390" y="55" text-anchor="middle" dominant-baseline="central">PaymentGateway</text>
+
+  <!-- :InventorySystem -->
+  <rect x="440" y="20" width="100" height="56" rx="6" class="actor"/>
+  <text class="actor-colon" x="490" y="37" text-anchor="middle" dominant-baseline="central">:</text>
+  <text class="actor-name"  x="490" y="55" text-anchor="middle" dominant-baseline="central">InventorySystem</text>
+
+  <!-- :EmailService -->
+  <rect x="540" y="20" width="100" height="56" rx="6" class="actor"/>
+  <text class="actor-colon" x="590" y="37" text-anchor="middle" dominant-baseline="central">:</text>
+  <text class="actor-name"  x="590" y="55" text-anchor="middle" dominant-baseline="central">EmailService</text>
+
+  <!-- ── 3. ACTIVATION BARS ── -->
+  <!-- ShoppingCart: active while forwarding checkout → placeOrder -->
+  <rect x="186" y="102" width="8" height="26"  rx="1" class="activation"/>
+  <!-- OrderController: active throughout full sequence -->
+  <rect x="286" y="128" width="8" height="415" rx="1" class="activation"/>
+  <!-- PaymentGateway: active during auth check (happy-path branch only) -->
+  <rect x="386" y="154" width="8" height="46"  rx="1" class="activation"/>
+  <!-- InventorySystem: active from reserveItems → updateStockLevels end -->
+  <rect x="486" y="225" width="8" height="128" rx="1" class="activation"/>
+  <!-- EmailService: active during confirmation send -->
+  <rect x="586" y="290" width="8" height="25"  rx="1" class="activation"/>
+
+  <!-- ── 4. PRE-ALT MESSAGES ── -->
+
+  <!-- ① checkout()  :Customer → :ShoppingCart -->
+  <line x1="90"  y1="102" x2="186" y2="102" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="140" y="97" text-anchor="middle">checkout()</text>
+
+  <!-- ② placeOrder(cartItems)  :ShoppingCart → :OrderController -->
+  <line x1="194" y1="128" x2="286" y2="128" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="242" y="123" text-anchor="middle">placeOrder(cartItems)</text>
+
+  <!-- ③ authorizePayment(amount)  :OrderController → :PaymentGateway -->
+  <line x1="294" y1="154" x2="386" y2="154" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="342" y="149" text-anchor="middle">authorizePayment(amount)</text>
+
+  <!-- ── 5. ALT COMBINED FRAGMENT  y=166 → y=563 ── -->
+
+  <!-- Outer alt rectangle -->
+  <rect x="45" y="166" width="590" height="397" rx="3" class="frag-alt-bg"/>
+
+  <!-- Pentagon "alt" tag: TL corner notch shape -->
+  <polygon points="45,166 84,166 90,173 90,185 45,185" class="frag-alt-tag"/>
+  <text class="frag-alt-kw" x="67" y="178" text-anchor="middle" dominant-baseline="central">alt</text>
+
+  <!-- Guard: branch 1 -->
+  <text class="guard-lbl" x="96" y="179" dominant-baseline="central">[payment authorized]</text>
+
+  <!-- ─── Branch 1: payment authorized ─── -->
+
+  <!-- ④ « authorized »  :PaymentGateway → :OrderController (dashed return) -->
+  <line x1="386" y1="200" x2="294" y2="200" class="msg-ret" marker-end="url(#arr-ret)"/>
+  <text class="rlbl" x="342" y="195" text-anchor="middle">« authorized »</text>
+
+  <!-- ⑤ reserveItems(cartItems)  :OrderController → :InventorySystem -->
+  <line x1="294" y1="225" x2="486" y2="225" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="392" y="220" text-anchor="middle">reserveItems(cartItems)</text>
+
+  <!-- ⑥ « itemsReserved »  :InventorySystem → :OrderController (dashed return) -->
+  <line x1="486" y1="250" x2="294" y2="250" class="msg-ret" marker-end="url(#arr-ret)"/>
+  <text class="rlbl" x="392" y="245" text-anchor="middle">« itemsReserved »</text>
+
+  <!-- ── 6. PAR COMBINED FRAGMENT (nested inside alt branch 1)  y=266 → y=373 ── -->
+
+  <!-- Inner par rectangle -->
+  <rect x="60" y="266" width="560" height="107" rx="3" class="frag-par-bg"/>
+
+  <!-- Pentagon "par" tag -->
+  <polygon points="60,266 97,266 102,272 102,284 60,284" class="frag-par-tag"/>
+  <text class="frag-par-kw" x="81" y="275" text-anchor="middle" dominant-baseline="central">par</text>
+
+  <!-- Par branch 1: email confirmation -->
+
+  <!-- ⑦ sendConfirmationEmail()  :OrderController → :EmailService -->
+  <line x1="294" y1="295" x2="586" y2="295" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="442" y="290" text-anchor="middle">sendConfirmationEmail()</text>
+
+  <!-- ⑧ « emailQueued »  :EmailService → :OrderController (dashed return) -->
+  <line x1="586" y1="318" x2="294" y2="318" class="msg-ret" marker-end="url(#arr-ret)"/>
+  <text class="rlbl" x="442" y="313" text-anchor="middle">« emailQueued »</text>
+
+  <!-- Par branch divider (dashed, per UML spec) -->
+  <line x1="60" y1="336" x2="620" y2="336" class="frag-par-div"/>
+
+  <!-- Par branch 2: stock level update -->
+
+  <!-- ⑨ updateStockLevels()  :OrderController → :InventorySystem -->
+  <line x1="294" y1="355" x2="486" y2="355" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="392" y="350" text-anchor="middle">updateStockLevels()</text>
+
+  <!-- PAR fragment ends at y=373 -->
+
+  <!-- ⑩ « orderPlaced »  :OrderController → :Customer (dashed return, after par) -->
+  <line x1="286" y1="395" x2="90"  y2="395" class="msg-ret" marker-end="url(#arr-ret)"/>
+  <text class="rlbl" x="190" y="390" text-anchor="middle">« orderPlaced »</text>
+
+  <!-- ─── Alt else: [payment failed] ─── -->
+
+  <!-- Alt branch divider 1 (solid line) -->
+  <line x1="45" y1="415" x2="635" y2="415" class="frag-alt-div"/>
+  <text class="guard-lbl" x="50" y="429" dominant-baseline="central">[payment failed]</text>
+
+  <!-- ⑪ « authFailed »  :PaymentGateway → :OrderController (dashed return) -->
+  <line x1="390" y1="448" x2="294" y2="448" class="msg-ret" marker-end="url(#arr-ret)"/>
+  <text class="rlbl" x="344" y="443" text-anchor="middle">« authFailed »</text>
+
+  <!-- ⑫ error(PAYMENT_FAILED)  :OrderController → :Customer -->
+  <line x1="286" y1="470" x2="90"  y2="470" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="190" y="465" text-anchor="middle">error(PAYMENT_FAILED)</text>
+
+  <!-- ─── Alt else: [item unavailable] ─── -->
+
+  <!-- Alt branch divider 2 (solid line) -->
+  <line x1="45" y1="490" x2="635" y2="490" class="frag-alt-div"/>
+  <text class="guard-lbl" x="50" y="504" dominant-baseline="central">[item unavailable]</text>
+
+  <!-- ⑬ « unavailable »  :InventorySystem → :OrderController (dashed return) -->
+  <line x1="486" y1="523" x2="294" y2="523" class="msg-ret" marker-end="url(#arr-ret)"/>
+  <text class="rlbl" x="392" y="518" text-anchor="middle">« unavailable »</text>
+
+  <!-- ⑭ error(ITEM_UNAVAILABLE)  :OrderController → :Customer -->
+  <line x1="286" y1="545" x2="90"  y2="545" class="msg-call" marker-end="url(#arr-call)"/>
+  <text class="mlbl" x="190" y="540" text-anchor="middle">error(ITEM_UNAVAILABLE)</text>
+
+  <!-- ALT fragment ends at y=563 -->
+
+  <!-- ── 7. LIFELINE END CAPS (short horizontal tick at y=590) ── -->
+  <line x1="83"  y1="590" x2="97"  y2="590" stroke="var(--text-tertiary)" stroke-width="1.5"/>
+  <line x1="183" y1="590" x2="197" y2="590" stroke="var(--text-tertiary)" stroke-width="1.5"/>
+  <line x1="283" y1="590" x2="297" y2="590" stroke="var(--text-tertiary)" stroke-width="1.5"/>
+  <line x1="383" y1="590" x2="397" y2="590" stroke="var(--text-tertiary)" stroke-width="1.5"/>
+  <line x1="483" y1="590" x2="497" y2="590" stroke="var(--text-tertiary)" stroke-width="1.5"/>
+  <line x1="583" y1="590" x2="597" y2="590" stroke="var(--text-tertiary)" stroke-width="1.5"/>
+
+  <!-- ── 8. LEGEND ── -->
+  <text class="ts" x="45" y="612" opacity=".45">Legend —</text>
+
+  <line x1="110" y1="609" x2="148" y2="609"
+        stroke="var(--text-primary)" stroke-width="1.5" marker-end="url(#arr-call)"/>
+  <text class="ts" x="154" y="613" opacity=".75">Synchronous call</text>
+
+  <line x1="288" y1="609" x2="326" y2="609"
+        stroke="var(--text-secondary)" stroke-width="1.5"
+        stroke-dasharray="5 3" marker-end="url(#arr-ret)"/>
+  <text class="ts" x="332" y="613" opacity=".75">Return message</text>
+
+  <rect x="458" y="603" width="22" height="13" rx="2"
+        fill="#FAEEDA" fill-opacity="0.5" stroke="#854F0B" stroke-width="0.75"/>
+  <text class="ts" x="484" y="613" opacity=".75">alt fragment</text>
+
+  <rect x="558" y="603" width="22" height="13" rx="2"
+        fill="#E1F5EE" fill-opacity="0.6" stroke="#0F6E56" stroke-width="0.75"/>
+  <text class="ts" x="584" y="613" opacity=".75">par fragment</text>
+
+  <!-- Message group annotation -->
+  <text class="ts" x="45" y="632" opacity=".35">
+    ①–③ pre-condition  ·  ④–⑩ happy path  ·  ⑪–⑫ payment failure  ·  ⑬–⑭ item unavailable
+  </text>
+
+</svg>
+```
+
+## Custom CSS
+
+Add these classes to the hosting page `<style>` block (in addition to the standard skill CSS):
+
+```css
+/* ── Actor lifeline header boxes ── */
+.actor       { fill: var(--bg-secondary); stroke: var(--text-secondary); stroke-width: 0.5; }
+.actor-name  { font-family: system-ui, sans-serif; font-size: 11.5px; font-weight: 600;
+               fill: var(--text-primary); }
+.actor-colon { font-family: system-ui, sans-serif; font-size: 10px; fill: var(--text-tertiary); }
+
+/* ── Lifeline dashed stems ── */
+.lifeline { stroke: var(--text-tertiary); stroke-width: 1; stroke-dasharray: 6 4; fill: none; }
+
+/* ── Activation bars ── */
+.activation { fill: var(--bg-secondary); stroke: var(--text-secondary); stroke-width: 0.75; }
+
+/* ── Message arrows ── */
+.msg-call { stroke: var(--text-primary);   stroke-width: 1.5; fill: none; }
+.msg-ret  { stroke: var(--text-secondary); stroke-width: 1.5; fill: none; stroke-dasharray: 6 3; }
+
+/* ── Message labels ── */
+.mlbl { font-family: system-ui, sans-serif; font-size: 11px; fill: var(--text-primary); }
+.rlbl { font-family: system-ui, sans-serif; font-size: 11px; fill: var(--text-secondary);
+        font-style: italic; }
+
+/* ── Combined fragment: alt (amber) ── */
+.frag-alt-bg  { fill: #FAEEDA; fill-opacity: 0.18; stroke: #854F0B; stroke-width: 1; }
+.frag-alt-tag { fill: #FAEEDA; stroke: #854F0B; stroke-width: 0.75; }
+.frag-alt-kw  { font-family: system-ui, sans-serif; font-size: 11px; font-weight: 700;
+                fill: #633806; }
+.frag-alt-div { stroke: #854F0B; stroke-width: 0.75; fill: none; }
+.guard-lbl    { font-family: system-ui, sans-serif; font-size: 10.5px; font-style: italic;
+                fill: #854F0B; }
+
+/* ── Combined fragment: par (teal) ── */
+.frag-par-bg  { fill: #E1F5EE; fill-opacity: 0.35; stroke: #0F6E56; stroke-width: 1; }
+.frag-par-tag { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 0.75; }
+.frag-par-kw  { font-family: system-ui, sans-serif; font-size: 11px; font-weight: 700;
+                fill: #085041; }
+.frag-par-div { stroke: #0F6E56; stroke-width: 0.75; stroke-dasharray: 5 3; fill: none; }
+
+/* ── Dark mode overrides ── */
+@media (prefers-color-scheme: dark) {
+  .actor       { fill: #2c2c2a; stroke: #b4b2a9; }
+  .actor-name  { fill: #e8e6de; }
+  .actor-colon { fill: #888780; }
+  .frag-alt-bg  { fill: #633806; fill-opacity: 0.25; stroke: #EF9F27; }
+  .frag-alt-tag { fill: #633806; stroke: #EF9F27; }
+  .frag-alt-kw  { fill: #FAC775; }
+  .frag-alt-div { stroke: #EF9F27; }
+  .guard-lbl    { fill: #EF9F27; }
+  .frag-par-bg  { fill: #085041; fill-opacity: 0.35; stroke: #5DCAA5; }
+  .frag-par-tag { fill: #085041; stroke: #5DCAA5; }
+  .frag-par-kw  { fill: #9FE1CB; }
+  .frag-par-div { stroke: #5DCAA5; }
+}
+```
+
+## Color Assignments
+
+| Element | Color | Reason |
+|---------|-------|--------|
+| Actor header boxes | Neutral (`var(--bg-secondary)`) | Structural / non-semantic — all lifelines share one style |
+| Activation bars | Neutral (`var(--bg-secondary)`) | Show execution periods without adding semantic color |
+| Synchronous call arrows | `var(--text-primary)` + filled triangle | High contrast for calls — the primary interaction direction |
+| Return / dashed arrows | `var(--text-secondary)` + open chevron | Lower contrast for returns — secondary flow direction |
+| `alt` fragment | Amber (`#FAEEDA` / `#854F0B`) | Warning / conditional — matches `c-amber` semantic meaning |
+| Guard condition text | Amber italic | Belongs visually to the alt fragment |
+| `par` fragment | Teal (`#E1F5EE` / `#0F6E56`) | Concurrent success path — matches `c-teal` semantic meaning |
+| Alt branch dividers | Amber solid line | Continuity with the alt frame color |
+| Par branch divider | Teal dashed line | UML spec: par branches separated by dashed lines |
+
+## Layout Notes
+
+- **ViewBox**: 680×648 (standard width; height = lifeline bottom y=590 + legend + annotation + 16px buffer)
+- **Lifeline spacing formula**: `(safe_area_width) / (n_lifelines − 1) = 600 / 5 = 120px` — but use `spacing = 100px` starting at `x=90` so that first box left = 40 and last box right = 640 exactly
+- **Actor box split-label trick**: Two separate `<text>` elements per box — one for `":"` (10px, tertiary color) and one for the class name (11.5px bold, primary color) — avoids the 14px font needing ~150px+ per box for long names like "OrderController"
+- **Pentagon tag formula**: For a fragment starting at `(fx, fy)`, the tag polygon points are `(fx,fy) (fx+w,fy) (fx+w+6,fy+6) (fx+w+6,fy+18) (fx,fy+18)` where `w` = approximate text width of the keyword + 8px padding each side
+- **Nested fragment inset**: The `par` rect uses `x = alt_x + 15` and `y = alt_y_current + 2` so both borders remain simultaneously visible — inset enough to separate visually, not so much that it wastes vertical space
+- **Activation bar placement**: `x = lifeline_cx − 4`, `width = 8` — centered on the lifeline and narrow enough not to obscure the dashed stem behind it
+- **Message label y-offset**: All labels are placed at `y = arrow_y − 5` to sit just above the arrow line; this applies to both left-going and right-going arrows since `text-anchor="middle"` handles horizontal centering automatically
+- **Return arrows entering activation bars**: End `x1/x2` at lifeline center (e.g. x=294 for OrderController) rather than the bar edge (x=286) — the small overlap is intentional and clarifies the target object
+- **Alt guard label placement**: Branch 1 guard goes at `y = frame_top + 13` to the right of the pentagon tag; subsequent branch guards go at `divider_y + 14` so they sit just inside the new branch
+- **Lifeline end cap pattern**: `<line x1="cx−7" y1="590" x2="cx+7" y2="590" stroke-width="1.5"/>` — a simple symmetric tick, no special marker needed
@@ -0,0 +1,173 @@
+# Smart City Infrastructure
+
+A multi-system integration diagram showing interconnected city infrastructure (power, water, transport) connected through a central IoT platform with a citizen dashboard on top. Demonstrates hub-spoke layout, diverse physical shapes, and UI mockups.
+
+## Key Patterns Used
+
+- **Hub-spoke layout**: Central IoT platform with radiating data connections to subsystems
+- **Connection dots**: Visual indicators where data lines attach to the central hub
+- **Dashboard/UI mockup**: Screen with mini-charts, gauges, and status indicators
+- **Multi-system integration**: Three independent systems unified by central platform
+- **Semantic line styles**: Different stroke styles for data (dashed), power, water, roads
+- **Physical infrastructure shapes**: Solar panels, wind turbines, dams, pipes, roads, vehicles
+
+## New Shape Techniques
+
+### Solar Panels (angled polygons with grid lines)
+```xml
+<polygon class="solar-panel" points="0,25 35,8 38,12 3,29"/>
+<line class="solar-frame" x1="12" y1="22" x2="24" y2="13"/>
+<line x1="19" y1="29" x2="19" y2="40" stroke="#5F5E5A" stroke-width="2"/>
+```
+
+### Wind Turbine (tower + nacelle + blades)
+```xml
+<!-- Tapered tower -->
+<polygon class="wind-tower" points="20,70 30,70 28,25 22,25"/>
+<!-- Nacelle -->
+<rect class="wind-hub" x="18" y="20" width="14" height="8" rx="2"/>
+<!-- Hub -->
+<circle class="wind-hub" cx="25" cy="18" r="5"/>
+<!-- Blades (rotated ellipses) -->
+<ellipse class="wind-blade" cx="25" cy="5" rx="3" ry="13"/>
+<ellipse class="wind-blade" cx="14" cy="26" rx="3" ry="13" transform="rotate(-120, 25, 18)"/>
+<ellipse class="wind-blade" cx="36" cy="26" rx="3" ry="13" transform="rotate(120, 25, 18)"/>
+```
+
+### Battery with Charge Level
+```xml
+<rect class="battery" x="0" y="0" width="45" height="65" rx="5"/>
+<!-- Terminals -->
+<rect x="10" y="-6" width="10" height="8" rx="2" fill="#27500A"/>
+<rect x="25" y="-6" width="10" height="8" rx="2" fill="#27500A"/>
+<!-- Charge level fill -->
+<rect class="battery-level" x="5" y="12" width="35" height="48" rx="3"/>
+<text x="22" y="42" text-anchor="middle" fill="#173404" style="font-size:10px">85%</text>
+```
+
+### Dam/Reservoir with Water Waves
+```xml
+<!-- Dam wall -->
+<polygon class="reservoir-wall" points="0,60 10,0 70,0 80,60"/>
+<!-- Water behind dam -->
+<polygon class="water" points="12,10 68,10 68,55 75,55 75,58 5,58 5,55 12,55"/>
+<!-- Wave effect -->
+<path d="M 15 25 Q 25 22 35 25 Q 45 28 55 25" fill="none" stroke="#378ADD" stroke-width="1" opacity="0.5"/>
+```
+
+### Pipe Network with Joints and Valves
+```xml
+<path class="pipe" d="M 80 85 L 110 85"/>
+<circle class="pipe-joint" cx="10" cy="30" r="8"/>
+<circle class="valve" cx="190" cy="85" r="6"/>
+<!-- Distribution branches -->
+<path class="pipe-thin" d="M 18 30 L 50 30"/>
+<path class="pipe-thin" d="M 10 22 L 10 5 L 50 5"/>
+```
+
+### Road Intersection with Lane Markings
+```xml
+<!-- Road surface -->
+<line class="road" x1="0" y1="50" x2="170" y2="50"/>
+<line class="road-mark" x1="10" y1="50" x2="160" y2="50"/>
+<!-- Cross road -->
+<line class="road" x1="85" y1="0" x2="85" y2="100"/>
+<line class="road-mark" x1="85" y1="10" x2="85" y2="90"/>
+<!-- Embedded sensors -->
+<circle class="sensor" cx="40" cy="50" r="5"/>
+```
+
+### Traffic Light with Signal States
+```xml
+<rect class="traffic-light" x="0" y="0" width="14" height="32" rx="3"/>
+<circle class="light-red" cx="7" cy="8" r="4"/>
+<circle class="light-off" cx="7" cy="16" r="4"/>
+<circle class="light-off" cx="7" cy="24" r="4"/>
+```
+
+### Bus with Windows and Wheels
+```xml
+<rect class="bus" x="0" y="0" width="55" height="28" rx="6"/>
+<!-- Windows -->
+<rect class="bus-window" x="5" y="5" width="12" height="12" rx="2"/>
+<rect class="bus-window" x="20" y="5" width="12" height="12" rx="2"/>
+<!-- Wheels with hubcaps -->
+<circle cx="14" cy="30" r="6" fill="#2C2C2A"/>
+<circle cx="14" cy="30" r="3" fill="#5F5E5A"/>
+```
+
+### Dashboard UI Mockup
+```xml
+<!-- Monitor frame -->
+<rect class="dashboard" x="0" y="0" width="200" height="120" rx="8"/>
+<!-- Screen -->
+<rect class="screen" x="10" y="10" width="180" height="85" rx="4"/>
+<!-- Mini bar chart -->
+<rect class="screen-content" x="18" y="18" width="50" height="35" rx="2"/>
+<rect class="screen-chart" x="22" y="38" width="8" height="12"/>
+<rect class="screen-chart" x="33" y="32" width="8" height="18"/>
+<!-- Gauge -->
+<circle class="screen-bar" cx="100" cy="35" r="12"/>
+<text x="100" y="39" text-anchor="middle" fill="#E8E6DE" style="font-size:8px">78%</text>
+<!-- Status indicators -->
+<circle cx="35" cy="74" r="6" fill="#97C459"/>
+<circle cx="75" cy="74" r="6" fill="#97C459"/>
+<circle cx="115" cy="74" r="6" fill="#EF9F27"/>
+```
+
+### Hexagonal IoT Hub with Connection Points
+```xml
+<!-- Outer hexagon -->
+<polygon class="iot-hex" points="0,-45 39,-22 39,22 0,45 -39,22 -39,-22"/>
+<!-- Inner hexagon -->
+<polygon class="iot-inner" points="0,-20 17,-10 17,10 0,20 -17,10 -17,-10"/>
+<!-- Connection dots on data lines -->
+<circle cx="321" cy="248" r="4" fill="#7F77DD"/>
+```
+
+## CSS Classes for Infrastructure
+
+```css
+/* Power system */
+.solar-panel { fill: #3C3489; stroke: #534AB7; stroke-width: 0.5; }
+.solar-frame { fill: none; stroke: #EEEDFE; stroke-width: 0.5; }
+.wind-tower { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.wind-blade { fill: #F1EFE8; stroke: #888780; stroke-width: 0.5; }
+.battery { fill: #27500A; stroke: #3B6D11; stroke-width: 1.5; }
+.battery-level { fill: #97C459; }
+.power-line { stroke: #EF9F27; stroke-width: 2; fill: none; }
+
+/* Water system */
+.reservoir-wall { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.water { fill: #85B7EB; stroke: #378ADD; stroke-width: 0.5; }
+.pipe { fill: none; stroke: #378ADD; stroke-width: 4; stroke-linecap: round; }
+.pipe-joint { fill: #185FA5; stroke: #0C447C; stroke-width: 1; }
+.valve { fill: #0C447C; stroke: #185FA5; stroke-width: 1; }
+
+/* Transport */
+.road { stroke: #888780; stroke-width: 8; fill: none; stroke-linecap: round; }
+.road-mark { stroke: #F1EFE8; stroke-width: 1; fill: none; stroke-dasharray: 6 4; }
+.traffic-light { fill: #444441; stroke: #2C2C2A; stroke-width: 0.5; }
+.light-red { fill: #E24B4A; }
+.light-green { fill: #97C459; }
+.light-off { fill: #2C2C2A; }
+.bus { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 1.5; }
+
+/* Data/IoT */
+.data-line { stroke: #7F77DD; stroke-width: 2; fill: none; stroke-dasharray: 4 3; }
+.iot-hex { fill: #EEEDFE; stroke: #534AB7; stroke-width: 2; }
+
+/* Dashboard */
+.dashboard { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1.5; }
+.screen { fill: #1a1a18; }
+.screen-chart { fill: #5DCAA5; }
+```
+
+## Layout Notes
+
+- **ViewBox**: 720×620 (wider for three-column system layout)
+- **Hub position**: Central IoT at (360, 270) - geometric center
+- **Data lines**: Use quadratic curves or L-shaped paths, add connection dots at hub attachment points
+- **System spacing**: ~200px width per system section
+- **Vertical layers**: Dashboard (top) → IoT Hub (middle) → Systems (bottom)
+- **Component grouping**: Use `<g transform="translate(x,y)">` for each major component for easy positioning
@@ -0,0 +1,154 @@
+# Smartphone Layer Anatomy
+
+An exploded view diagram showing all internal layers of a smartphone from front glass to back, with alternating left/right labels to avoid overlap. Demonstrates layered product teardown visualization and component detail.
+
+## Key Patterns Used
+
+- **Exploded vertical stack**: Layers separated vertically to show internal structure
+- **Alternating labels**: Left/right label placement prevents text overlap
+- **Component detail**: Chips, coils, lenses rendered with realistic shapes
+- **Thickness scale**: Measurement indicator on the side
+- **Progressive depth**: Each layer slightly offset to create 3D stack effect
+
+## New Shape Techniques
+
+### Capacitive Touch Grid
+```xml
+<rect class="digitizer" x="0" y="0" width="140" height="90" rx="14"/>
+<g transform="translate(8, 8)">
+  <!-- Horizontal lines -->
+  <line class="digitizer-grid" x1="0" y1="15" x2="124" y2="15"/>
+  <line class="digitizer-grid" x1="0" y1="37" x2="124" y2="37"/>
+  <!-- Vertical lines -->
+  <line class="digitizer-grid" x1="20" y1="0" x2="20" y2="74"/>
+  <line class="digitizer-grid" x1="50" y1="0" x2="50" y2="74"/>
+</g>
+<!-- Touch point indicator -->
+<circle cx="70" cy="45" r="12" fill="none" stroke="#7F77DD" stroke-width="2" opacity="0.6"/>
+<circle cx="70" cy="45" r="5" fill="#7F77DD" opacity="0.4"/>
+```
+
+### OLED RGB Subpixels
+```xml
+<rect class="oled-panel" x="0" y="0" width="140" height="90" rx="12"/>
+<g transform="translate(10, 10)">
+  <!-- RGB pixel group -->
+  <rect class="oled-subpixel-r" x="0" y="0" width="2" height="6"/>
+  <rect class="oled-subpixel-g" x="3" y="0" width="2" height="6"/>
+  <rect class="oled-subpixel-b" x="6" y="0" width="2" height="6"/>
+  <!-- Repeat pattern -->
+  <rect class="oled-subpixel-r" x="11" y="0" width="2" height="6"/>
+  <rect class="oled-subpixel-g" x="14" y="0" width="2" height="6"/>
+  <rect class="oled-subpixel-b" x="17" y="0" width="2" height="6"/>
+</g>
+```
+
+### Logic Board with Chips
+```xml
+<rect class="pcb" x="0" y="0" width="116" height="106" rx="3"/>
+<!-- PCB traces -->
+<path class="pcb-trace" d="M 8 50 L 30 50 L 30 35"/>
+
+<!-- CPU chip -->
+<rect class="chip-cpu" x="30" y="20" width="55" height="35" rx="3"/>
+<text class="chip-label" x="57" y="35" text-anchor="middle">A17 Pro</text>
+
+<!-- RAM chip -->
+<rect class="chip-ram" x="30" y="62" width="35" height="18" rx="2"/>
+<text class="chip-label" x="47" y="74" text-anchor="middle">8GB RAM</text>
+
+<!-- Storage chip -->
+<rect class="chip-storage" x="30" y="85" width="55" height="16" rx="2"/>
+<text class="chip-label" x="57" y="96" text-anchor="middle">256GB NAND</text>
+```
+
+### Camera Lens Array
+```xml
+<!-- Main camera -->
+<circle class="camera-lens" cx="20" cy="20" r="18"/>
+<circle class="camera-lens-inner" cx="20" cy="20" r="13"/>
+<circle class="camera-sensor" cx="20" cy="20" r="8"/>
+<circle cx="20" cy="20" r="3" fill="#1a1a18"/>
+
+<!-- Secondary camera (smaller) -->
+<circle class="camera-lens" cx="15" cy="15" r="13"/>
+<circle class="camera-lens-inner" cx="15" cy="15" r="9"/>
+<circle class="camera-sensor" cx="15" cy="15" r="5"/>
+```
+
+### Wireless Charging Coil with Magnets
+```xml
+<!-- Concentric coil rings -->
+<circle class="charging-coil-outer" cx="0" cy="0" r="30"/>
+<circle class="charging-coil" cx="0" cy="0" r="23"/>
+<circle class="charging-coil" cx="0" cy="0" r="16"/>
+<circle class="charging-coil" cx="0" cy="0" r="9"/>
+
+<!-- MagSafe magnet ring -->
+<circle class="magnet" cx="0" cy="-35" r="3"/>
+<circle class="magnet" cx="25" cy="-25" r="3"/>
+<circle class="magnet" cx="35" cy="0" r="3"/>
+<circle class="magnet" cx="25" cy="25" r="3"/>
+<!-- ... continue around circle -->
+```
+
+### Battery Cell
+```xml
+<rect class="battery" x="0" y="0" width="140" height="90" rx="10"/>
+<rect class="battery-cell" x="10" y="12" width="120" height="60" rx="6"/>
+
+<text x="70" y="38" text-anchor="middle" fill="#27500A" style="font-size:9px">Li-Ion Polymer</text>
+<text x="70" y="52" text-anchor="middle" fill="#27500A" style="font-size:12px; font-weight:bold">4422 mAh</text>
+
+<rect class="battery-connector" x="55" y="75" width="30" height="10" rx="2"/>
+```
+
+## CSS Classes
+
+```css
+/* Glass */
+.front-glass { fill: #E8E6DE; stroke: #888780; stroke-width: 1; opacity: 0.9; }
+.back-glass { fill: #2C2C2A; stroke: #444441; stroke-width: 1; }
+
+/* Touch digitizer */
+.digitizer { fill: #EEEDFE; stroke: #534AB7; stroke-width: 1; }
+.digitizer-grid { stroke: #AFA9EC; stroke-width: 0.3; fill: none; }
+
+/* OLED */
+.oled-panel { fill: #1a1a18; stroke: #444441; stroke-width: 1; }
+.oled-subpixel-r { fill: #E24B4A; }
+.oled-subpixel-g { fill: #97C459; }
+.oled-subpixel-b { fill: #378ADD; }
+
+/* Midframe */
+.midframe { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1.5; }
+
+/* Logic board */
+.pcb { fill: #0F6E56; stroke: #085041; stroke-width: 1; }
+.pcb-trace { stroke: #5DCAA5; stroke-width: 0.3; fill: none; }
+.chip-cpu { fill: #3C3489; stroke: #534AB7; stroke-width: 0.5; }
+.chip-ram { fill: #185FA5; stroke: #378ADD; stroke-width: 0.5; }
+.chip-storage { fill: #27500A; stroke: #3B6D11; stroke-width: 0.5; }
+
+/* Battery */
+.battery { fill: #EAF3DE; stroke: #3B6D11; stroke-width: 1.5; }
+.battery-cell { fill: #97C459; stroke: #639922; stroke-width: 0.5; }
+
+/* Camera */
+.camera-lens { fill: #0C447C; stroke: #185FA5; stroke-width: 0.5; }
+.camera-lens-inner { fill: #1a1a18; stroke: #378ADD; stroke-width: 0.3; }
+.camera-sensor { fill: #3C3489; stroke: #534AB7; stroke-width: 0.3; }
+
+/* Wireless charging */
+.charging-coil { fill: none; stroke: #EF9F27; stroke-width: 1.5; }
+.magnet { fill: #5F5E5A; stroke: #444441; stroke-width: 0.5; }
+```
+
+## Layout Notes
+
+- **ViewBox**: 900×780 (tall for vertical stack)
+- **Layer offset**: Each layer offset 10px right and down for depth effect
+- **Label alternation**: Odd layers → RIGHT labels, Even layers → LEFT labels
+- **Thickness scale**: Vertical measurement bar on left side
+- **Front/Back markers**: Text labels at top and bottom
+- **Chip labels**: Use small white text (6px) directly on chip shapes
@@ -0,0 +1,247 @@
+# SN2 Reaction Mechanism
+
+A chemistry diagram showing the bimolecular nucleophilic substitution (SN2) mechanism between hydroxide ion and methyl bromide. Demonstrates molecular structure rendering, electron movement arrows, transition state notation, and reaction energy profiles.
+
+## Key Patterns Used
+
+- **Molecular structures**: Ball-and-stick style atoms with bonds
+- **Electron movement**: Curved arrows showing nucleophilic attack
+- **Transition state**: Bracketed pentacoordinate intermediate with partial charges
+- **Stereochemistry**: Wedge/dash bonds showing 3D configuration
+- **Energy profile**: Potential energy vs reaction coordinate plot
+- **Annotation boxes**: Key features and mechanistic notes
+
+## Diagram Type
+
+This is a **chemistry mechanism diagram** with:
+- **Molecular rendering**: Atoms as colored circles with element symbols
+- **Bond notation**: Solid, wedge, dash, and partial (dashed) bonds
+- **Reaction arrows**: Curved for electron movement, straight for reaction progress
+- **Energy landscape**: Quantitative energy profile below mechanism
+
+## Molecular Structure Elements
+
+### Atom Rendering
+
+```xml
+<!-- Carbon atom (dark) -->
+<circle cx="0" cy="0" r="14" class="carbon"/>
+<text class="chem" x="0" y="5" text-anchor="middle" fill="white" font-weight="500">C</text>
+
+<!-- Oxygen atom (red) -->
+<circle cx="0" cy="0" r="14" class="oxygen"/>
+<text class="chem" x="0" y="5" text-anchor="middle" fill="white" font-weight="500">O</text>
+
+<!-- Hydrogen atom (light with border) -->
+<circle cx="38" cy="0" r="8" class="hydrogen"/>
+<text class="chem-sm" x="38" y="4" text-anchor="middle">H</text>
+
+<!-- Bromine atom (brown) -->
+<circle cx="52" cy="0" r="16" class="bromine"/>
+<text class="chem" x="52" y="5" text-anchor="middle" fill="white" font-weight="500">Br</text>
+```
+
+```css
+.carbon { fill: #2C2C2A; }
+.hydrogen { fill: #F1EFE8; stroke: #888780; stroke-width: 1; }
+.oxygen { fill: #E24B4A; }
+.bromine { fill: #993C1D; }
+.nitrogen { fill: #378ADD; }  /* for other reactions */
+```
+
+### Bond Types
+
+```xml
+<!-- Single bond (solid) -->
+<line x1="14" y1="0" x2="38" y2="0" class="bond"/>
+
+<!-- Wedge bond (coming toward viewer) -->
+<polygon class="bond-wedge" points="0,-14 -6,-35 6,-35"/>
+
+<!-- Dash bond (going away from viewer) -->
+<line x1="-10" y1="10" x2="-28" y2="28" class="bond-dash"/>
+
+<!-- Partial bond (forming/breaking) -->
+<line x1="-40" y1="0" x2="-14" y2="0" class="bond-partial"/>
+```
+
+```css
+.bond { stroke: var(--text-primary); stroke-width: 2.5; fill: none; stroke-linecap: round; }
+.bond-thin { stroke: var(--text-primary); stroke-width: 1.5; fill: none; }
+.bond-partial { stroke: var(--text-primary); stroke-width: 2; fill: none; stroke-dasharray: 4 3; }
+.bond-wedge { fill: var(--text-primary); stroke: none; }
+.bond-dash { stroke: var(--text-primary); stroke-width: 2; fill: none; stroke-dasharray: 2 2; }
+```
+
+### Lone Pairs and Charges
+
+```xml
+<!-- Lone pair electrons (dots) -->
+<circle cx="-8" cy="-18" r="2" fill="var(--text-primary)"/>
+<circle cx="0" cy="-18" r="2" fill="var(--text-primary)"/>
+
+<!-- Formal negative charge -->
+<text class="charge" x="12" y="-12" fill="#A32D2D" font-weight="bold">⊖</text>
+
+<!-- Partial charges (delta notation) -->
+<text class="partial" x="0" y="-18" text-anchor="middle" fill="#A32D2D">δ⁻</text>
+<text class="partial" x="0" y="-22" text-anchor="middle" fill="#3B6D11">δ⁺</text>
+```
+
+```css
+.charge { font-family: "Times New Roman", Georgia, serif; font-size: 12px; }
+.partial { font-family: "Times New Roman", Georgia, serif; font-size: 11px; font-style: italic; }
+```
+
+### Curved Arrow (Electron Movement)
+
+```xml
+<defs>
+  <marker id="curved-arrow" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="6" markerHeight="6" orient="auto">
+    <path d="M0,0 L10,5 L0,10 L3,5 Z" class="arrow-fill"/>
+  </marker>
+</defs>
+
+<!-- Nucleophilic attack arrow -->
+<path d="M -5,15 Q 30,60 70,25" class="arrow-curved" marker-end="url(#curved-arrow)"/>
+```
+
+```css
+.arrow-curved { stroke: #534AB7; stroke-width: 2; fill: none; }
+.arrow-fill { fill: #534AB7; }
+```
+
+### Transition State Brackets
+
+```xml
+<!-- Left bracket -->
+<path d="M -75,-70 L -85,-70 L -85,75 L -75,75" class="ts-bracket"/>
+
+<!-- Right bracket -->
+<path d="M 95,-70 L 105,-70 L 105,75 L 95,75" class="ts-bracket"/>
+
+<!-- Double dagger symbol -->
+<text class="chem" x="115" y="-60" fill="var(--text-primary)">‡</text>
+```
+
+```css
+.ts-bracket { stroke: var(--text-primary); stroke-width: 1.5; fill: none; }
+```
+
+## Energy Profile Diagram
+
+### Axes
+
+```xml
+<!-- Y-axis (Energy) -->
+<line x1="0" y1="280" x2="0" y2="0" class="axis" marker-end="url(#straight-arrow)"/>
+<text class="t" x="-15" y="-10" text-anchor="middle" transform="rotate(-90 -15 140)">Potential Energy</text>
+
+<!-- X-axis (Reaction Coordinate) -->
+<line x1="0" y1="280" x2="600" y2="280" class="axis" marker-end="url(#straight-arrow)"/>
+<text class="t" x="580" y="305" text-anchor="middle">Reaction Coordinate</text>
+```
+
+### Energy Curve
+
+```xml
+<!-- Filled area under curve -->
+<path class="energy-fill" d="
+  M 40,200 
+  Q 150,200 250,50 
+  Q 350,200 500,220 
+  L 500,280 L 40,280 Z
+"/>
+
+<!-- Curve line -->
+<path class="energy-curve" d="
+  M 40,200 
+  Q 100,200 150,150
+  Q 200,80 250,50 
+  Q 300,80 350,150
+  Q 400,210 500,220
+"/>
+```
+
+```css
+.energy-curve { stroke: #534AB7; stroke-width: 2.5; fill: none; }
+.energy-fill { fill: rgba(83, 74, 183, 0.1); }
+```
+
+### Energy Levels and Annotations
+
+```xml
+<!-- Reactants level -->
+<line x1="20" y1="200" x2="80" y2="200" stroke="#3B6D11" stroke-width="2"/>
+<text class="ts" x="50" y="218" text-anchor="middle">Reactants</text>
+
+<!-- Transition state peak -->
+<circle cx="250" cy="50" r="5" fill="#534AB7"/>
+<line x1="250" y1="50" x2="250" y2="280" class="energy-level"/>
+<text class="ts" x="250" y="30" text-anchor="middle" fill="#534AB7" font-weight="500">Transition State [‡]</text>
+
+<!-- Products level (lower = exergonic) -->
+<line x1="470" y1="220" x2="530" y2="220" stroke="#3B6D11" stroke-width="2"/>
+
+<!-- Activation energy arrow -->
+<line x1="100" y1="200" x2="100" y2="55" class="delta-arrow" marker-end="url(#delta-arrow)"/>
+<text class="ts" x="85" y="125" text-anchor="end" fill="#3B6D11">E<tspan baseline-shift="sub" font-size="8">a</tspan></text>
+```
+
+```css
+.energy-level { stroke: var(--text-secondary); stroke-width: 1; stroke-dasharray: 4 2; fill: none; }
+.delta-arrow { stroke: #3B6D11; stroke-width: 1.5; fill: none; }
+.delta-fill { fill: #3B6D11; }
+```
+
+## Chemistry Text Styles
+
+```css
+/* Chemistry notation (serif font for formulas) */
+.chem { font-family: "Times New Roman", Georgia, serif; font-size: 16px; fill: var(--text-primary); }
+.chem-sm { font-family: "Times New Roman", Georgia, serif; font-size: 12px; fill: var(--text-primary); }
+.chem-lg { font-family: "Times New Roman", Georgia, serif; font-size: 18px; fill: var(--text-primary); }
+```
+
+## Subscript/Superscript in SVG
+
+```xml
+<!-- Subscript using tspan -->
+<text class="ts">E<tspan baseline-shift="sub" font-size="8">a</tspan></text>
+
+<!-- Superscript for charges -->
+<text class="chem-sm">OH⁻</text>  <!-- Using Unicode superscript minus -->
+<text class="chem-sm">CH₃Br</text>  <!-- Using Unicode subscript 3 -->
+```
+
+## Color Coding
+
+| Element | Color | Hex |
+|---------|-------|-----|
+| Carbon | Dark gray | #2C2C2A |
+| Hydrogen | Light cream | #F1EFE8 |
+| Oxygen | Red | #E24B4A |
+| Bromine | Brown | #993C1D |
+| Nitrogen | Blue | #378ADD |
+| Electron arrows | Purple | #534AB7 |
+| Positive charge | Green | #3B6D11 |
+| Negative charge | Red | #A32D2D |
+
+## Layout Notes
+
+- **ViewBox**: 800×680 (landscape for mechanism + energy profile)
+- **Mechanism section**: y=60-300, showing reactants → TS → products
+- **Energy profile**: y=320-630, with axes and curve
+- **Atom sizes**: C/O/Br ~12-16px radius, H ~7-8px radius
+- **Bond lengths**: ~25-40px between atom centers
+- **Spacing**: ~140px between mechanism stages
+
+## When to Use This Pattern
+
+Use this diagram style for:
+- Organic reaction mechanisms (SN1, SN2, E1, E2, additions, eliminations)
+- Reaction energy profiles and kinetics
+- Stereochemistry illustrations
+- Enzyme mechanism diagrams
+- Transition state theory visualization
+- Any chemistry concept requiring molecular structures
@@ -0,0 +1,338 @@
+# Modern Onshore Wind Turbine Structure
+
+A physical/structural cross-section diagram showing all major components of a modern wind turbine from underground foundation to blade tips.
+
+## Key Patterns Used
+
+- **Underground section**: Soil layers, deep concrete foundation with rebar reinforcement grid, spread footing
+- **Cross-section view**: Tower wall thickness shown, internal components visible
+- **Tapered tower**: Path elements creating realistic tower silhouette that narrows toward top
+- **Internal access**: Ladder with rungs, elevator shaft inside tower
+- **Cable routing**: Power cables running from nacelle down through tower to transformer
+- **Nacelle cutaway**: Gearbox, generator, brake, yaw system all visible inside housing
+- **Rotor assembly**: Hub with pitch motors at blade roots, three composite blades with gradient fill
+- **Ground level marker**: Clear separation between above/below ground
+- **Component color coding**: Each system type has distinct color (blue=generator, gold=gearbox, red=brake, green=yaw, purple=pitch)
+- **Legend bar**: Quick reference for color meanings
+
+## Diagram
+
+```xml
+<svg width="100%" viewBox="0 0 680 920" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="arrow" viewBox="0 0 10 10" refX="8" refY="5"
+            markerWidth="6" markerHeight="6" orient="auto-start-reverse">
+      <path d="M2 1L8 5L2 9" fill="none" stroke="context-stroke"
+            stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+    </marker>
+    <!-- Blade gradient for 3D effect -->
+    <linearGradient id="bladeGrad" x1="0%" y1="0%" x2="100%" y2="0%">
+      <stop offset="0%" style="stop-color:#D3D1C7"/>
+      <stop offset="50%" style="stop-color:#F1EFE8"/>
+      <stop offset="100%" style="stop-color:#B4B2A9"/>
+    </linearGradient>
+  </defs>
+
+  <!-- ===== GROUND LEVEL LINE ===== -->
+  <line x1="40" y1="680" x2="640" y2="680" stroke="#3B6D11" stroke-width="2"/>
+  <text class="tl" x="45" y="675">Ground level</text>
+
+  <!-- ===== UNDERGROUND: FOUNDATION ===== -->
+  
+  <!-- Soil layers -->
+  <rect x="120" y="680" width="300" height="180" class="soil"/>
+  <rect x="120" y="780" width="300" height="80" class="soil-dark"/>
+  
+  <!-- Deep concrete foundation -->
+  <path d="M170 680 L170 820 L200 850 L340 850 L370 820 L370 680 Z" class="concrete"/>
+  <!-- Foundation base spread -->
+  <path d="M140 820 L170 820 L200 850 L340 850 L370 820 L400 820 L400 860 L140 860 Z" class="concrete-dark"/>
+  
+  <!-- Rebar reinforcement -->
+  <g class="rebar">
+    <line x1="185" y1="700" x2="185" y2="840"/>
+    <line x1="210" y1="700" x2="210" y2="845"/>
+    <line x1="235" y1="700" x2="235" y2="848"/>
+    <line x1="260" y1="700" x2="260" y2="848"/>
+    <line x1="285" y1="700" x2="285" y2="848"/>
+    <line x1="310" y1="700" x2="310" y2="845"/>
+    <line x1="335" y1="700" x2="335" y2="840"/>
+    <!-- Horizontal rebar -->
+    <line x1="175" y1="720" x2="365" y2="720"/>
+    <line x1="175" y1="760" x2="365" y2="760"/>
+    <line x1="175" y1="800" x2="365" y2="800"/>
+    <line x1="155" y1="835" x2="385" y2="835"/>
+  </g>
+  
+  <!-- Foundation labels -->
+  <line x1="410" y1="770" x2="480" y2="770" class="leader"/>
+  <text class="ts" x="485" y="766">Deep concrete foundation</text>
+  <text class="tl" x="485" y="778">Reinforced with steel rebar</text>
+  <text class="tl" x="485" y="790">15-25m deep typical</text>
+  
+  <line x1="400" y1="850" x2="480" y2="870" class="leader"/>
+  <text class="ts" x="485" y="866">Foundation spread footing</text>
+  <text class="tl" x="485" y="878">Distributes load to soil</text>
+
+  <!-- ===== TOWER BASE ===== -->
+  
+  <!-- Tower base flange -->
+  <ellipse cx="270" cy="680" rx="70" ry="12" class="concrete-dark"/>
+  <rect x="200" y="668" width="140" height="12" class="tower"/>
+  
+  <!-- Transformer at base -->
+  <g transform="translate(470, 640)">
+    <rect x="0" y="0" width="50" height="40" rx="3" class="transformer"/>
+    <!-- Cooling fins -->
+    <rect x="52" y="5" width="4" height="30" class="transformer-fin"/>
+    <rect x="58" y="5" width="4" height="30" class="transformer-fin"/>
+    <rect x="64" y="5" width="4" height="30" class="transformer-fin"/>
+    <!-- Connection box -->
+    <rect x="10" y="-8" width="30" height="10" rx="2" class="transformer-fin"/>
+  </g>
+  <line x1="470" y1="660" x2="430" y2="640" class="leader"/>
+  <text class="ts" x="385" y="636" text-anchor="end">Transformer</text>
+  <text class="tl" x="385" y="648" text-anchor="end">Steps up voltage for grid</text>
+
+  <!-- ===== TUBULAR STEEL TOWER ===== -->
+  
+  <!-- Tower outer shell (tapered) -->
+  <path d="M200 680 L220 200 L320 200 L340 680 Z" class="tower"/>
+  
+  <!-- Tower inner surface (cutaway) -->
+  <path d="M215 680 L232 210 L308 210 L325 680 Z" class="tower-inner"/>
+  
+  <!-- Tower section joints -->
+  <line x1="205" y1="550" x2="335" y2="550" class="tower-section"/>
+  <line x1="210" y1="420" x2="330" y2="420" class="tower-section"/>
+  <line x1="215" y1="300" x2="325" y2="300" class="tower-section"/>
+  
+  <!-- Internal ladder (left side) -->
+  <g transform="translate(225, 220)">
+    <!-- Ladder rails -->
+    <line x1="0" y1="0" x2="8" y2="450" class="ladder"/>
+    <line x1="15" y1="0" x2="23" y2="450" class="ladder"/>
+    <!-- Rungs -->
+    <g class="ladder-rung">
+      <line x1="1" y1="20" x2="22" y2="21"/>
+      <line x1="1" y1="50" x2="22" y2="52"/>
+      <line x1="2" y1="80" x2="22" y2="83"/>
+      <line x1="2" y1="110" x2="23" y2="114"/>
+      <line x1="2" y1="140" x2="23" y2="145"/>
+      <line x1="3" y1="170" x2="23" y2="176"/>
+      <line x1="3" y1="200" x2="24" y2="207"/>
+      <line x1="3" y1="230" x2="24" y2="238"/>
+      <line x1="4" y1="260" x2="24" y2="269"/>
+      <line x1="4" y1="290" x2="25" y2="300"/>
+      <line x1="4" y1="320" x2="25" y2="331"/>
+      <line x1="5" y1="350" x2="25" y2="362"/>
+      <line x1="5" y1="380" x2="26" y2="393"/>
+      <line x1="6" y1="410" x2="26" y2="424"/>
+      <line x1="6" y1="440" x2="27" y2="455"/>
+    </g>
+  </g>
+  
+  <!-- Elevator shaft (right side) -->
+  <rect x="280" y="230" width="25" height="430" rx="2" class="elevator"/>
+  <text class="tl" x="292" y="450" text-anchor="middle" transform="rotate(-90, 292, 450)" fill="#185FA5">ELEVATOR</text>
+  
+  <!-- Electrical cables running down -->
+  <path d="M270 220 C270 300 268 400 268 500 C268 600 268 650 310 665 L470 665" class="cable"/>
+  <path d="M260 225 C258 350 256 500 256 600 C256 650 256 670 256 680" class="cable-thin"/>
+  
+  <!-- Tower labels -->
+  <line x1="340" y1="350" x2="400" y2="320" class="leader"/>
+  <text class="ts" x="405" y="316">Tubular steel tower</text>
+  <text class="tl" x="405" y="328">80-120m height typical</text>
+  <text class="tl" x="405" y="340">Tapered for strength</text>
+  
+  <line x1="248" y1="400" x2="130" y2="380" class="leader"/>
+  <text class="ts" x="125" y="376" text-anchor="end">Internal ladder</text>
+  <text class="tl" x="125" y="388" text-anchor="end">Service access</text>
+  
+  <line x1="305" y1="500" x2="400" y2="520" class="leader"/>
+  <text class="ts" x="405" y="516">Service elevator</text>
+  
+  <line x1="268" y1="580" x2="130" y2="600" class="leader"/>
+  <text class="ts" x="125" y="596" text-anchor="end">Power cables</text>
+  <text class="tl" x="125" y="608" text-anchor="end">To transformer</text>
+
+  <!-- ===== NACELLE ===== -->
+  
+  <g transform="translate(270, 160)">
+    <!-- Nacelle base/bedplate -->
+    <rect x="-60" y="30" width="120" height="15" class="nacelle"/>
+    
+    <!-- Yaw bearing -->
+    <ellipse cx="0" cy="42" rx="35" ry="6" class="bearing"/>
+    
+    <!-- Yaw motors -->
+    <rect x="-55" y="32" width="12" height="18" rx="2" class="yaw"/>
+    <rect x="43" y="32" width="12" height="18" rx="2" class="yaw"/>
+    
+    <!-- Nacelle housing -->
+    <path d="M-65 30 L-70 -10 L-65 -35 L70 -35 L85 -10 L85 30 Z" class="nacelle-cover"/>
+    
+    <!-- Main shaft -->
+    <rect x="-90" y="-8" width="35" height="16" rx="2" fill="#888780" stroke="#5F5E5A" stroke-width="0.5"/>
+    
+    <!-- Gearbox -->
+    <rect x="-55" y="-25" width="40" height="45" rx="3" class="gearbox"/>
+    <text class="tl" x="-35" y="5" text-anchor="middle" fill="#633806">GEAR</text>
+    
+    <!-- Generator -->
+    <rect x="-10" y="-20" width="50" height="38" rx="4" class="generator"/>
+    <ellipse cx="15" cy="0" rx="15" ry="15" fill="none" stroke="#0C447C" stroke-width="1"/>
+    <text class="tl" x="15" y="4" text-anchor="middle" fill="#E6F1FB">GEN</text>
+    
+    <!-- Brake disc -->
+    <rect x="45" y="-12" width="8" height="24" rx="1" class="brake"/>
+    
+    <!-- Electrical cabinet -->
+    <rect x="58" y="-25" width="20" height="35" rx="2" fill="#5F5E5A" stroke="#444441" stroke-width="0.5"/>
+    
+    <!-- Anemometer on top -->
+    <line x1="60" y1="-35" x2="60" y2="-50" stroke="#5F5E5A" stroke-width="1"/>
+    <ellipse cx="60" cy="-52" rx="8" ry="3" fill="#D3D1C7" stroke="#888780" stroke-width="0.5"/>
+  </g>
+  
+  <!-- Nacelle labels -->
+  <line x1="215" y1="135" x2="130" y2="115" class="leader"/>
+  <text class="ts" x="125" y="111" text-anchor="end">Gearbox</text>
+  <text class="tl" x="125" y="123" text-anchor="end">Speed multiplier</text>
+  
+  <line x1="285" y1="145" x2="400" y2="125" class="leader"/>
+  <text class="ts" x="405" y="121">Generator</text>
+  <text class="tl" x="405" y="133">Converts rotation to electricity</text>
+  
+  <line x1="315" y1="155" x2="400" y2="165" class="leader"/>
+  <text class="ts" x="405" y="161">Brake system</text>
+  
+  <line x1="215" y1="200" x2="130" y2="220" class="leader"/>
+  <text class="ts" x="125" y="216" text-anchor="end">Yaw motors</text>
+  <text class="tl" x="125" y="228" text-anchor="end">Rotate nacelle to face wind</text>
+  
+  <line x1="330" y1="108" x2="400" y2="90" class="leader"/>
+  <text class="ts" x="405" y="86">Anemometer</text>
+  <text class="tl" x="405" y="98">Wind speed sensor</text>
+
+  <!-- ===== ROTOR HUB & BLADES ===== -->
+  
+  <!-- Hub -->
+  <g transform="translate(180, 152)">
+    <!-- Hub body -->
+    <ellipse cx="0" cy="0" rx="25" ry="30" class="hub"/>
+    <!-- Hub nose cone -->
+    <path d="M-25 -20 Q-50 0 -25 20 Q-30 0 -25 -20" class="hub-cap"/>
+    
+    <!-- Blade roots with pitch motors -->
+    <!-- Blade 1 (up) -->
+    <g transform="translate(-10, -25) rotate(-80)">
+      <ellipse cx="0" cy="0" rx="12" ry="8" class="blade-root"/>
+      <rect x="-8" y="-5" width="10" height="10" rx="2" class="pitch-motor"/>
+    </g>
+    
+    <!-- Blade 2 (lower left) -->
+    <g transform="translate(-18, 18) rotate(40)">
+      <ellipse cx="0" cy="0" rx="12" ry="8" class="blade-root"/>
+      <rect x="-8" y="-5" width="10" height="10" rx="2" class="pitch-motor"/>
+    </g>
+    
+    <!-- Blade 3 (lower right) -->
+    <g transform="translate(5, 22) rotate(160)">
+      <ellipse cx="0" cy="0" rx="12" ry="8" class="blade-root"/>
+      <rect x="-8" y="-5" width="10" height="10" rx="2" class="pitch-motor"/>
+    </g>
+  </g>
+  
+  <!-- Blade 1 (pointing up-left) -->
+  <path d="M165 125 Q140 80 130 40 Q125 20 115 15 Q110 18 112 25 Q115 50 125 90 Q140 120 158 128 Z" class="blade" fill="url(#bladeGrad)"/>
+  
+  <!-- Blade 2 (pointing down-left) -->
+  <path d="M158 175 Q120 200 80 230 Q60 245 55 255 Q60 258 68 252 Q95 235 130 210 Q155 190 163 178 Z" class="blade" fill="url(#bladeGrad)"/>
+  
+  <!-- Blade 3 (pointing down-right, partially visible) -->
+  <path d="M188 175 Q195 200 205 230 Q210 250 215 255 Q220 252 218 245 Q212 220 202 195 Q192 175 186 172 Z" class="blade" fill="url(#bladeGrad)"/>
+  
+  <!-- Blade labels -->
+  <line x1="115" y1="35" x2="60" y2="35" class="leader"/>
+  <text class="ts" x="55" y="31" text-anchor="end">Composite blade</text>
+  <text class="tl" x="55" y="43" text-anchor="end">Fiberglass/carbon fiber</text>
+  <text class="tl" x="55" y="55" text-anchor="end">40-80m length each</text>
+  
+  <line x1="170" y1="130" x2="130" y2="155" class="leader"/>
+  <text class="ts" x="85" y="151" text-anchor="end">Pitch motor</text>
+  <text class="tl" x="85" y="163" text-anchor="end">Adjusts blade angle</text>
+  
+  <line x1="180" y1="152" x2="130" y2="180" class="leader"/>
+  <text class="ts" x="85" y="183" text-anchor="end">Rotor hub</text>
+
+  <!-- ===== LEGEND ===== -->
+  <g transform="translate(40, 895)">
+    <rect x="0" y="-15" width="600" height="30" rx="4" fill="none" stroke="#D3D1C7" stroke-width="0.5"/>
+    
+    <rect x="15" y="-5" width="12" height="12" rx="2" class="generator"/>
+    <text class="tl" x="32" y="5">Generator</text>
+    
+    <rect x="95" y="-5" width="12" height="12" rx="2" class="gearbox"/>
+    <text class="tl" x="112" y="5">Gearbox</text>
+    
+    <rect x="170" y="-5" width="12" height="12" rx="2" class="brake"/>
+    <text class="tl" x="187" y="5">Brake</text>
+    
+    <rect x="230" y="-5" width="12" height="12" rx="2" class="yaw"/>
+    <text class="tl" x="247" y="5">Yaw system</text>
+    
+    <rect x="320" y="-5" width="12" height="12" rx="2" class="pitch-motor"/>
+    <text class="tl" x="337" y="5">Pitch motor</text>
+    
+    <line x1="415" y1="1" x2="435" y2="1" class="cable" style="stroke-width:2"/>
+    <text class="tl" x="440" y="5">Power cable</text>
+    
+    <rect x="515" y="-5" width="12" height="12" rx="2" class="transformer"/>
+    <text class="tl" x="532" y="5">Transformer</text>
+  </g>
+
+</svg>
+```
+
+## CSS Classes
+
+```css
+/* Foundation */
+.concrete { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.concrete-dark { fill: #888780; stroke: #5F5E5A; stroke-width: 1; }
+.rebar { stroke: #854F0B; stroke-width: 1.5; fill: none; }
+.soil { fill: #8B7355; stroke: #5F5E5A; stroke-width: 0.5; }
+.soil-dark { fill: #6B5344; }
+
+/* Tower */
+.tower { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1; }
+.tower-inner { fill: #D3D1C7; stroke: #888780; stroke-width: 0.5; }
+.tower-section { stroke: #888780; stroke-width: 0.5; stroke-dasharray: 2 4; }
+.ladder { stroke: #5F5E5A; stroke-width: 1; fill: none; }
+.ladder-rung { stroke: #888780; stroke-width: 0.8; }
+.elevator { fill: #E6F1FB; stroke: #185FA5; stroke-width: 0.5; }
+.cable { stroke: #E24B4A; stroke-width: 2; fill: none; }
+.cable-thin { stroke: #E24B4A; stroke-width: 1.5; fill: none; }
+
+/* Nacelle */
+.nacelle { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1; }
+.nacelle-cover { fill: #D3D1C7; stroke: #5F5E5A; stroke-width: 1; }
+.gearbox { fill: #BA7517; stroke: #633806; stroke-width: 0.5; }
+.generator { fill: #378ADD; stroke: #0C447C; stroke-width: 0.5; }
+.brake { fill: #E24B4A; stroke: #791F1F; stroke-width: 0.5; }
+.yaw { fill: #5DCAA5; stroke: #085041; stroke-width: 0.5; }
+.bearing { fill: #444441; stroke: #2C2C2A; stroke-width: 0.5; }
+
+/* Rotor */
+.hub { fill: #D3D1C7; stroke: #5F5E5A; stroke-width: 1; }
+.hub-cap { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1; }
+.blade { fill: #F1EFE8; stroke: #888780; stroke-width: 1; }
+.blade-root { fill: #D3D1C7; stroke: #5F5E5A; stroke-width: 0.5; }
+.pitch-motor { fill: #7F77DD; stroke: #3C3489; stroke-width: 0.5; }
+
+/* Transformer */
+.transformer { fill: #27500A; stroke: #173404; stroke-width: 1; }
+.transformer-fin { fill: #3B6D11; stroke: #27500A; stroke-width: 0.5; }
+```
@@ -0,0 +1,43 @@
+# Dashboard Patterns
+
+Building blocks for UI/dashboard mockups inside a concept diagram — admin panels, monitoring dashboards, control interfaces, status displays.
+
+## Pattern
+
+A "screen" is a rounded dark rect inside a lighter "frame" rect, with chart/gauge/indicator elements nested on top.
+
+```xml
+<!-- Monitor frame -->
+<rect class="dashboard" x="0" y="0" width="200" height="120" rx="8"/>
+<!-- Screen -->
+<rect class="screen" x="10" y="10" width="180" height="85" rx="4"/>
+<!-- Mini bar chart -->
+<rect class="screen-content" x="18" y="18" width="50" height="35" rx="2"/>
+<rect class="screen-chart" x="22" y="38" width="8" height="12"/>
+<rect class="screen-chart" x="33" y="32" width="8" height="18"/>
+<!-- Gauge -->
+<circle class="screen-bar" cx="100" cy="35" r="12"/>
+<text x="100" y="39" text-anchor="middle" fill="#E8E6DE" style="font-size:8px">78%</text>
+<!-- Status indicators -->
+<circle cx="35" cy="74" r="6" fill="#97C459"/> <!-- green = ok -->
+<circle cx="75" cy="74" r="6" fill="#EF9F27"/> <!-- amber = warning -->
+<circle cx="115" cy="74" r="6" fill="#E24B4A"/> <!-- red = alert -->
+```
+
+## CSS
+
+```css
+.dashboard      { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1.5; }
+.screen         { fill: #1a1a18; }
+.screen-content { fill: #2C2C2A; }
+.screen-chart   { fill: #5DCAA5; }
+.screen-bar     { fill: #7F77DD; }
+.screen-alert   { fill: #E24B4A; }
+```
+
+## Tips
+
+- Dashboard screens stay dark in both light and dark mode — they represent actual monitor glass.
+- Keep on-screen text small (`font-size:8px` or `10px`) and high-contrast (near-white fill on dark).
+- Use the status triad green/amber/red consistently — OK / warning / alert.
+- A single dashboard usually sits on top of an infrastructure hub diagram as a unified view (see `examples/smart-city-infrastructure.md`).
@@ -0,0 +1,144 @@
+# Infrastructure Patterns
+
+Reusable shapes and line styles for infrastructure / systems-integration diagrams (smart cities, IoT networks, industrial systems, multi-domain architectures).
+
+## Layout pattern: hub-spoke
+
+- **Central hub**: Hexagon or circle representing the integration platform
+- **Radiating connections**: Data lines from hub to each subsystem with connection dots
+- **Subsystem sections**: Each system (power, water, transport) in its own region
+- **Dashboard on top**: Optional UI mockup showing a unified view (see `dashboard-patterns.md`)
+
+```xml
+<!-- Central hub (hexagon) -->
+<polygon class="iot-hex" points="0,-45 39,-22 39,22 0,45 -39,22 -39,-22"/>
+
+<!-- Data lines with connection dots -->
+<path class="data-line" d="M 321 248 L 200 248 L 120 380" stroke-dasharray="4 3"/>
+<circle cx="321" cy="248" r="4" fill="#7F77DD"/>
+```
+
+## Semantic line styles
+
+Use a dedicated CSS class per subsystem so every diagram reads the same way:
+
+```css
+.data-line  { stroke: #7F77DD; stroke-width: 2; fill: none; stroke-dasharray: 4 3; }
+.power-line { stroke: #EF9F27; stroke-width: 2; fill: none; }
+.water-pipe { stroke: #378ADD; stroke-width: 4; stroke-linecap: round; fill: none; }
+.road       { stroke: #888780; stroke-width: 8; stroke-linecap: round; fill: none; }
+```
+
+## Power systems
+
+**Solar panel (angled):**
+```xml
+<polygon class="solar-panel" points="0,25 35,8 38,12 3,29"/>
+<line class="solar-frame" x1="12" y1="22" x2="24" y2="13"/>
+```
+
+**Wind turbine:**
+```xml
+<polygon class="wind-tower" points="20,70 30,70 28,25 22,25"/>
+<circle class="wind-hub" cx="25" cy="18" r="5"/>
+<ellipse class="wind-blade" cx="25" cy="5" rx="3" ry="13"/>
+<ellipse class="wind-blade" cx="14" cy="26" rx="3" ry="13" transform="rotate(-120, 25, 18)"/>
+<ellipse class="wind-blade" cx="36" cy="26" rx="3" ry="13" transform="rotate(120, 25, 18)"/>
+```
+
+**Battery with charge level:**
+```xml
+<rect class="battery" x="0" y="0" width="45" height="65" rx="5"/>
+<rect x="10" y="-6" width="10" height="8" rx="2" fill="#27500A"/> <!-- terminal -->
+<rect class="battery-level" x="5" y="12" width="35" height="48" rx="3"/> <!-- fill level -->
+```
+
+**Power pylon:**
+```xml
+<polygon class="pylon" points="30,0 35,0 40,60 25,60"/>
+<line x1="15" y1="10" x2="45" y2="10" stroke="#5F5E5A" stroke-width="3"/>
+<circle cx="18" cy="10" r="3" fill="#FAEEDA" stroke="#854F0B"/> <!-- insulator -->
+```
+
+## Water systems
+
+**Reservoir/dam:**
+```xml
+<polygon class="reservoir-wall" points="0,60 10,0 70,0 80,60"/>
+<polygon class="water" points="12,10 68,10 68,55 75,55 75,58 5,58 5,55 12,55"/>
+<!-- Wave effect -->
+<path d="M 15 25 Q 25 22 35 25 Q 45 28 55 25" fill="none" stroke="#378ADD" opacity="0.5"/>
+```
+
+**Treatment tank:**
+```xml
+<ellipse class="treatment-tank" cx="35" cy="45" rx="30" ry="18"/>
+<rect class="treatment-tank" x="5" y="20" width="60" height="25"/>
+<!-- Bubbles -->
+<circle cx="20" cy="32" r="2" fill="#378ADD" opacity="0.6"/>
+```
+
+**Pipe with joint and valve:**
+```xml
+<path class="pipe" d="M 80 85 L 110 85"/>
+<circle class="pipe-joint" cx="110" cy="85" r="8"/>
+<circle class="valve" cx="95" cy="85" r="6"/>
+```
+
+## Transport systems
+
+**Road with lane markings:**
+```xml
+<line class="road" x1="0" y1="50" x2="170" y2="50"/>
+<line class="road-mark" x1="10" y1="50" x2="160" y2="50"/>
+```
+
+**Traffic light:**
+```xml
+<rect class="traffic-light" x="0" y="0" width="14" height="32" rx="3"/>
+<circle class="light-red" cx="7" cy="8" r="4"/>
+<circle class="light-off" cx="7" cy="16" r="4"/>
+<circle class="light-green" cx="7" cy="24" r="4"/>
+```
+
+**Bus:**
+```xml
+<rect class="bus" x="0" y="0" width="55" height="28" rx="6"/>
+<rect class="bus-window" x="5" y="5" width="12" height="12" rx="2"/>
+<circle cx="14" cy="30" r="6" fill="#2C2C2A"/> <!-- wheel -->
+<circle cx="14" cy="30" r="3" fill="#5F5E5A"/> <!-- hubcap -->
+```
+
+## Full CSS block (add to the host page or inline <style>)
+
+```css
+/* Power */
+.solar-panel   { fill: #3C3489; stroke: #534AB7; stroke-width: 0.5; }
+.wind-tower    { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.wind-blade    { fill: #F1EFE8; stroke: #888780; stroke-width: 0.5; }
+.battery       { fill: #27500A; stroke: #3B6D11; stroke-width: 1.5; }
+.battery-level { fill: #97C459; }
+.power-line    { stroke: #EF9F27; stroke-width: 2; fill: none; }
+
+/* Water */
+.reservoir-wall { fill: #B4B2A9; stroke: #5F5E5A; stroke-width: 1; }
+.water          { fill: #85B7EB; stroke: #378ADD; stroke-width: 0.5; }
+.pipe           { fill: none; stroke: #378ADD; stroke-width: 4; stroke-linecap: round; }
+.pipe-joint     { fill: #185FA5; stroke: #0C447C; stroke-width: 1; }
+.valve          { fill: #0C447C; stroke: #185FA5; stroke-width: 1; }
+
+/* Transport */
+.road          { stroke: #888780; stroke-width: 8; fill: none; stroke-linecap: round; }
+.road-mark     { stroke: #F1EFE8; stroke-width: 1; stroke-dasharray: 6 4; fill: none; }
+.traffic-light { fill: #444441; stroke: #2C2C2A; stroke-width: 0.5; }
+.light-red     { fill: #E24B4A; }
+.light-green   { fill: #97C459; }
+.light-off     { fill: #2C2C2A; }
+.bus           { fill: #E1F5EE; stroke: #0F6E56; stroke-width: 1.5; }
+```
+
+## Reference examples
+
+- `examples/smart-city-infrastructure.md` — hub-spoke with multiple subsystems
+- `examples/electricity-grid-flow.md` — voltage hierarchy, flow markers
+- `examples/wind-turbine-structure.md` — cross-section with legend
@@ -0,0 +1,42 @@
+# Physical Shape Cookbook
+
+Guidance for drawing physical objects (vehicles, buildings, hardware, mechanical systems, anatomy) — when rectangles aren't enough.
+
+## Shape selection
+
+| Physical form | SVG element | Example use |
+|---------------|-------------|-------------|
+| Curved bodies | `<path>` with Q/C curves | Fuselage, tanks, pipes |
+| Tapered/angular shapes | `<polygon>` | Wings, fins, wedges |
+| Cylindrical/round | `<ellipse>`, `<circle>` | Engines, wheels, buttons |
+| Linear structures | `<line>` | Struts, beams, connections |
+| Internal sections | `<rect>` inside parent | Compartments, rooms |
+| Dashed boundaries | `stroke-dasharray` | Hidden parts, fuel tanks |
+
+## Layering approach
+
+1. Draw outer structure first (fuselage, frame, hull)
+2. Add internal sections on top (cabins, compartments)
+3. Add detail elements (engines, wheels, controls)
+4. Add leader lines with labels
+
+## Semantic CSS classes (instead of c-* ramps)
+
+For physical diagrams, define component-specific classes directly rather than applying `c-*` color classes. This makes each part self-documenting and lets you keep a restrained palette:
+
+```css
+.fuselage { fill: #F1EFE8; stroke: #5F5E5A; stroke-width: 1; }
+.wing     { fill: #E6F1FB; stroke: #185FA5; stroke-width: 1; }
+.engine   { fill: #FAECE7; stroke: #993C1D; stroke-width: 1; }
+```
+
+Add these to a local `<style>` inside the SVG (or extend the host page's `<style>` block). The light-mode/dark-mode pattern still works — use the CSS variables from the template (`var(--bg-secondary)`, `var(--border)`, `var(--text-primary)`) if you want dark-mode awareness.
+
+## Reference examples
+
+Look at these example files for working physical-diagram patterns:
+
+- `examples/commercial-aircraft-structure.md` — fuselage curves + tapered wings + ellipse engines
+- `examples/wind-turbine-structure.md` — underground foundation, tubular tower, nacelle cutaway
+- `examples/smartphone-layer-anatomy.md` — exploded-view stack with alternating labels
+- `examples/apartment-floor-plan-conversion.md` — walls, doors, windows, proposed changes
@@ -0,0 +1,174 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Concept Diagram</title>
+<style>
+  :root {
+    --text-primary: #1a1a18;
+    --text-secondary: #5f5e5a;
+    --text-tertiary: #88877f;
+    --bg-primary: #ffffff;
+    --bg-secondary: #f6f5f0;
+    --bg-tertiary: #eeedeb;
+    --border: rgba(0,0,0,0.15);
+    --border-hover: rgba(0,0,0,0.3);
+  }
+  @media (prefers-color-scheme: dark) {
+    :root {
+      --text-primary: #e8e6de;
+      --text-secondary: #b4b2a9;
+      --text-tertiary: #888780;
+      --bg-primary: #1a1a18;
+      --bg-secondary: #2c2c2a;
+      --bg-tertiary: #3d3d3a;
+      --border: rgba(255,255,255,0.15);
+      --border-hover: rgba(255,255,255,0.3);
+    }
+  }
+  * { margin: 0; padding: 0; box-sizing: border-box; }
+  body {
+    font-family: system-ui, -apple-system, sans-serif;
+    background: var(--bg-tertiary);
+    display: flex;
+    justify-content: center;
+    align-items: flex-start;
+    min-height: 100vh;
+    padding: 40px 20px;
+  }
+  .card {
+    background: var(--bg-primary);
+    border-radius: 16px;
+    padding: 32px;
+    max-width: 780px;
+    width: 100%;
+    box-shadow: 0 1px 3px rgba(0,0,0,0.08);
+  }
+  h1 {
+    font-size: 18px;
+    font-weight: 500;
+    color: var(--text-primary);
+    margin-bottom: 8px;
+  }
+  .subtitle {
+    font-size: 13px;
+    color: var(--text-tertiary);
+    margin-bottom: 24px;
+  }
+  svg { width: 100%; height: auto; }
+
+  /* === SVG Design System Classes === */
+
+  /* Text classes */
+  .t  { font-family: system-ui, -apple-system, sans-serif; font-size: 14px; fill: var(--text-primary); }
+  .ts { font-family: system-ui, -apple-system, sans-serif; font-size: 12px; fill: var(--text-secondary); }
+  .th { font-family: system-ui, -apple-system, sans-serif; font-size: 14px; fill: var(--text-primary); font-weight: 500; }
+
+  /* Neutral box */
+  .box { fill: var(--bg-secondary); stroke: var(--border); stroke-width: 0.5px; }
+
+  /* Arrow */
+  .arr { stroke: var(--text-secondary); stroke-width: 1.5px; fill: none; }
+
+  /* Leader line */
+  .leader { stroke: var(--text-tertiary); stroke-width: 0.5px; stroke-dasharray: 4 3; fill: none; }
+
+  /* Clickable node */
+  .node { cursor: pointer; transition: opacity 0.15s; }
+  .node:hover { opacity: 0.82; }
+
+  /* === Color Ramp Classes (light mode) === */
+  .c-purple > rect, .c-purple > circle, .c-purple > ellipse { fill: #EEEDFE; stroke: #534AB7; }
+  .c-purple > .th, .c-purple > text.th { fill: #3C3489; }
+  .c-purple > .ts, .c-purple > text.ts { fill: #534AB7; }
+  .c-purple > .t,  .c-purple > text.t  { fill: #3C3489; }
+
+  .c-teal > rect, .c-teal > circle, .c-teal > ellipse { fill: #E1F5EE; stroke: #0F6E56; }
+  .c-teal > .th, .c-teal > text.th { fill: #085041; }
+  .c-teal > .ts, .c-teal > text.ts { fill: #0F6E56; }
+  .c-teal > .t,  .c-teal > text.t  { fill: #085041; }
+
+  .c-coral > rect, .c-coral > circle, .c-coral > ellipse { fill: #FAECE7; stroke: #993C1D; }
+  .c-coral > .th, .c-coral > text.th { fill: #712B13; }
+  .c-coral > .ts, .c-coral > text.ts { fill: #993C1D; }
+  .c-coral > .t,  .c-coral > text.t  { fill: #712B13; }
+
+  .c-pink > rect, .c-pink > circle, .c-pink > ellipse { fill: #FBEAF0; stroke: #993556; }
+  .c-pink > .th, .c-pink > text.th { fill: #72243E; }
+  .c-pink > .ts, .c-pink > text.ts { fill: #993556; }
+  .c-pink > .t,  .c-pink > text.t  { fill: #72243E; }
+
+  .c-gray > rect, .c-gray > circle, .c-gray > ellipse { fill: #F1EFE8; stroke: #5F5E5A; }
+  .c-gray > .th, .c-gray > text.th { fill: #444441; }
+  .c-gray > .ts, .c-gray > text.ts { fill: #5F5E5A; }
+  .c-gray > .t,  .c-gray > text.t  { fill: #444441; }
+
+  .c-blue > rect, .c-blue > circle, .c-blue > ellipse { fill: #E6F1FB; stroke: #185FA5; }
+  .c-blue > .th, .c-blue > text.th { fill: #0C447C; }
+  .c-blue > .ts, .c-blue > text.ts { fill: #185FA5; }
+  .c-blue > .t,  .c-blue > text.t  { fill: #0C447C; }
+
+  .c-green > rect, .c-green > circle, .c-green > ellipse { fill: #EAF3DE; stroke: #3B6D11; }
+  .c-green > .th, .c-green > text.th { fill: #27500A; }
+  .c-green > .ts, .c-green > text.ts { fill: #3B6D11; }
+  .c-green > .t,  .c-green > text.t  { fill: #27500A; }
+
+  .c-amber > rect, .c-amber > circle, .c-amber > ellipse { fill: #FAEEDA; stroke: #854F0B; }
+  .c-amber > .th, .c-amber > text.th { fill: #633806; }
+  .c-amber > .ts, .c-amber > text.ts { fill: #854F0B; }
+  .c-amber > .t,  .c-amber > text.t  { fill: #633806; }
+
+  .c-red > rect, .c-red > circle, .c-red > ellipse { fill: #FCEBEB; stroke: #A32D2D; }
+  .c-red > .th, .c-red > text.th { fill: #791F1F; }
+  .c-red > .ts, .c-red > text.ts { fill: #A32D2D; }
+  .c-red > .t,  .c-red > text.t  { fill: #791F1F; }
+
+  /* === Dark mode overrides === */
+  @media (prefers-color-scheme: dark) {
+    .c-purple > rect, .c-purple > circle, .c-purple > ellipse { fill: #3C3489; stroke: #AFA9EC; }
+    .c-purple > .th, .c-purple > text.th { fill: #CECBF6; }
+    .c-purple > .ts, .c-purple > text.ts { fill: #AFA9EC; }
+
+    .c-teal > rect, .c-teal > circle, .c-teal > ellipse { fill: #085041; stroke: #5DCAA5; }
+    .c-teal > .th, .c-teal > text.th { fill: #9FE1CB; }
+    .c-teal > .ts, .c-teal > text.ts { fill: #5DCAA5; }
+
+    .c-coral > rect, .c-coral > circle, .c-coral > ellipse { fill: #712B13; stroke: #F0997B; }
+    .c-coral > .th, .c-coral > text.th { fill: #F5C4B3; }
+    .c-coral > .ts, .c-coral > text.ts { fill: #F0997B; }
+
+    .c-pink > rect, .c-pink > circle, .c-pink > ellipse { fill: #72243E; stroke: #ED93B1; }
+    .c-pink > .th, .c-pink > text.th { fill: #F4C0D1; }
+    .c-pink > .ts, .c-pink > text.ts { fill: #ED93B1; }
+
+    .c-gray > rect, .c-gray > circle, .c-gray > ellipse { fill: #444441; stroke: #B4B2A9; }
+    .c-gray > .th, .c-gray > text.th { fill: #D3D1C7; }
+    .c-gray > .ts, .c-gray > text.ts { fill: #B4B2A9; }
+
+    .c-blue > rect, .c-blue > circle, .c-blue > ellipse { fill: #0C447C; stroke: #85B7EB; }
+    .c-blue > .th, .c-blue > text.th { fill: #B5D4F4; }
+    .c-blue > .ts, .c-blue > text.ts { fill: #85B7EB; }
+
+    .c-green > rect, .c-green > circle, .c-green > ellipse { fill: #27500A; stroke: #97C459; }
+    .c-green > .th, .c-green > text.th { fill: #C0DD97; }
+    .c-green > .ts, .c-green > text.ts { fill: #97C459; }
+
+    .c-amber > rect, .c-amber > circle, .c-amber > ellipse { fill: #633806; stroke: #EF9F27; }
+    .c-amber > .th, .c-amber > text.th { fill: #FAC775; }
+    .c-amber > .ts, .c-amber > text.ts { fill: #EF9F27; }
+
+    .c-red > rect, .c-red > circle, .c-red > ellipse { fill: #791F1F; stroke: #F09595; }
+    .c-red > .th, .c-red > text.th { fill: #F7C1C1; }
+    .c-red > .ts, .c-red > text.ts { fill: #F09595; }
+  }
+</style>
+</head>
+<body>
+<div class="card">
+  <h1><!-- DIAGRAM TITLE HERE --></h1>
+  <p class="subtitle"><!-- OPTIONAL SUBTITLE HERE --></p>
+  <!-- PASTE SVG HERE -->
+</div>
+</body>
+</html>
@@ -40,7 +40,7 @@ dependencies = [
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
 dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
-messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
+messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"]
@@ -1674,12 +1674,26 @@ class AIAgent:
        turn-scoped).
        """
        import logging
+        import re as _re
        from hermes_cli.providers import determine_api_mode

        # ── Determine api_mode if not provided ──
        if not api_mode:
            api_mode = determine_api_mode(new_provider, base_url)

+        # Defense-in-depth: ensure OpenCode base_url doesn't carry a trailing
+        # /v1 into the anthropic_messages client, which would cause the SDK to
+        # hit /v1/v1/messages.  `model_switch.switch_model()` already strips
+        # this, but we guard here so any direct callers (future code paths,
+        # tests) can't reintroduce the double-/v1 404 bug.
+        if (
+            api_mode == "anthropic_messages"
+            and new_provider in ("opencode-zen", "opencode-go")
+            and isinstance(base_url, str)
+            and base_url
+        ):
+            base_url = _re.sub(r"/v1/?$", "", base_url)
+
        old_model = self.model
        old_provider = self.provider

@@ -3228,6 +3242,53 @@ class AIAgent:
        except Exception:
            pass

+    def release_clients(self) -> None:
+        """Release LLM client resources WITHOUT tearing down session tool state.
+
+        Used by the gateway when evicting this agent from _agent_cache for
+        memory-management reasons (LRU cap or idle TTL) — the session may
+        resume at any time with a freshly-built AIAgent that reuses the
+        same task_id / session_id, so we must NOT kill:
+          - process_registry entries for task_id (user's bg shells)
+          - terminal sandbox for task_id (cwd, env, shell state)
+          - browser daemon for task_id (open tabs, cookies)
+          - memory provider (has its own lifecycle; keeps running)
+
+        We DO close:
+          - OpenAI/httpx client pool (big chunk of held memory + sockets;
+            the rebuilt agent gets a fresh client anyway)
+          - Active child subagents (per-turn artefacts; safe to drop)
+
+        Safe to call multiple times.  Distinct from close() — which is the
+        hard teardown for actual session boundaries (/new, /reset, session
+        expiry).
+        """
+        # Close active child agents (per-turn; no cross-turn persistence).
+        try:
+            with self._active_children_lock:
+                children = list(self._active_children)
+                self._active_children.clear()
+            for child in children:
+                try:
+                    child.release_clients()
+                except Exception:
+                    # Fall back to full close on children; they're per-turn.
+                    try:
+                        child.close()
+                    except Exception:
+                        pass
+        except Exception:
+            pass
+
+        # Close the OpenAI/httpx client to release sockets immediately.
+        try:
+            client = getattr(self, "client", None)
+            if client is not None:
+                self._close_openai_client(client, reason="cache_evict", shared=True)
+                self.client = None
+        except Exception:
+            pass
+
    def close(self) -> None:
        """Release all resources held by this agent instance.

@@ -4381,6 +4442,41 @@ class AIAgent:
                self._client_log_context(),
            )
            return client
+        # Inject TCP keepalives so the kernel detects dead provider connections
+        # instead of letting them sit silently in CLOSE-WAIT (#10324).  Without
+        # this, a peer that drops mid-stream leaves the socket in a state where
+        # epoll_wait never fires, ``httpx`` read timeout may not trigger, and
+        # the agent hangs until manually killed.  Probes after 30s idle, retry
+        # every 10s, give up after 3 → dead peer detected within ~60s.
+        #
+        # Safety against #10933: the ``client_kwargs = dict(client_kwargs)``
+        # above means this injection only lands in the local per-call copy,
+        # never back into ``self._client_kwargs``.  Each ``_create_openai_client``
+        # invocation therefore gets its OWN fresh ``httpx.Client`` whose
+        # lifetime is tied to the OpenAI client it is passed to.  When the
+        # OpenAI client is closed (rebuild, teardown, credential rotation),
+        # the paired ``httpx.Client`` closes with it, and the next call
+        # constructs a fresh one — no stale closed transport can be reused.
+        # Tests in ``tests/run_agent/test_create_openai_client_reuse.py`` and
+        # ``tests/run_agent/test_sequential_chats_live.py`` pin this invariant.
+        if "http_client" not in client_kwargs:
+            try:
+                import httpx as _httpx
+                import socket as _socket
+                _sock_opts = [(_socket.SOL_SOCKET, _socket.SO_KEEPALIVE, 1)]
+                if hasattr(_socket, "TCP_KEEPIDLE"):
+                    # Linux
+                    _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPIDLE, 30))
+                    _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPINTVL, 10))
+                    _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPCNT, 3))
+                elif hasattr(_socket, "TCP_KEEPALIVE"):
+                    # macOS (uses TCP_KEEPALIVE instead of TCP_KEEPIDLE)
+                    _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30))
+                client_kwargs["http_client"] = _httpx.Client(
+                    transport=_httpx.HTTPTransport(socket_options=_sock_opts),
+                )
+            except Exception:
+                pass  # Fall through to default transport if socket opts fail
        client = OpenAI(**client_kwargs)
        logger.info(
            "OpenAI client created (%s, shared=%s) %s",
@@ -122,6 +122,43 @@ log_error() {
    echo -e "${RED}✗${NC} $1"
 }

+prompt_yes_no() {
+    local question="$1"
+    local default="${2:-yes}"
+    local prompt_suffix
+    local answer=""
+
+    # Use case patterns (not ${var,,}) so this works on bash 3.2 (macOS /bin/bash).
+    case "$default" in
+        [yY]|[yY][eE][sS]|[tT][rR][uU][eE]|1) prompt_suffix="[Y/n]" ;;
+        *) prompt_suffix="[y/N]" ;;
+    esac
+
+    if [ "$IS_INTERACTIVE" = true ]; then
+        read -r -p "$question $prompt_suffix " answer || answer=""
+    elif [ -r /dev/tty ] && [ -w /dev/tty ]; then
+        printf "%s %s " "$question" "$prompt_suffix" > /dev/tty
+        IFS= read -r answer < /dev/tty || answer=""
+    else
+        answer=""
+    fi
+
+    answer="${answer#"${answer%%[![:space:]]*}"}"
+    answer="${answer%"${answer##*[![:space:]]}"}"
+
+    if [ -z "$answer" ]; then
+        case "$default" in
+            [yY]|[yY][eE][sS]|[tT][rR][uU][eE]|1) return 0 ;;
+            *) return 1 ;;
+        esac
+    fi
+
+    case "$answer" in
+        [yY]|[yY][eE][sS]) return 0 ;;
+        *) return 1 ;;
+    esac
+}
+
 is_termux() {
    [ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
 }
@@ -606,9 +643,7 @@ install_system_packages() {
                echo ""
                log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager."
                log_info "Hermes Agent itself does not require or retain root access."
-                read -p "Install ${description}? (requires sudo) [y/N] " -n 1 -r
-                echo
-                if [[ $REPLY =~ ^[Yy]$ ]]; then
+                if prompt_yes_no "Install ${description}? (requires sudo)" "no"; then
                    if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd; then
                        [ "$need_ripgrep" = true ] && HAS_RIPGREP=true && log_success "ripgrep installed"
                        [ "$need_ffmpeg" = true ]  && HAS_FFMPEG=true  && log_success "ffmpeg installed"
@@ -621,9 +656,7 @@ install_system_packages() {
                echo ""
                log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager."
                log_info "Hermes Agent itself does not require or retain root access."
-                read -p "Install ${description}? [Y/n] " -n 1 -r < /dev/tty
-                echo
-                if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+                if prompt_yes_no "Install ${description}?" "yes"; then
                    if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd < /dev/tty; then
                        [ "$need_ripgrep" = true ] && HAS_RIPGREP=true && log_success "ripgrep installed"
                        [ "$need_ffmpeg" = true ]  && HAS_FFMPEG=true  && log_success "ffmpeg installed"
@@ -863,9 +896,7 @@ install_deps() {
                else
                    log_info "sudo is needed ONLY to install build tools (build-essential, python3-dev, libffi-dev) via apt."
                    log_info "Hermes Agent itself does not require or retain root access."
-                    read -p "Install build tools? [Y/n] " -n 1 -r < /dev/tty
-                    echo
-                    if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+                    if prompt_yes_no "Install build tools?" "yes"; then
                        sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get install -y -qq build-essential python3-dev libffi-dev >/dev/null 2>&1 || true
                        log_success "Build tools installed"
                    fi
@@ -1236,9 +1267,7 @@ maybe_start_gateway() {
            log_info "WhatsApp is enabled but not yet paired."
            log_info "Running 'hermes whatsapp' to pair via QR code..."
            echo ""
-            read -p "Pair WhatsApp now? [Y/n] " -n 1 -r
-            echo
-            if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+            if prompt_yes_no "Pair WhatsApp now?" "yes"; then
                HERMES_CMD="$(get_hermes_command_path)"
                $HERMES_CMD whatsapp || true
            fi
@@ -1253,14 +1282,18 @@ maybe_start_gateway() {
    fi

    echo ""
+    local should_install_gateway=false
    if [ "$DISTRO" = "termux" ]; then
-        read -p "Would you like to start the gateway in the background? [Y/n] " -n 1 -r < /dev/tty
+        if prompt_yes_no "Would you like to start the gateway in the background?" "yes"; then
+            should_install_gateway=true
+        fi
    else
-        read -p "Would you like to install the gateway as a background service? [Y/n] " -n 1 -r < /dev/tty
+        if prompt_yes_no "Would you like to install the gateway as a background service?" "yes"; then
+            should_install_gateway=true
+        fi
    fi
-    echo

-    if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+    if [ "$should_install_gateway" = true ]; then
        HERMES_CMD="$(get_hermes_command_path)"

        if [ "$DISTRO" != "termux" ] && command -v systemctl &> /dev/null; then
@@ -50,9 +50,12 @@ AUTHOR_MAP = {
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
+    "valdi.jorge@gmail.com": "jvcl",
    "126368201+vilkasdev@users.noreply.github.com": "vilkasdev",
    "137614867+cutepawss@users.noreply.github.com": "cutepawss",
    "96793918+memosr@users.noreply.github.com": "memosr",
+    "milkoor@users.noreply.github.com": "milkoor",
+    "xuerui911@gmail.com": "Fatty911",
    "131039422+SHL0MS@users.noreply.github.com": "SHL0MS",
    "77628552+raulvidis@users.noreply.github.com": "raulvidis",
    "145567217+Aum08Desai@users.noreply.github.com": "Aum08Desai",
@@ -68,6 +71,8 @@ AUTHOR_MAP = {
    "27917469+nosleepcassette@users.noreply.github.com": "nosleepcassette",
    "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
    "109555139+davetist@users.noreply.github.com": "davetist",
+    "39405770+yyq4193@users.noreply.github.com": "yyq4193",
+    "Asunfly@users.noreply.github.com": "Asunfly",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
    "dmayhem93@gmail.com": "dmahan93",
@@ -80,6 +85,9 @@ AUTHOR_MAP = {
    "xaydinoktay@gmail.com": "aydnOktay",
    "abdullahfarukozden@gmail.com": "Farukest",
    "lovre.pesut@gmail.com": "rovle",
+    "kevinskysunny@gmail.com": "kevinskysunny",
+    "xiewenxuan462@gmail.com": "yule975",
+    "yiweimeng.dlut@hotmail.com": "meng93",
    "hakanerten02@hotmail.com": "teyrebaz33",
    "ruzzgarcn@gmail.com": "Ruzzgar",
    "alireza78.crypto@gmail.com": "alireza78a",
@@ -92,6 +100,7 @@ AUTHOR_MAP = {
    "mcosma@gmail.com": "wakamex",
    "clawdia.nash@proton.me": "clawdia-nash",
    "pickett.austin@gmail.com": "austinpickett",
+    "dangtc94@gmail.com": "dieutx",
    "jaisehgal11299@gmail.com": "jaisup",
    "percydikec@gmail.com": "PercyDikec",
    "dean.kerr@gmail.com": "deankerr",
@@ -174,6 +183,7 @@ AUTHOR_MAP = {
    "juan.ovalle@mistral.ai": "jjovalle99",
    "julien.talbot@ergonomia.re": "Julientalbot",
    "kagura.chen28@gmail.com": "kagura-agent",
+    "1342088860@qq.com": "youngDoo",
    "kamil@gwozdz.me": "kamil-gwozdz",
    "karamusti912@gmail.com": "MustafaKara7",
    "kira@ariaki.me": "kira-ariaki",
@@ -228,6 +238,23 @@ AUTHOR_MAP = {
    "zaynjarvis@gmail.com": "ZaynJarvis",
    "zhiheng.liu@bytedance.com": "ZaynJarvis",
    "mbelleau@Michels-MacBook-Pro.local": "malaiwah",
+    "michel.belleau@malaiwah.com": "malaiwah",
+    "gnanasekaran.sekareee@gmail.com": "gnanam1990",
+    "jz.pentest@gmail.com": "0xyg3n",
+    "hypnosis.mda@gmail.com": "Hypn0sis",
+    "ywt000818@gmail.com": "OwenYWT",
+    "dhandhalyabhavik@gmail.com": "v1k22",
+    "rucchizhao@zhaochenfeideMacBook-Pro.local": "RucchiZ",
+    "lehaolin98@outlook.com": "LehaoLin",
+    "yuewang1@microsoft.com": "imink",
+    "1736355688@qq.com": "hedgeho9X",
+    "bernylinville@devopsthink.org": "bernylinville",
+    "brian@bde.io": "briandevans",
+    "hubin_ll@qq.com": "LLQWQ",
+    "memosr_email@gmail.com": "memosr",
+    "anthhub@163.com": "anthhub",
+    "shenuu@gmail.com": "shenuu",
+    "xiayh17@gmail.com": "xiayh0107",
 }


@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+# Canonical test runner for hermes-agent. Run this instead of calling
+# `pytest` directly to guarantee your local run matches CI behavior.
+#
+# What this script enforces:
+#   * -n 4 xdist workers (CI has 4 cores; -n auto diverges locally)
+#   * TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0 (deterministic)
+#   * Credential env vars blanked (conftest.py also does this, but this
+#     is belt-and-suspenders for anyone running `pytest` outside of
+#     our conftest path — e.g. calling pytest on a single file)
+#   * Proper venv activation
+#
+# Usage:
+#   scripts/run_tests.sh                     # full suite
+#   scripts/run_tests.sh tests/agent/        # one directory
+#   scripts/run_tests.sh tests/agent/test_foo.py::TestClass::test_method
+#   scripts/run_tests.sh --tb=long -v        # pass-through pytest args
+
+set -euo pipefail
+
+# ── Locate repo root ────────────────────────────────────────────────────────
+# Works whether this is the main checkout or a worktree.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# ── Activate venv ───────────────────────────────────────────────────────────
+# Prefer a .venv in the current tree, fall back to the main checkout's venv
+# (useful for worktrees where we don't always duplicate the venv).
+VENV=""
+for candidate in "$REPO_ROOT/.venv" "$REPO_ROOT/venv" "$HOME/.hermes/hermes-agent/venv"; do
+  if [ -f "$candidate/bin/activate" ]; then
+    VENV="$candidate"
+    break
+  fi
+done
+
+if [ -z "$VENV" ]; then
+  echo "error: no virtualenv found in $REPO_ROOT/.venv or $REPO_ROOT/venv" >&2
+  exit 1
+fi
+
+PYTHON="$VENV/bin/python"
+
+# ── Ensure pytest-split is installed (required for shard-equivalent runs) ──
+if ! "$PYTHON" -c "import pytest_split" 2>/dev/null; then
+  echo "→ installing pytest-split into $VENV"
+  "$PYTHON" -m pip install --quiet "pytest-split>=0.9,<1"
+fi
+
+# ── Hermetic environment ────────────────────────────────────────────────────
+# Mirror what CI does in .github/workflows/tests.yml + what conftest.py does.
+# Unset every credential-shaped var currently in the environment.
+while IFS='=' read -r name _; do
+  case "$name" in
+    *_API_KEY|*_TOKEN|*_SECRET|*_PASSWORD|*_CREDENTIALS|*_ACCESS_KEY| \
+    *_SECRET_ACCESS_KEY|*_PRIVATE_KEY|*_OAUTH_TOKEN|*_WEBHOOK_SECRET| \
+    *_ENCRYPT_KEY|*_APP_SECRET|*_CLIENT_SECRET|*_CORP_SECRET|*_AES_KEY| \
+    AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|FAL_KEY| \
+    GH_TOKEN|GITHUB_TOKEN)
+      unset "$name"
+      ;;
+  esac
+done < <(env)
+
+# Unset HERMES_* behavioral vars too.
+unset HERMES_YOLO_MODE HERMES_INTERACTIVE HERMES_QUIET HERMES_TOOL_PROGRESS \
+      HERMES_TOOL_PROGRESS_MODE HERMES_MAX_ITERATIONS HERMES_SESSION_PLATFORM \
+      HERMES_SESSION_CHAT_ID HERMES_SESSION_CHAT_NAME HERMES_SESSION_THREAD_ID \
+      HERMES_SESSION_SOURCE HERMES_SESSION_KEY HERMES_GATEWAY_SESSION \
+      HERMES_PLATFORM HERMES_INFERENCE_PROVIDER HERMES_MANAGED HERMES_DEV \
+      HERMES_CONTAINER HERMES_EPHEMERAL_SYSTEM_PROMPT HERMES_TIMEZONE \
+      HERMES_REDACT_SECRETS HERMES_BACKGROUND_NOTIFICATIONS HERMES_EXEC_ASK \
+      HERMES_HOME_MODE 2>/dev/null || true
+
+# Pin deterministic runtime.
+export TZ=UTC
+export LANG=C.UTF-8
+export LC_ALL=C.UTF-8
+export PYTHONHASHSEED=0
+
+# ── Worker count ────────────────────────────────────────────────────────────
+# CI uses `-n auto` on ubuntu-latest which gives 4 workers. A 20-core
+# workstation with `-n auto` gets 20 workers and exposes test-ordering
+# flakes that CI will never see. Pin to 4 so local matches CI.
+WORKERS="${HERMES_TEST_WORKERS:-4}"
+
+# ── Run pytest ──────────────────────────────────────────────────────────────
+cd "$REPO_ROOT"
+
+# If the first argument starts with `-` treat all args as pytest flags;
+# otherwise treat them as test paths.
+ARGS=("$@")
+
+echo "▶ running pytest with $WORKERS workers, hermetic env, in $REPO_ROOT"
+echo "  (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; all credential env vars unset)"
+
+# -o "addopts=" clears pyproject.toml's `-n auto` so our -n wins.
+exec "$PYTHON" -m pytest \
+  -o "addopts=" \
+  -n "$WORKERS" \
+  --ignore=tests/integration \
+  --ignore=tests/e2e \
+  -m "not integration" \
+  "${ARGS[@]}"
@@ -1,6 +1,6 @@
 ---
 name: architecture-diagram
-description: Generate professional dark-themed system architecture diagrams as standalone HTML/SVG files. Self-contained output with no external dependencies. Based on Cocoon AI's architecture-diagram-generator (MIT).
+description: Generate dark-themed SVG diagrams of software systems and cloud infrastructure as standalone HTML files with inline SVG graphics. Semantic component colors (cyan=frontend, emerald=backend, violet=database, amber=cloud/AWS, rose=security, orange=message bus), JetBrains Mono font, grid background. Best suited for software architecture, cloud/VPC topology, microservice maps, service-mesh diagrams, database + API layer diagrams, security groups, message buses — anything that fits a tech-infra deck with a dark aesthetic. If a more specialized diagramming skill exists for the subject (scientific, educational, hand-drawn, animated, etc.), prefer that — otherwise this skill can also serve as a general-purpose SVG diagram fallback. Based on Cocoon AI's architecture-diagram-generator (MIT).
 version: 1.0.0
 author: Cocoon AI (hello@cocoon-ai.com), ported by Hermes Agent
 license: MIT
@@ -8,13 +8,31 @@ dependencies: []
 metadata:
  hermes:
    tags: [architecture, diagrams, SVG, HTML, visualization, infrastructure, cloud]
-    related_skills: [excalidraw]
+    related_skills: [concept-diagrams, excalidraw]
 ---

 # Architecture Diagram Skill

 Generate professional, dark-themed technical architecture diagrams as standalone HTML files with inline SVG graphics. No external tools, no API keys, no rendering libraries — just write the HTML file and open it in a browser.

+## Scope
+
+**Best suited for:**
+- Software system architecture (frontend / backend / database layers)
+- Cloud infrastructure (VPC, regions, subnets, managed services)
+- Microservice / service-mesh topology
+- Database + API map, deployment diagrams
+- Anything with a tech-infra subject that fits a dark, grid-backed aesthetic
+
+**Look elsewhere first for:**
+- Physics, chemistry, math, biology, or other scientific subjects
+- Physical objects (vehicles, hardware, anatomy, cross-sections)
+- Floor plans, narrative journeys, educational / textbook-style visuals
+- Hand-drawn whiteboard sketches (consider `excalidraw`)
+- Animated explainers (consider an animation skill)
+
+If a more specialized skill is available for the subject, prefer that. If none fits, this skill can also serve as a general SVG diagram fallback — the output will just carry the dark tech aesthetic described below.
+
 Based on [Cocoon AI's architecture-diagram-generator](https://github.com/Cocoon-AI/architecture-diagram-generator) (MIT).

 ## Workflow
@@ -167,13 +167,6 @@ class TestSessionOps:
        assert model_cmd.input is not None
        assert model_cmd.input.root.hint == "model name to switch to"

-    @pytest.mark.asyncio
-    async def test_new_session_schedules_available_commands_update(self, agent):
-        with patch.object(agent, "_schedule_available_commands_update") as mock_schedule:
-            resp = await agent.new_session(cwd="/home/user/project")
-
-        mock_schedule.assert_called_once_with(resp.session_id)
-
    @pytest.mark.asyncio
    async def test_cancel_sets_event(self, agent):
        resp = await agent.new_session(cwd=".")
@@ -187,41 +180,11 @@ class TestSessionOps:
        # Should not raise
        await agent.cancel(session_id="does-not-exist")

-    @pytest.mark.asyncio
-    async def test_load_session_returns_response(self, agent):
-        resp = await agent.new_session(cwd="/tmp")
-        load_resp = await agent.load_session(cwd="/tmp", session_id=resp.session_id)
-        assert isinstance(load_resp, LoadSessionResponse)
-
-    @pytest.mark.asyncio
-    async def test_load_session_schedules_available_commands_update(self, agent):
-        resp = await agent.new_session(cwd="/tmp")
-        with patch.object(agent, "_schedule_available_commands_update") as mock_schedule:
-            load_resp = await agent.load_session(cwd="/tmp", session_id=resp.session_id)
-
-        assert isinstance(load_resp, LoadSessionResponse)
-        mock_schedule.assert_called_once_with(resp.session_id)
-
    @pytest.mark.asyncio
    async def test_load_session_not_found_returns_none(self, agent):
        resp = await agent.load_session(cwd="/tmp", session_id="bogus")
        assert resp is None

-    @pytest.mark.asyncio
-    async def test_resume_session_returns_response(self, agent):
-        resp = await agent.new_session(cwd="/tmp")
-        resume_resp = await agent.resume_session(cwd="/tmp", session_id=resp.session_id)
-        assert isinstance(resume_resp, ResumeSessionResponse)
-
-    @pytest.mark.asyncio
-    async def test_resume_session_schedules_available_commands_update(self, agent):
-        resp = await agent.new_session(cwd="/tmp")
-        with patch.object(agent, "_schedule_available_commands_update") as mock_schedule:
-            resume_resp = await agent.resume_session(cwd="/tmp", session_id=resp.session_id)
-
-        assert isinstance(resume_resp, ResumeSessionResponse)
-        mock_schedule.assert_called_once_with(resp.session_id)
-
    @pytest.mark.asyncio
    async def test_resume_session_creates_new_if_missing(self, agent):
        resume_resp = await agent.resume_session(cwd="/tmp", session_id="nonexistent")
@@ -234,14 +197,6 @@ class TestSessionOps:


 class TestListAndFork:
-    @pytest.mark.asyncio
-    async def test_list_sessions(self, agent):
-        await agent.new_session(cwd="/a")
-        await agent.new_session(cwd="/b")
-        resp = await agent.list_sessions()
-        assert isinstance(resp, ListSessionsResponse)
-        assert len(resp.sessions) == 2
-
    @pytest.mark.asyncio
    async def test_fork_session(self, agent):
        new_resp = await agent.new_session(cwd="/original")
@@ -249,16 +204,6 @@ class TestListAndFork:
        assert fork_resp.session_id
        assert fork_resp.session_id != new_resp.session_id

-    @pytest.mark.asyncio
-    async def test_fork_session_schedules_available_commands_update(self, agent):
-        new_resp = await agent.new_session(cwd="/original")
-        with patch.object(agent, "_schedule_available_commands_update") as mock_schedule:
-            fork_resp = await agent.fork_session(cwd="/forked", session_id=new_resp.session_id)
-
-        assert fork_resp.session_id
-        mock_schedule.assert_called_once_with(fork_resp.session_id)
-
-
 # ---------------------------------------------------------------------------
 # session configuration / model routing
 # ---------------------------------------------------------------------------
@@ -274,20 +219,6 @@ class TestSessionConfiguration:
        assert isinstance(resp, SetSessionModeResponse)
        assert getattr(state, "mode", None) == "chat"

-    @pytest.mark.asyncio
-    async def test_set_config_option_returns_response(self, agent):
-        new_resp = await agent.new_session(cwd="/tmp")
-        resp = await agent.set_config_option(
-            config_id="approval_mode",
-            session_id=new_resp.session_id,
-            value="auto",
-        )
-        state = agent.session_manager.get_session(new_resp.session_id)
-
-        assert isinstance(resp, SetSessionConfigOptionResponse)
-        assert getattr(state, "config_options", {}) == {"approval_mode": "auto"}
-        assert resp.config_options == []
-
    @pytest.mark.asyncio
    async def test_router_accepts_stable_session_config_methods(self, agent):
        new_resp = await agent.new_session(cwd="/tmp")
@@ -808,47 +739,3 @@ class TestRegisterSessionMcpServers:
        with patch("tools.mcp_tool.register_mcp_servers", side_effect=RuntimeError("boom")):
            # Should not raise
            await agent._register_session_mcp_servers(state, [server])
-
-    @pytest.mark.asyncio
-    async def test_new_session_calls_register(self, agent, mock_manager):
-        """new_session passes mcp_servers to _register_session_mcp_servers."""
-        with patch.object(agent, "_register_session_mcp_servers", new_callable=AsyncMock) as mock_reg:
-            resp = await agent.new_session(cwd="/tmp", mcp_servers=["fake"])
-            assert resp is not None
-            mock_reg.assert_called_once()
-            # Second arg should be the mcp_servers list
-            assert mock_reg.call_args[0][1] == ["fake"]
-
-    @pytest.mark.asyncio
-    async def test_load_session_calls_register(self, agent, mock_manager):
-        """load_session passes mcp_servers to _register_session_mcp_servers."""
-        # Create a session first so load can find it
-        state = mock_manager.create_session(cwd="/tmp")
-        sid = state.session_id
-
-        with patch.object(agent, "_register_session_mcp_servers", new_callable=AsyncMock) as mock_reg:
-            resp = await agent.load_session(cwd="/tmp", session_id=sid, mcp_servers=["fake"])
-            assert resp is not None
-            mock_reg.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_resume_session_calls_register(self, agent, mock_manager):
-        """resume_session passes mcp_servers to _register_session_mcp_servers."""
-        state = mock_manager.create_session(cwd="/tmp")
-        sid = state.session_id
-
-        with patch.object(agent, "_register_session_mcp_servers", new_callable=AsyncMock) as mock_reg:
-            resp = await agent.resume_session(cwd="/tmp", session_id=sid, mcp_servers=["fake"])
-            assert resp is not None
-            mock_reg.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_fork_session_calls_register(self, agent, mock_manager):
-        """fork_session passes mcp_servers to _register_session_mcp_servers."""
-        state = mock_manager.create_session(cwd="/tmp")
-        sid = state.session_id
-
-        with patch.object(agent, "_register_session_mcp_servers", new_callable=AsyncMock) as mock_reg:
-            resp = await agent.fork_session(cwd="/tmp", session_id=sid, mcp_servers=["fake"])
-            assert resp is not None
-            mock_reg.assert_called_once()
@@ -436,17 +436,6 @@ class TestExpiredCodexFallback:
 class TestExplicitProviderRouting:
    """Test explicit provider selection bypasses auto chain correctly."""

-    def test_explicit_anthropic_oauth(self, monkeypatch):
-        """provider='anthropic' + OAuth token should work with is_oauth=True."""
-        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-explicit-test")
-        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
-            mock_build.return_value = MagicMock()
-            client, model = resolve_provider_client("anthropic")
-            assert client is not None
-            # Verify OAuth flag propagated
-            adapter = client.chat.completions
-            assert adapter._is_oauth is True
-
    def test_explicit_anthropic_api_key(self, monkeypatch):
        """provider='anthropic' + regular API key should work with is_oauth=False."""
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api-regular-key"), \
@@ -458,146 +447,9 @@ class TestExplicitProviderRouting:
            adapter = client.chat.completions
            assert adapter._is_oauth is False

-    def test_explicit_openrouter(self, monkeypatch):
-        """provider='openrouter' should use OPENROUTER_API_KEY."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-explicit")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            mock_openai.return_value = MagicMock()
-            client, model = resolve_provider_client("openrouter")
-            assert client is not None
-
-    def test_explicit_kimi(self, monkeypatch):
-        """provider='kimi-coding' should use KIMI_API_KEY."""
-        monkeypatch.setenv("KIMI_API_KEY", "kimi-test-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            mock_openai.return_value = MagicMock()
-            client, model = resolve_provider_client("kimi-coding")
-            assert client is not None
-
-    def test_explicit_minimax(self, monkeypatch):
-        """provider='minimax' should use MINIMAX_API_KEY."""
-        monkeypatch.setenv("MINIMAX_API_KEY", "mm-test-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            mock_openai.return_value = MagicMock()
-            client, model = resolve_provider_client("minimax")
-            assert client is not None
-
-    def test_explicit_deepseek(self, monkeypatch):
-        """provider='deepseek' should use DEEPSEEK_API_KEY."""
-        monkeypatch.setenv("DEEPSEEK_API_KEY", "ds-test-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            mock_openai.return_value = MagicMock()
-            client, model = resolve_provider_client("deepseek")
-            assert client is not None
-
-    def test_explicit_zai(self, monkeypatch):
-        """provider='zai' should use GLM_API_KEY."""
-        monkeypatch.setenv("GLM_API_KEY", "zai-test-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            mock_openai.return_value = MagicMock()
-            client, model = resolve_provider_client("zai")
-            assert client is not None
-
-    def test_explicit_google_alias_uses_gemini_credentials(self):
-        """provider='google' should route through the gemini API-key provider."""
-        with (
-            patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
-                "api_key": "gemini-key",
-                "base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
-            }),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            mock_openai.return_value = MagicMock()
-            client, model = resolve_provider_client("google", model="gemini-3.1-pro-preview")
-
-        assert client is not None
-        assert model == "gemini-3.1-pro-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "gemini-key"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai"
-
-    def test_explicit_unknown_returns_none(self, monkeypatch):
-        """Unknown provider should return None."""
-        client, model = resolve_provider_client("nonexistent-provider")
-        assert client is None
-
-
 class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

-    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client()
-        assert model == "google/gemini-3-flash-preview"
-        mock_openai.assert_called_once()
-        call_kwargs = mock_openai.call_args
-        assert call_kwargs.kwargs["api_key"] == "or-key"
-
-    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
-        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
-             patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            mock_nous.return_value = {"access_token": "nous-tok"}
-            client, model = get_text_auxiliary_client()
-        assert model == "google/gemini-3-flash-preview"
-
-    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
-        config = {
-            "model": {
-                "provider": "custom",
-                "base_url": "http://localhost:1234/v1",
-                "default": "my-local-model",
-            }
-        }
-        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
-        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
-        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
-        # Override the autouse monkeypatch for codex
-        monkeypatch.setattr(
-            "agent.auxiliary_client._read_codex_access_token",
-            lambda: "codex-test-token-abc123",
-        )
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client()
-        assert model == "my-local-model"
-        call_kwargs = mock_openai.call_args
-        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
-
-    def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
-        config = {
-            "model": {
-                "provider": "custom",
-                "base_url": "http://localhost:1234/v1",
-                "default": "my-local-model",
-            }
-        }
-        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
-        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
-        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
-
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
-             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
-             patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client()
-
-        assert client is not None
-        assert model == "my-local-model"
-        call_kwargs = mock_openai.call_args
-        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
-
-    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
-        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
-             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
-             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
-             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
-             patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client()
-        assert model == "gpt-5.2-codex"
-        # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
-        from agent.auxiliary_client import CodexAuxiliaryClient
-        assert isinstance(client, CodexAuxiliaryClient)
-
    def test_codex_pool_entry_takes_priority_over_auth_store(self):
        class _Entry:
            access_token = "pooled-codex-token"
@@ -624,395 +476,6 @@ class TestGetTextAuxiliaryClient:
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.2-codex"

-    def test_returns_none_when_nothing_available(self, monkeypatch):
-        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
-        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        with patch("agent.auxiliary_client._resolve_auto", return_value=(None, None)):
-            client, model = get_text_auxiliary_client()
-        assert client is None
-        assert model is None
-
-    def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self, monkeypatch):
-        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
-        with patch("agent.auxiliary_client._resolve_custom_runtime",
-                   return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
-             patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
-             patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
-             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
-             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
-             patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client()
-
-        from agent.auxiliary_client import CodexAuxiliaryClient
-        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
-        assert mock_openai.call_args.kwargs["api_key"] == "sk-test"
-
-
-class TestVisionClientFallback:
-    """Vision client auto mode resolves known-good multimodal backends."""
-
-    def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
-        """Active provider appears in available backends when credentials exist."""
-        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
-        with (
-            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
-            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
-            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
-        ):
-            backends = get_available_vision_backends()
-
-        assert "anthropic" in backends
-
-    def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
-        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
-        with (
-            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
-        ):
-            client, model = resolve_provider_client("anthropic")
-
-        assert client is not None
-        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
-        assert model == "claude-haiku-4-5-20251001"
-
-
-class TestAuxiliaryPoolAwareness:
-    def test_try_nous_uses_pool_entry(self):
-        class _Entry:
-            access_token = "pooled-access-token"
-            agent_key = "pooled-agent-key"
-            inference_base_url = "https://inference.pool.example/v1"
-
-        class _Pool:
-            def has_credentials(self):
-                return True
-
-            def select(self):
-                return _Entry()
-
-        with (
-            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            from agent.auxiliary_client import _try_nous
-
-            client, model = _try_nous()
-
-        assert client is not None
-        assert model == "gemini-3-flash"
-        call_kwargs = mock_openai.call_args.kwargs
-        assert call_kwargs["api_key"] == "pooled-agent-key"
-        assert call_kwargs["base_url"] == "https://inference.pool.example/v1"
-
-    def test_resolve_provider_client_copilot_uses_runtime_credentials(self, monkeypatch):
-        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
-        monkeypatch.delenv("GH_TOKEN", raising=False)
-
-        with (
-            patch(
-                "hermes_cli.auth.resolve_api_key_provider_credentials",
-                return_value={
-                    "provider": "copilot",
-                    "api_key": "gh-cli-token",
-                    "base_url": "https://api.githubcopilot.com",
-                    "source": "gh auth token",
-                },
-            ),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            client, model = resolve_provider_client("copilot", model="gpt-5.4")
-
-        assert client is not None
-        assert model == "gpt-5.4"
-        call_kwargs = mock_openai.call_args.kwargs
-        assert call_kwargs["api_key"] == "gh-cli-token"
-        assert call_kwargs["base_url"] == "https://api.githubcopilot.com"
-        assert call_kwargs["default_headers"]["Editor-Version"]
-
-    def test_copilot_responses_api_model_wrapped_in_codex_client(self, monkeypatch):
-        """Copilot GPT-5+ models (needing Responses API) are wrapped in CodexAuxiliaryClient."""
-        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
-        monkeypatch.delenv("GH_TOKEN", raising=False)
-
-        with (
-            patch(
-                "hermes_cli.auth.resolve_api_key_provider_credentials",
-                return_value={
-                    "provider": "copilot",
-                    "api_key": "test-token",
-                    "base_url": "https://api.githubcopilot.com",
-                    "source": "gh auth token",
-                },
-            ),
-            patch("agent.auxiliary_client.OpenAI"),
-        ):
-            client, model = resolve_provider_client("copilot", model="gpt-5.4-mini")
-
-        from agent.auxiliary_client import CodexAuxiliaryClient
-        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.4-mini"
-
-    def test_copilot_chat_completions_model_not_wrapped(self, monkeypatch):
-        """Copilot models using Chat Completions are returned as plain OpenAI clients."""
-        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
-        monkeypatch.delenv("GH_TOKEN", raising=False)
-
-        with (
-            patch(
-                "hermes_cli.auth.resolve_api_key_provider_credentials",
-                return_value={
-                    "provider": "copilot",
-                    "api_key": "test-token",
-                    "base_url": "https://api.githubcopilot.com",
-                    "source": "gh auth token",
-                },
-            ),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            client, model = resolve_provider_client("copilot", model="gpt-4.1-mini")
-
-        from agent.auxiliary_client import CodexAuxiliaryClient
-        assert not isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-4.1-mini"
-        # Should be the raw mock OpenAI client
-        assert client is mock_openai.return_value
-
-    def test_vision_auto_uses_active_provider_as_fallback(self, monkeypatch):
-        """When no OpenRouter/Nous available, vision auto falls back to active provider."""
-        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
-        with (
-            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
-            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
-            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
-        ):
-            provider, client, model = resolve_vision_provider_client()
-
-        assert client is not None
-        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
-
-    def test_vision_auto_prefers_active_provider_over_openrouter(self, monkeypatch):
-        """Active provider is tried before OpenRouter in vision auto."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
-
-        with (
-            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
-            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
-            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
-        ):
-            provider, client, model = resolve_vision_provider_client()
-
-        # Active provider should win over OpenRouter
-        assert provider == "anthropic"
-
-    def test_vision_auto_uses_named_custom_as_active_provider(self, monkeypatch):
-        """Named custom provider works as active provider fallback in vision auto."""
-        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)), \
-             patch("agent.auxiliary_client._read_main_provider", return_value="custom:local"), \
-             patch("agent.auxiliary_client._read_main_model", return_value="my-local-model"), \
-             patch("agent.auxiliary_client.resolve_provider_client",
-                   return_value=(MagicMock(), "my-local-model")) as mock_resolve:
-            provider, client, model = resolve_vision_provider_client()
-        assert client is not None
-        assert provider == "custom:local"
-
-    def test_vision_config_google_provider_uses_gemini_credentials(self, monkeypatch):
-        config = {
-            "auxiliary": {
-                "vision": {
-                    "provider": "google",
-                    "model": "gemini-3.1-pro-preview",
-                }
-            }
-        }
-        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
-        with (
-            patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
-                "api_key": "gemini-key",
-                "base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
-            }),
-            patch("agent.auxiliary_client.OpenAI") as mock_openai,
-        ):
-            resolved_provider, client, model = resolve_vision_provider_client()
-
-        assert resolved_provider == "gemini"
-        assert client is not None
-        assert model == "gemini-3.1-pro-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "gemini-key"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai"
-
-
-
-class TestTaskSpecificOverrides:
-    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""
-
-    def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir(parents=True, exist_ok=True)
-        (hermes_home / "config.yaml").write_text(
-            """auxiliary:
-  web_extract:
-    base_url: http://localhost:3456/v1
-    api_key: config-key
-    model: config-model
-"""
-        )
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("web_extract")
-        assert model == "config-model"
-        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
-        assert mock_openai.call_args.kwargs["api_key"] == "config-key"
-
-    def test_task_without_override_uses_auto(self, monkeypatch):
-        """A task with no provider env var falls through to auto chain."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        with patch("agent.auxiliary_client.OpenAI"):
-            client, model = get_text_auxiliary_client("compression")
-        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter
-
-    def test_resolve_auto_prefers_live_main_runtime_over_persisted_config(self, monkeypatch, tmp_path):
-        """Session-only live model switches should override persisted config for auto routing."""
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir(parents=True, exist_ok=True)
-        (hermes_home / "config.yaml").write_text(
-            """model:
-  default: glm-5.1
-  provider: opencode-go
-"""
-        )
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-        calls = []
-
-        def _fake_resolve(provider, model=None, *args, **kwargs):
-            calls.append((provider, model, kwargs))
-            return MagicMock(), model or "resolved-model"
-
-        with patch("agent.auxiliary_client.resolve_provider_client", side_effect=_fake_resolve):
-            client, model = _resolve_auto(
-                main_runtime={
-                    "provider": "openai-codex",
-                    "model": "gpt-5.4",
-                    "api_mode": "codex_responses",
-                }
-            )
-
-        assert client is not None
-        assert model == "gpt-5.4"
-        assert calls[0][0] == "openai-codex"
-        assert calls[0][1] == "gpt-5.4"
-        assert calls[0][2]["api_mode"] == "codex_responses"
-
-    def test_explicit_compression_pin_still_wins_over_live_main_runtime(self, monkeypatch, tmp_path):
-        """Task-level compression config should beat a live session override."""
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir(parents=True, exist_ok=True)
-        (hermes_home / "config.yaml").write_text(
-            """auxiliary:
-  compression:
-    provider: openrouter
-    model: google/gemini-3-flash-preview
-model:
-  default: glm-5.1
-  provider: opencode-go
-"""
-        )
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(MagicMock(), "google/gemini-3-flash-preview")) as mock_resolve:
-            client, model = get_text_auxiliary_client(
-                "compression",
-                main_runtime={
-                    "provider": "openai-codex",
-                    "model": "gpt-5.4",
-                },
-            )
-
-        assert client is not None
-        assert model == "google/gemini-3-flash-preview"
-        assert mock_resolve.call_args.args[0] == "openrouter"
-        assert mock_resolve.call_args.kwargs["main_runtime"] == {
-            "provider": "openai-codex",
-            "model": "gpt-5.4",
-        }
-
-
-def test_resolve_provider_client_supports_copilot_acp_external_process():
-    fake_client = MagicMock()
-
-    with patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.4-mini"), \
-         patch("agent.auxiliary_client.CodexAuxiliaryClient", MagicMock()), \
-         patch("agent.copilot_acp_client.CopilotACPClient", return_value=fake_client) as mock_acp, \
-         patch("hermes_cli.auth.resolve_external_process_provider_credentials", return_value={
-             "provider": "copilot-acp",
-             "api_key": "copilot-acp",
-             "base_url": "acp://copilot",
-             "command": "/usr/bin/copilot",
-             "args": ["--acp", "--stdio"],
-         }):
-        client, model = resolve_provider_client("copilot-acp")
-
-    assert client is fake_client
-    assert model == "gpt-5.4-mini"
-    assert mock_acp.call_args.kwargs["api_key"] == "copilot-acp"
-    assert mock_acp.call_args.kwargs["base_url"] == "acp://copilot"
-    assert mock_acp.call_args.kwargs["command"] == "/usr/bin/copilot"
-    assert mock_acp.call_args.kwargs["args"] == ["--acp", "--stdio"]
-
-
-def test_resolve_provider_client_copilot_acp_requires_explicit_or_configured_model():
-    with patch("agent.auxiliary_client._read_main_model", return_value=""), \
-         patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp, \
-         patch("hermes_cli.auth.resolve_external_process_provider_credentials", return_value={
-             "provider": "copilot-acp",
-             "api_key": "copilot-acp",
-             "base_url": "acp://copilot",
-             "command": "/usr/bin/copilot",
-             "args": ["--acp", "--stdio"],
-         }):
-        client, model = resolve_provider_client("copilot-acp")
-
-    assert client is None
-    assert model is None
-    mock_acp.assert_not_called()
-
-
-class TestAuxiliaryMaxTokensParam:
-    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
-        """Codex adapter translates max_tokens internally, so we return max_tokens."""
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
-            result = auxiliary_max_tokens_param(1024)
-        assert result == {"max_tokens": 1024}
-
-    def test_openrouter_uses_max_tokens(self, monkeypatch):
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        result = auxiliary_max_tokens_param(1024)
-        assert result == {"max_tokens": 1024}
-
-    def test_no_provider_uses_max_tokens(self):
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
-            result = auxiliary_max_tokens_param(1024)
-        assert result == {"max_tokens": 1024}
-
-
 # ── Payment / credit exhaustion fallback ─────────────────────────────────


@@ -1126,83 +589,6 @@ class TestCallLlmPaymentFallback:
        exc.status_code = 402
        return exc

-    def test_402_triggers_fallback_when_auto(self, monkeypatch):
-        """When provider is auto and returns 402, call_llm tries the next one."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        primary_client.chat.completions.create.side_effect = self._make_402_error()
-
-        fallback_client = MagicMock()
-        fallback_response = MagicMock()
-        fallback_client.chat.completions.create.return_value = fallback_response
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
-             patch("agent.auxiliary_client._try_payment_fallback",
-                    return_value=(fallback_client, "gpt-5.2-codex", "openai-codex")) as mock_fb:
-            result = call_llm(
-                task="compression",
-                messages=[{"role": "user", "content": "hello"}],
-            )
-
-        assert result is fallback_response
-        mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
-        # Fallback call should use the fallback model
-        fb_kwargs = fallback_client.chat.completions.create.call_args.kwargs
-        assert fb_kwargs["model"] == "gpt-5.2-codex"
-
-    def test_402_no_fallback_when_explicit_provider(self, monkeypatch):
-        """When provider is explicitly configured (not auto), 402 should NOT fallback (#7559)."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        primary_client.chat.completions.create.side_effect = self._make_402_error()
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "local-model")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("custom", "local-model", None, None, None)), \
-             patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
-            with pytest.raises(Exception, match="insufficient credits"):
-                call_llm(
-                    task="compression",
-                    messages=[{"role": "user", "content": "hello"}],
-                )
-
-        # Fallback should NOT be attempted when provider is explicit
-        mock_fb.assert_not_called()
-
-    def test_connection_error_triggers_fallback_when_auto(self, monkeypatch):
-        """Connection errors also trigger fallback when provider is auto."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        conn_err = Exception("Connection refused")
-        conn_err.status_code = None
-        primary_client.chat.completions.create.side_effect = conn_err
-
-        fallback_client = MagicMock()
-        fallback_response = MagicMock()
-        fallback_client.chat.completions.create.return_value = fallback_response
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "model")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("auto", "model", None, None, None)), \
-             patch("agent.auxiliary_client._is_connection_error", return_value=True), \
-             patch("agent.auxiliary_client._try_payment_fallback",
-                    return_value=(fallback_client, "fb-model", "nous")) as mock_fb:
-            result = call_llm(
-                task="compression",
-                messages=[{"role": "user", "content": "hello"}],
-            )
-
-        assert result is fallback_response
-        mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
-
    def test_non_payment_error_not_caught(self, monkeypatch):
        """Non-payment/non-connection errors (500) should NOT trigger fallback."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
@@ -1222,26 +608,6 @@ class TestCallLlmPaymentFallback:
                    messages=[{"role": "user", "content": "hello"}],
                )

-    def test_402_with_no_fallback_reraises(self, monkeypatch):
-        """When 402 hits and no fallback is available, the original error propagates."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        primary_client.chat.completions.create.side_effect = self._make_402_error()
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
-             patch("agent.auxiliary_client._try_payment_fallback",
-                    return_value=(None, None, "")):
-            with pytest.raises(Exception, match="insufficient credits"):
-                call_llm(
-                    task="compression",
-                    messages=[{"role": "user", "content": "hello"}],
-                )
-
-
 # ---------------------------------------------------------------------------
 # Gate: _resolve_api_key_provider must skip anthropic when not configured
 # ---------------------------------------------------------------------------
@@ -1289,59 +655,11 @@ def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
 # ---------------------------------------------------------------------------


-class TestModelDefaultElimination:
-    """_resolve_api_key_provider must skip providers without known aux models."""
-
-    def test_unknown_provider_skipped(self, monkeypatch):
-        """Providers not in _API_KEY_PROVIDER_AUX_MODELS are skipped, not sent model='default'."""
-        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
-
-        # Verify our known providers have entries
-        assert "gemini" in _API_KEY_PROVIDER_AUX_MODELS
-        assert "kimi-coding" in _API_KEY_PROVIDER_AUX_MODELS
-
-        # A random provider_id not in the dict should return None
-        assert _API_KEY_PROVIDER_AUX_MODELS.get("totally-unknown-provider") is None
-
-    def test_known_provider_gets_real_model(self):
-        """Known providers get a real model name, not 'default'."""
-        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
-
-        for provider_id, model in _API_KEY_PROVIDER_AUX_MODELS.items():
-            assert model != "default", f"{provider_id} should not map to 'default'"
-            assert isinstance(model, str) and model.strip(), \
-                f"{provider_id} should have a non-empty model string"
-
-
 # ---------------------------------------------------------------------------
 # _try_payment_fallback reason parameter (#7512 bug 3)
 # ---------------------------------------------------------------------------


-class TestTryPaymentFallbackReason:
-    """_try_payment_fallback uses the reason parameter in log messages."""
-
-    def test_reason_parameter_passed_through(self, monkeypatch):
-        """The reason= parameter is accepted without error."""
-        from agent.auxiliary_client import _try_payment_fallback
-
-        # Mock the provider chain to return nothing
-        monkeypatch.setattr(
-            "agent.auxiliary_client._get_provider_chain",
-            lambda: [],
-        )
-        monkeypatch.setattr(
-            "agent.auxiliary_client._read_main_provider",
-            lambda: "",
-        )
-
-        client, model, label = _try_payment_fallback(
-            "openrouter", task="compression", reason="connection error"
-        )
-        assert client is None
-        assert label == ""
-
-
 # ---------------------------------------------------------------------------
 # _is_connection_error coverage
 # ---------------------------------------------------------------------------
@@ -1383,98 +701,6 @@ class TestIsConnectionError:
 # ---------------------------------------------------------------------------


-class TestAsyncCallLlmFallback:
-    """async_call_llm mirrors call_llm fallback behavior."""
-
-    def _make_402_error(self, msg="Payment Required: insufficient credits"):
-        exc = Exception(msg)
-        exc.status_code = 402
-        return exc
-
-    @pytest.mark.asyncio
-    async def test_402_triggers_async_fallback_when_auto(self, monkeypatch):
-        """When provider is auto and returns 402, async_call_llm tries fallback."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        primary_client.chat.completions.create = AsyncMock(
-            side_effect=self._make_402_error())
-
-        # Fallback client (sync) returned by _try_payment_fallback
-        fb_sync_client = MagicMock()
-        fb_async_client = MagicMock()
-        fb_response = MagicMock()
-        fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
-             patch("agent.auxiliary_client._try_payment_fallback",
-                    return_value=(fb_sync_client, "gpt-5.2-codex", "openai-codex")) as mock_fb, \
-             patch("agent.auxiliary_client._to_async_client",
-                    return_value=(fb_async_client, "gpt-5.2-codex")):
-            result = await async_call_llm(
-                task="compression",
-                messages=[{"role": "user", "content": "hello"}],
-            )
-
-        assert result is fb_response
-        mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
-
-    @pytest.mark.asyncio
-    async def test_402_no_async_fallback_when_explicit(self, monkeypatch):
-        """When provider is explicit, 402 should NOT trigger async fallback."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        primary_client.chat.completions.create = AsyncMock(
-            side_effect=self._make_402_error())
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "local-model")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("custom", "local-model", None, None, None)), \
-             patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
-            with pytest.raises(Exception, match="insufficient credits"):
-                await async_call_llm(
-                    task="compression",
-                    messages=[{"role": "user", "content": "hello"}],
-                )
-
-        mock_fb.assert_not_called()
-
-    @pytest.mark.asyncio
-    async def test_connection_error_triggers_async_fallback(self, monkeypatch):
-        """Connection errors trigger async fallback when provider is auto."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-
-        primary_client = MagicMock()
-        conn_err = Exception("Connection refused")
-        conn_err.status_code = None
-        primary_client.chat.completions.create = AsyncMock(side_effect=conn_err)
-
-        fb_sync_client = MagicMock()
-        fb_async_client = MagicMock()
-        fb_response = MagicMock()
-        fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
-
-        with patch("agent.auxiliary_client._get_cached_client",
-                    return_value=(primary_client, "model")), \
-             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("auto", "model", None, None, None)), \
-             patch("agent.auxiliary_client._is_connection_error", return_value=True), \
-             patch("agent.auxiliary_client._try_payment_fallback",
-                    return_value=(fb_sync_client, "fb-model", "nous")) as mock_fb, \
-             patch("agent.auxiliary_client._to_async_client",
-                    return_value=(fb_async_client, "fb-model")):
-            result = await async_call_llm(
-                task="compression",
-                messages=[{"role": "user", "content": "hello"}],
-            )
-
-        assert result is fb_response
-        mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
 class TestStaleBaseUrlWarning:
    """_resolve_auto() warns when OPENAI_BASE_URL conflicts with config provider (#5161)."""

@@ -1546,24 +772,6 @@ class TestStaleBaseUrlWarning:
        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Should NOT warn when OPENAI_BASE_URL is not set"

-    def test_warning_only_fires_once(self, monkeypatch, caplog):
-        """Warning is suppressed after the first invocation."""
-        import agent.auxiliary_client as mod
-        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
-        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
-
-        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
-             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
-             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
-            _resolve_auto()
-            caplog.clear()
-            _resolve_auto()
-
-        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
-            "Warning should not fire a second time"
-
-
 # ---------------------------------------------------------------------------
 # Anthropic-compatible image block conversion
 # ---------------------------------------------------------------------------
@@ -826,85 +826,6 @@ class TestGeminiCloudCodeClient:
        finally:
            client.close()

-    def test_create_with_mocked_http(self, monkeypatch):
-        """End-to-end: mock oauth + http, verify translation works."""
-        from agent import gemini_cloudcode_adapter, google_oauth
-        from agent.google_oauth import GoogleCredentials, save_credentials
-
-        # Set up logged-in state
-        save_credentials(GoogleCredentials(
-            access_token="bearer-tok",
-            refresh_token="rt",
-            expires_ms=int((time.time() + 3600) * 1000),
-            project_id="test-proj",
-        ))
-
-        # Mock the HTTP response
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-        mock_response.json.return_value = {
-            "response": {
-                "candidates": [{
-                    "content": {"parts": [{"text": "hello from mock"}]},
-                    "finishReason": "STOP",
-                }],
-                "usageMetadata": {
-                    "promptTokenCount": 5,
-                    "candidatesTokenCount": 3,
-                    "totalTokenCount": 8,
-                },
-            }
-        }
-
-        client = gemini_cloudcode_adapter.GeminiCloudCodeClient()
-        try:
-            with patch.object(client._http, "post", return_value=mock_response) as mock_post:
-                result = client.chat.completions.create(
-                    model="gemini-2.5-flash",
-                    messages=[{"role": "user", "content": "hi"}],
-                )
-            assert result.choices[0].message.content == "hello from mock"
-
-            # Verify the request was wrapped correctly
-            call_args = mock_post.call_args
-            assert "cloudcode-pa.googleapis.com" in call_args[0][0]
-            assert ":generateContent" in call_args[0][0]
-            json_body = call_args[1]["json"]
-            assert json_body["project"] == "test-proj"
-            assert json_body["model"] == "gemini-2.5-flash"
-            assert "request" in json_body
-            # Auth header
-            assert call_args[1]["headers"]["Authorization"] == "Bearer bearer-tok"
-        finally:
-            client.close()
-
-    def test_create_raises_on_http_error(self, monkeypatch):
-        from agent import gemini_cloudcode_adapter
-        from agent.google_oauth import GoogleCredentials, save_credentials
-
-        save_credentials(GoogleCredentials(
-            access_token="tok", refresh_token="rt",
-            expires_ms=int((time.time() + 3600) * 1000),
-            project_id="p",
-        ))
-
-        mock_response = MagicMock()
-        mock_response.status_code = 401
-        mock_response.text = "unauthorized"
-
-        client = gemini_cloudcode_adapter.GeminiCloudCodeClient()
-        try:
-            with patch.object(client._http, "post", return_value=mock_response):
-                with pytest.raises(gemini_cloudcode_adapter.CodeAssistError) as exc_info:
-                    client.chat.completions.create(
-                        model="gemini-2.5-flash",
-                        messages=[{"role": "user", "content": "hi"}],
-                    )
-            assert exc_info.value.code == "code_assist_unauthorized"
-        finally:
-            client.close()
-
-
 # =============================================================================
 # Provider registration
 # =============================================================================
@@ -916,14 +837,6 @@ class TestProviderRegistration:
        assert "google-gemini-cli" in PROVIDER_REGISTRY
        assert PROVIDER_REGISTRY["google-gemini-cli"].auth_type == "oauth_external"

-    @pytest.mark.parametrize("alias", [
-        "gemini-cli", "gemini-oauth", "google-gemini-cli",
-    ])
-    def test_alias_resolves(self, alias):
-        from hermes_cli.auth import resolve_provider
-
-        assert resolve_provider(alias) == "google-gemini-cli"
-
    def test_google_gemini_alias_still_goes_to_api_key_gemini(self):
        """Regression guard: don't shadow the existing google-gemini → gemini alias."""
        from hermes_cli.auth import resolve_provider
@@ -411,8 +411,10 @@ class TestTerminalFormatting:

        assert "Input tokens" in text
        assert "Output tokens" in text
-        assert "Est. cost" in text
-        assert "$" in text
+        # Cost and cache metrics are intentionally hidden (pricing was unreliable).
+        assert "Est. cost" not in text
+        assert "Cache read" not in text
+        assert "Cache write" not in text

    def test_terminal_format_shows_platforms(self, populated_db):
        engine = InsightsEngine(populated_db)
@@ -431,8 +433,8 @@ class TestTerminalFormatting:

        assert "█" in text  # Bar chart characters

-    def test_terminal_format_shows_na_for_custom_models(self, db):
-        """Custom models should show N/A instead of fake cost."""
+    def test_terminal_format_hides_cost_for_custom_models(self, db):
+        """Cost display is hidden entirely — custom models no longer show 'N/A' either."""
        db.create_session(session_id="s1", source="cli", model="my-custom-model")
        db.update_token_counts("s1", input_tokens=1000, output_tokens=500)
        db._conn.commit()
@@ -441,8 +443,9 @@ class TestTerminalFormatting:
        report = engine.generate(days=30)
        text = engine.format_terminal(report)

-        assert "N/A" in text
-        assert "custom/self-hosted" in text
+        assert "N/A" not in text
+        assert "custom/self-hosted" not in text
+        assert "Cost" not in text


 class TestGatewayFormatting:
@@ -461,13 +464,14 @@ class TestGatewayFormatting:

        assert "**" in text  # Markdown bold

-    def test_gateway_format_shows_cost(self, populated_db):
+    def test_gateway_format_hides_cost(self, populated_db):
        engine = InsightsEngine(populated_db)
        report = engine.generate(days=30)
        text = engine.format_gateway(report)

-        assert "$" in text
-        assert "Est. cost" in text
+        assert "$" not in text
+        assert "Est. cost" not in text
+        assert "cache" not in text.lower()

    def test_gateway_format_shows_models(self, populated_db):
        engine = InsightsEngine(populated_db)
@@ -1,7 +1,27 @@
-"""Shared fixtures for the hermes-agent test suite."""
+"""Shared fixtures for the hermes-agent test suite.
+
+Hermetic-test invariants enforced here (see AGENTS.md for rationale):
+
+1. **No credential env vars.** All provider/credential-shaped env vars
+   (ending in _API_KEY, _TOKEN, _SECRET, _PASSWORD, _CREDENTIALS, etc.)
+   are unset before every test. Local developer keys cannot leak in.
+2. **Isolated HERMES_HOME.** HERMES_HOME points to a per-test tempdir so
+   code reading ``~/.hermes/*`` via ``get_hermes_home()`` can't see the
+   real one. (We do NOT also redirect HOME — that broke subprocesses in
+   CI. Code using ``Path.home() / ".hermes"`` instead of the canonical
+   ``get_hermes_home()`` is a bug to fix at the callsite.)
+3. **Deterministic runtime.** TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0.
+4. **No HERMES_SESSION_* inheritance** — the agent's current gateway
+   session must not leak into tests.
+
+These invariants make the local test run match CI closely. Gaps that
+remain (CPU count, xdist worker count) are addressed by the canonical
+test runner at ``scripts/run_tests.sh``.
+"""

 import asyncio
 import os
+import re
 import signal
 import sys
 import tempfile
@@ -16,30 +36,215 @@ if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))


+# ── Credential env-var filter ──────────────────────────────────────────────
+#
+# Any env var in the current process matching ONE of these patterns is
+# unset for every test. Developers' local keys cannot leak into assertions
+# about "auto-detect provider when key present".
+
+_CREDENTIAL_SUFFIXES = (
+    "_API_KEY",
+    "_TOKEN",
+    "_SECRET",
+    "_PASSWORD",
+    "_CREDENTIALS",
+    "_ACCESS_KEY",
+    "_SECRET_ACCESS_KEY",
+    "_PRIVATE_KEY",
+    "_OAUTH_TOKEN",
+    "_WEBHOOK_SECRET",
+    "_ENCRYPT_KEY",
+    "_APP_SECRET",
+    "_CLIENT_SECRET",
+    "_CORP_SECRET",
+    "_AES_KEY",
+)
+
+# Explicit names (for ones that don't fit the suffix pattern)
+_CREDENTIAL_NAMES = frozenset({
+    "AWS_ACCESS_KEY_ID",
+    "AWS_SECRET_ACCESS_KEY",
+    "AWS_SESSION_TOKEN",
+    "ANTHROPIC_TOKEN",
+    "FAL_KEY",
+    "GH_TOKEN",
+    "GITHUB_TOKEN",
+    "OPENAI_API_KEY",
+    "OPENROUTER_API_KEY",
+    "NOUS_API_KEY",
+    "GEMINI_API_KEY",
+    "GOOGLE_API_KEY",
+    "GROQ_API_KEY",
+    "XAI_API_KEY",
+    "MISTRAL_API_KEY",
+    "DEEPSEEK_API_KEY",
+    "KIMI_API_KEY",
+    "MOONSHOT_API_KEY",
+    "GLM_API_KEY",
+    "ZAI_API_KEY",
+    "MINIMAX_API_KEY",
+    "OLLAMA_API_KEY",
+    "OPENVIKING_API_KEY",
+    "COPILOT_API_KEY",
+    "CLAUDE_CODE_OAUTH_TOKEN",
+    "BROWSERBASE_API_KEY",
+    "FIRECRAWL_API_KEY",
+    "PARALLEL_API_KEY",
+    "EXA_API_KEY",
+    "TAVILY_API_KEY",
+    "WANDB_API_KEY",
+    "ELEVENLABS_API_KEY",
+    "HONCHO_API_KEY",
+    "MEM0_API_KEY",
+    "SUPERMEMORY_API_KEY",
+    "RETAINDB_API_KEY",
+    "HINDSIGHT_API_KEY",
+    "HINDSIGHT_LLM_API_KEY",
+    "TINKER_API_KEY",
+    "DAYTONA_API_KEY",
+    "TWILIO_AUTH_TOKEN",
+    "TELEGRAM_BOT_TOKEN",
+    "DISCORD_BOT_TOKEN",
+    "SLACK_BOT_TOKEN",
+    "SLACK_APP_TOKEN",
+    "MATTERMOST_TOKEN",
+    "MATRIX_ACCESS_TOKEN",
+    "MATRIX_PASSWORD",
+    "MATRIX_RECOVERY_KEY",
+    "HASS_TOKEN",
+    "EMAIL_PASSWORD",
+    "BLUEBUBBLES_PASSWORD",
+    "FEISHU_APP_SECRET",
+    "FEISHU_ENCRYPT_KEY",
+    "FEISHU_VERIFICATION_TOKEN",
+    "DINGTALK_CLIENT_SECRET",
+    "QQ_CLIENT_SECRET",
+    "QQ_STT_API_KEY",
+    "WECOM_SECRET",
+    "WECOM_CALLBACK_CORP_SECRET",
+    "WECOM_CALLBACK_TOKEN",
+    "WECOM_CALLBACK_ENCODING_AES_KEY",
+    "WEIXIN_TOKEN",
+    "MODAL_TOKEN_ID",
+    "MODAL_TOKEN_SECRET",
+    "TERMINAL_SSH_KEY",
+    "SUDO_PASSWORD",
+    "GATEWAY_PROXY_KEY",
+    "API_SERVER_KEY",
+    "TOOL_GATEWAY_USER_TOKEN",
+    "TELEGRAM_WEBHOOK_SECRET",
+    "WEBHOOK_SECRET",
+    "AI_GATEWAY_API_KEY",
+    "VOICE_TOOLS_OPENAI_KEY",
+    "BROWSER_USE_API_KEY",
+    "CUSTOM_API_KEY",
+    "GATEWAY_PROXY_URL",
+    "GEMINI_BASE_URL",
+    "OPENAI_BASE_URL",
+    "OPENROUTER_BASE_URL",
+    "OLLAMA_BASE_URL",
+    "GROQ_BASE_URL",
+    "XAI_BASE_URL",
+    "AI_GATEWAY_BASE_URL",
+    "ANTHROPIC_BASE_URL",
+})
+
+
+def _looks_like_credential(name: str) -> bool:
+    """True if env var name matches a credential-shaped pattern."""
+    if name in _CREDENTIAL_NAMES:
+        return True
+    return any(name.endswith(suf) for suf in _CREDENTIAL_SUFFIXES)
+
+
+# HERMES_* vars that change test behavior by being set. Unset all of these
+# unconditionally — individual tests that need them set do so explicitly.
+_HERMES_BEHAVIORAL_VARS = frozenset({
+    "HERMES_YOLO_MODE",
+    "HERMES_INTERACTIVE",
+    "HERMES_QUIET",
+    "HERMES_TOOL_PROGRESS",
+    "HERMES_TOOL_PROGRESS_MODE",
+    "HERMES_MAX_ITERATIONS",
+    "HERMES_SESSION_PLATFORM",
+    "HERMES_SESSION_CHAT_ID",
+    "HERMES_SESSION_CHAT_NAME",
+    "HERMES_SESSION_THREAD_ID",
+    "HERMES_SESSION_SOURCE",
+    "HERMES_SESSION_KEY",
+    "HERMES_GATEWAY_SESSION",
+    "HERMES_PLATFORM",
+    "HERMES_INFERENCE_PROVIDER",
+    "HERMES_MANAGED",
+    "HERMES_DEV",
+    "HERMES_CONTAINER",
+    "HERMES_EPHEMERAL_SYSTEM_PROMPT",
+    "HERMES_TIMEZONE",
+    "HERMES_REDACT_SECRETS",
+    "HERMES_BACKGROUND_NOTIFICATIONS",
+    "HERMES_EXEC_ASK",
+    "HERMES_HOME_MODE",
+})
+
+
@pytest.fixture(autouse=True)
-def _isolate_hermes_home(tmp_path, monkeypatch):
-    """Redirect HERMES_HOME to a temp dir so tests never write to ~/.hermes/."""
-    fake_home = tmp_path / "hermes_test"
-    fake_home.mkdir()
-    (fake_home / "sessions").mkdir()
-    (fake_home / "cron").mkdir()
-    (fake_home / "memories").mkdir()
-    (fake_home / "skills").mkdir()
-    monkeypatch.setenv("HERMES_HOME", str(fake_home))
-    # Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/
+def _hermetic_environment(tmp_path, monkeypatch):
+    """Blank out all credential/behavioral env vars so local and CI match.
+
+    Also redirects HOME and HERMES_HOME to per-test tempdirs so code that
+    reads ``~/.hermes/*`` can't touch the real one, and pins TZ/LANG so
+    datetime/locale-sensitive tests are deterministic.
+    """
+    # 1. Blank every credential-shaped env var that's currently set.
+    for name in list(os.environ.keys()):
+        if _looks_like_credential(name):
+            monkeypatch.delenv(name, raising=False)
+
+    # 2. Blank behavioral HERMES_* vars that could change test semantics.
+    for name in _HERMES_BEHAVIORAL_VARS:
+        monkeypatch.delenv(name, raising=False)
+
+    # 3. Redirect HERMES_HOME to a per-test tempdir. Code that reads
+    #    ``~/.hermes/*`` via ``get_hermes_home()`` now gets the tempdir.
+    #
+    #    NOTE: We do NOT also redirect HOME. Doing so broke CI because
+    #    some tests (and their transitive deps) spawn subprocesses that
+    #    inherit HOME and expect it to be stable. If a test genuinely
+    #    needs HOME isolated, it should set it explicitly in its own
+    #    fixture. Any code in the codebase reading ``~/.hermes/*`` via
+    #    ``Path.home() / ".hermes"`` instead of ``get_hermes_home()``
+    #    is a bug to fix at the callsite.
+    fake_hermes_home = tmp_path / "hermes_test"
+    fake_hermes_home.mkdir()
+    (fake_hermes_home / "sessions").mkdir()
+    (fake_hermes_home / "cron").mkdir()
+    (fake_hermes_home / "memories").mkdir()
+    (fake_hermes_home / "skills").mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(fake_hermes_home))
+
+    # 4. Deterministic locale / timezone / hashseed. CI runs in UTC with
+    #    C.UTF-8 locale; local dev often doesn't. Pin everything.
+    monkeypatch.setenv("TZ", "UTC")
+    monkeypatch.setenv("LANG", "C.UTF-8")
+    monkeypatch.setenv("LC_ALL", "C.UTF-8")
+    monkeypatch.setenv("PYTHONHASHSEED", "0")
+
+    # 5. Reset plugin singleton so tests don't leak plugins from
+    #    ~/.hermes/plugins/ (which, per step 3, is now empty — but the
+    #    singleton might still be cached from a previous test).
    try:
        import hermes_cli.plugins as _plugins_mod
        monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
    except Exception:
        pass
-    # Tests should not inherit the agent's current gateway/messaging surface.
-    # Individual tests that need gateway behavior set these explicitly.
-    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
-    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
-    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
-    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
-    # Avoid making real calls during tests if this key is set in the env files
-    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+
+
+# Backward-compat alias — old tests reference this fixture name. Keep it
+# as a no-op wrapper so imports don't break.
+@pytest.fixture(autouse=True)
+def _isolate_hermes_home(_hermetic_environment):
+    """Alias preserved for any test that yields this name explicitly."""
+    return None


@pytest.fixture()
@@ -64,6 +64,60 @@ class TestResolveDeliveryTarget:
            "thread_id": "17585",
        }

+    @pytest.mark.parametrize(
+        ("platform", "env_var", "chat_id"),
+        [
+            ("matrix", "MATRIX_HOME_ROOM", "!bot-room:example.org"),
+            ("signal", "SIGNAL_HOME_CHANNEL", "+15551234567"),
+            ("mattermost", "MATTERMOST_HOME_CHANNEL", "team-town-square"),
+            ("sms", "SMS_HOME_CHANNEL", "+15557654321"),
+            ("email", "EMAIL_HOME_ADDRESS", "home@example.com"),
+            ("dingtalk", "DINGTALK_HOME_CHANNEL", "cidNNN"),
+            ("feishu", "FEISHU_HOME_CHANNEL", "oc_home"),
+            ("wecom", "WECOM_HOME_CHANNEL", "wecom-home"),
+            ("weixin", "WEIXIN_HOME_CHANNEL", "wxid_home"),
+            ("qqbot", "QQ_HOME_CHANNEL", "group-openid-home"),
+        ],
+    )
+    def test_origin_delivery_without_origin_falls_back_to_supported_home_channels(
+        self, monkeypatch, platform, env_var, chat_id
+    ):
+        for fallback_env in (
+            "MATRIX_HOME_ROOM",
+            "MATRIX_HOME_CHANNEL",
+            "TELEGRAM_HOME_CHANNEL",
+            "DISCORD_HOME_CHANNEL",
+            "SLACK_HOME_CHANNEL",
+            "SIGNAL_HOME_CHANNEL",
+            "MATTERMOST_HOME_CHANNEL",
+            "SMS_HOME_CHANNEL",
+            "EMAIL_HOME_ADDRESS",
+            "DINGTALK_HOME_CHANNEL",
+            "BLUEBUBBLES_HOME_CHANNEL",
+            "FEISHU_HOME_CHANNEL",
+            "WECOM_HOME_CHANNEL",
+            "WEIXIN_HOME_CHANNEL",
+            "QQ_HOME_CHANNEL",
+        ):
+            monkeypatch.delenv(fallback_env, raising=False)
+        monkeypatch.setenv(env_var, chat_id)
+
+        assert _resolve_delivery_target({"deliver": "origin"}) == {
+            "platform": platform,
+            "chat_id": chat_id,
+            "thread_id": None,
+        }
+
+    def test_bare_matrix_delivery_uses_matrix_home_room(self, monkeypatch):
+        monkeypatch.delenv("MATRIX_HOME_CHANNEL", raising=False)
+        monkeypatch.setenv("MATRIX_HOME_ROOM", "!room123:example.org")
+
+        assert _resolve_delivery_target({"deliver": "matrix"}) == {
+            "platform": "matrix",
+            "chat_id": "!room123:example.org",
+            "thread_id": None,
+        }
+
    def test_explicit_telegram_topic_target_with_thread_id(self):
        """deliver: 'telegram:chat_id:thread_id' parses correctly."""
        job = {
@@ -548,41 +602,6 @@ class TestDeliverResultWrapping:
 class TestDeliverResultErrorReturns:
    """Verify _deliver_result returns error strings on failure, None on success."""

-    def test_returns_none_on_successful_delivery(self):
-        from gateway.config import Platform
-
-        pconfig = MagicMock()
-        pconfig.enabled = True
-        mock_cfg = MagicMock()
-        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
-
-        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
-             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})):
-            job = {
-                "id": "ok-job",
-                "deliver": "origin",
-                "origin": {"platform": "telegram", "chat_id": "123"},
-            }
-            result = _deliver_result(job, "Output.")
-        assert result is None
-
-    def test_returns_none_for_local_delivery(self):
-        """local-only jobs don't deliver — not a failure."""
-        job = {"id": "local-job", "deliver": "local"}
-        result = _deliver_result(job, "Output.")
-        assert result is None
-
-    def test_returns_error_for_unknown_platform(self):
-        job = {
-            "id": "bad-platform",
-            "deliver": "origin",
-            "origin": {"platform": "fax", "chat_id": "123"},
-        }
-        with patch("gateway.config.load_gateway_config"):
-            result = _deliver_result(job, "Output.")
-        assert result is not None
-        assert "unknown platform" in result
-
    def test_returns_error_when_platform_disabled(self):
        from gateway.config import Platform

@@ -601,25 +620,6 @@ class TestDeliverResultErrorReturns:
        assert result is not None
        assert "not configured" in result

-    def test_returns_error_on_send_failure(self):
-        from gateway.config import Platform
-
-        pconfig = MagicMock()
-        pconfig.enabled = True
-        mock_cfg = MagicMock()
-        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
-
-        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
-             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"error": "rate limited"})):
-            job = {
-                "id": "rate-limited",
-                "deliver": "origin",
-                "origin": {"platform": "telegram", "chat_id": "123"},
-            }
-            result = _deliver_result(job, "Output.")
-        assert result is not None
-        assert "rate limited" in result
-
    def test_returns_error_for_unresolved_target(self, monkeypatch):
        """Non-local delivery with no resolvable target should return an error."""
        monkeypatch.delenv("TELEGRAM_HOME_CHANNEL", raising=False)
@@ -864,57 +864,6 @@ class TestRunJobConfigLogging:
            f"Expected 'failed to parse prefill messages' warning in logs, got: {[r.message for r in caplog.records]}"


-class TestRunJobPerJobOverrides:
-    def test_job_level_model_provider_and_base_url_overrides_are_used(self, tmp_path):
-        config_yaml = tmp_path / "config.yaml"
-        config_yaml.write_text(
-            "model:\n"
-            "  default: gpt-5.4\n"
-            "  provider: openai-codex\n"
-            "  base_url: https://chatgpt.com/backend-api/codex\n"
-        )
-
-        job = {
-            "id": "briefing-job",
-            "name": "briefing",
-            "prompt": "hello",
-            "model": "perplexity/sonar-pro",
-            "provider": "custom",
-            "base_url": "http://127.0.0.1:4000/v1",
-        }
-
-        fake_db = MagicMock()
-        fake_runtime = {
-            "provider": "openrouter",
-            "api_mode": "chat_completions",
-            "base_url": "http://127.0.0.1:4000/v1",
-            "api_key": "***",
-        }
-
-        with patch("cron.scheduler._hermes_home", tmp_path), \
-             patch("cron.scheduler._resolve_origin", return_value=None), \
-             patch("dotenv.load_dotenv"), \
-             patch("hermes_state.SessionDB", return_value=fake_db), \
-             patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=fake_runtime) as runtime_mock, \
-             patch("run_agent.AIAgent") as mock_agent_cls:
-            mock_agent = MagicMock()
-            mock_agent.run_conversation.return_value = {"final_response": "ok"}
-            mock_agent_cls.return_value = mock_agent
-
-            success, output, final_response, error = run_job(job)
-
-        assert success is True
-        assert error is None
-        assert final_response == "ok"
-        assert "ok" in output
-        runtime_mock.assert_called_once_with(
-            requested="custom",
-            explicit_base_url="http://127.0.0.1:4000/v1",
-        )
-        assert mock_agent_cls.call_args.kwargs["model"] == "perplexity/sonar-pro"
-        fake_db.close.assert_called_once()
-
-
 class TestRunJobSkillBacked:
    def test_run_job_preserves_skill_env_passthrough_into_worker_thread(self, tmp_path):
        job = {
@@ -1128,16 +1077,6 @@ class TestSilentDelivery:
            "origin": {"platform": "telegram", "chat_id": "123"},
        }

-    def test_normal_response_delivers(self):
-        with patch("cron.scheduler.get_due_jobs", return_value=[self._make_job()]), \
-             patch("cron.scheduler.run_job", return_value=(True, "# output", "Results here", None)), \
-             patch("cron.scheduler.save_job_output", return_value="/tmp/out.md"), \
-             patch("cron.scheduler._deliver_result") as deliver_mock, \
-             patch("cron.scheduler.mark_job_run"):
-            from cron.scheduler import tick
-            tick(verbose=False)
-        deliver_mock.assert_called_once()
-
    def test_silent_response_suppresses_delivery(self, caplog):
        with patch("cron.scheduler.get_due_jobs", return_value=[self._make_job()]), \
             patch("cron.scheduler.run_job", return_value=(True, "# output", "[SILENT]", None)), \
@@ -1277,44 +1216,6 @@ class TestBuildJobPromptMissingSkill:
        assert "go" in result


-class TestTickAdvanceBeforeRun:
-    """Verify that tick() calls advance_next_run before run_job for crash safety."""
-
-    def test_advance_called_before_run_job(self, tmp_path):
-        """advance_next_run must be called before run_job to prevent crash-loop re-fires."""
-        call_order = []
-
-        def fake_advance(job_id):
-            call_order.append(("advance", job_id))
-            return True
-
-        def fake_run_job(job):
-            call_order.append(("run", job["id"]))
-            return True, "output", "response", None
-
-        fake_job = {
-            "id": "test-advance",
-            "name": "test",
-            "prompt": "hello",
-            "enabled": True,
-            "schedule": {"kind": "cron", "expr": "15 6 * * *"},
-        }
-
-        with patch("cron.scheduler.get_due_jobs", return_value=[fake_job]), \
-             patch("cron.scheduler.advance_next_run", side_effect=fake_advance) as adv_mock, \
-             patch("cron.scheduler.run_job", side_effect=fake_run_job), \
-             patch("cron.scheduler.save_job_output", return_value=tmp_path / "out.md"), \
-             patch("cron.scheduler.mark_job_run"), \
-             patch("cron.scheduler._deliver_result"):
-            from cron.scheduler import tick
-            executed = tick(verbose=False)
-
-        assert executed == 1
-        adv_mock.assert_called_once_with("test-advance")
-        # advance must happen before run
-        assert call_order == [("advance", "test-advance"), ("run", "test-advance")]
-
-
 class TestSendMediaViaAdapter:
    """Unit tests for _send_media_via_adapter — routes files to typed adapter methods."""

@@ -1358,12 +1259,3 @@ class TestSendMediaViaAdapter:
        self._run_with_loop(adapter, "123", media_files, None, {"id": "j3"})
        adapter.send_voice.assert_called_once()
        adapter.send_image_file.assert_called_once()
-
-    def test_single_failure_does_not_block_others(self):
-        adapter = MagicMock()
-        adapter.send_voice = AsyncMock(side_effect=RuntimeError("network error"))
-        adapter.send_image_file = AsyncMock()
-        media_files = [("/tmp/voice.ogg", False), ("/tmp/photo.png", False)]
-        self._run_with_loop(adapter, "123", media_files, None, {"id": "j4"})
-        adapter.send_voice.assert_called_once()
-        adapter.send_image_file.assert_called_once()
@@ -258,3 +258,785 @@ class TestAgentCacheLifecycle:
        cb3 = lambda *a: None
        agent.tool_progress_callback = cb3
        assert agent.tool_progress_callback is cb3
+
+
+class TestAgentCacheBoundedGrowth:
+    """LRU cap and idle-TTL eviction prevent unbounded cache growth."""
+
+    def _bounded_runner(self):
+        """Runner with an OrderedDict cache (matches real gateway init)."""
+        from collections import OrderedDict
+        from gateway.run import GatewayRunner
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner._agent_cache = OrderedDict()
+        runner._agent_cache_lock = threading.Lock()
+        return runner
+
+    def _fake_agent(self, last_activity: float | None = None):
+        """Lightweight stand-in; real AIAgent is heavy to construct."""
+        m = MagicMock()
+        if last_activity is not None:
+            m._last_activity_ts = last_activity
+        else:
+            import time as _t
+            m._last_activity_ts = _t.time()
+        return m
+
+    def test_cap_evicts_lru_when_exceeded(self, monkeypatch):
+        """Inserting past _AGENT_CACHE_MAX_SIZE pops the oldest entry."""
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 3)
+        runner = self._bounded_runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        for i in range(3):
+            runner._agent_cache[f"s{i}"] = (self._fake_agent(), f"sig{i}")
+
+        # Insert a 4th — oldest (s0) must be evicted.
+        with runner._agent_cache_lock:
+            runner._agent_cache["s3"] = (self._fake_agent(), "sig3")
+            runner._enforce_agent_cache_cap()
+
+        assert "s0" not in runner._agent_cache
+        assert "s3" in runner._agent_cache
+        assert len(runner._agent_cache) == 3
+
+    def test_cap_respects_move_to_end(self, monkeypatch):
+        """Entries refreshed via move_to_end are NOT evicted as 'oldest'."""
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 3)
+        runner = self._bounded_runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        for i in range(3):
+            runner._agent_cache[f"s{i}"] = (self._fake_agent(), f"sig{i}")
+
+        # Touch s0 — it is now MRU, so s1 becomes LRU.
+        runner._agent_cache.move_to_end("s0")
+
+        with runner._agent_cache_lock:
+            runner._agent_cache["s3"] = (self._fake_agent(), "sig3")
+            runner._enforce_agent_cache_cap()
+
+        assert "s0" in runner._agent_cache  # rescued by move_to_end
+        assert "s1" not in runner._agent_cache  # now oldest → evicted
+        assert "s3" in runner._agent_cache
+
+    def test_cap_triggers_cleanup_thread(self, monkeypatch):
+        """Evicted agent has release_clients() called for it (soft cleanup).
+
+        Uses the soft path (_release_evicted_agent_soft), NOT the hard
+        _cleanup_agent_resources — cache eviction must not tear down
+        per-task state (terminal/browser/bg procs).
+        """
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1)
+        runner = self._bounded_runner()
+
+        release_calls: list = []
+        cleanup_calls: list = []
+        # Intercept both paths; only release_clients path should fire.
+        def _soft(agent):
+            release_calls.append(agent)
+        runner._release_evicted_agent_soft = _soft
+        runner._cleanup_agent_resources = lambda a: cleanup_calls.append(a)
+
+        old_agent = self._fake_agent()
+        new_agent = self._fake_agent()
+        with runner._agent_cache_lock:
+            runner._agent_cache["old"] = (old_agent, "sig_old")
+            runner._agent_cache["new"] = (new_agent, "sig_new")
+            runner._enforce_agent_cache_cap()
+
+        # Cleanup is dispatched to a daemon thread; join briefly to observe.
+        import time as _t
+        deadline = _t.time() + 2.0
+        while _t.time() < deadline and not release_calls:
+            _t.sleep(0.02)
+        assert old_agent in release_calls
+        assert new_agent not in release_calls
+        # Hard-cleanup path must NOT have fired — that's for session expiry only.
+        assert cleanup_calls == []
+
+    def test_idle_ttl_sweep_evicts_stale_agents(self, monkeypatch):
+        """_sweep_idle_cached_agents removes agents idle past the TTL."""
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.05)
+        runner = self._bounded_runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        import time as _t
+        fresh = self._fake_agent(last_activity=_t.time())
+        stale = self._fake_agent(last_activity=_t.time() - 10.0)
+        runner._agent_cache["fresh"] = (fresh, "s1")
+        runner._agent_cache["stale"] = (stale, "s2")
+
+        evicted = runner._sweep_idle_cached_agents()
+        assert evicted == 1
+        assert "stale" not in runner._agent_cache
+        assert "fresh" in runner._agent_cache
+
+    def test_idle_sweep_skips_agents_without_activity_ts(self, monkeypatch):
+        """Agents missing _last_activity_ts are left alone (defensive)."""
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.01)
+        runner = self._bounded_runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        no_ts = MagicMock(spec=[])  # no _last_activity_ts attribute
+        runner._agent_cache["s"] = (no_ts, "sig")
+
+        assert runner._sweep_idle_cached_agents() == 0
+        assert "s" in runner._agent_cache
+
+    def test_plain_dict_cache_is_tolerated(self):
+        """Test fixtures using plain {} don't crash _enforce_agent_cache_cap."""
+        from gateway.run import GatewayRunner
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner._agent_cache = {}  # plain dict, not OrderedDict
+        runner._agent_cache_lock = threading.Lock()
+        runner._cleanup_agent_resources = MagicMock()
+
+        # Should be a no-op rather than raising.
+        with runner._agent_cache_lock:
+            for i in range(200):
+                runner._agent_cache[f"s{i}"] = (MagicMock(), f"sig{i}")
+            runner._enforce_agent_cache_cap()  # no crash, no eviction
+
+        assert len(runner._agent_cache) == 200
+
+    def test_main_lookup_updates_lru_order(self, monkeypatch):
+        """Cache hit via the main-lookup path refreshes LRU position."""
+        runner = self._bounded_runner()
+
+        a0 = self._fake_agent()
+        a1 = self._fake_agent()
+        a2 = self._fake_agent()
+        runner._agent_cache["s0"] = (a0, "sig0")
+        runner._agent_cache["s1"] = (a1, "sig1")
+        runner._agent_cache["s2"] = (a2, "sig2")
+
+        # Simulate what _process_message_background does on a cache hit
+        # (minus the agent-state reset which isn't relevant here).
+        with runner._agent_cache_lock:
+            cached = runner._agent_cache.get("s0")
+            if cached and hasattr(runner._agent_cache, "move_to_end"):
+                runner._agent_cache.move_to_end("s0")
+
+        # After the hit, insertion order should be s1, s2, s0.
+        assert list(runner._agent_cache.keys()) == ["s1", "s2", "s0"]
+
+
+class TestAgentCacheActiveSafety:
+    """Safety: eviction must not tear down agents currently mid-turn.
+
+    AIAgent.close() kills process_registry entries for the task, cleans
+    the terminal sandbox, closes the OpenAI client, and cascades
+    .close() into active child subagents.  Calling it while the agent
+    is still processing would crash the in-flight request.  These tests
+    pin that eviction skips any agent present in _running_agents.
+    """
+
+    def _runner(self):
+        from collections import OrderedDict
+        from gateway.run import GatewayRunner
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner._agent_cache = OrderedDict()
+        runner._agent_cache_lock = threading.Lock()
+        runner._running_agents = {}
+        return runner
+
+    def _fake_agent(self, idle_seconds: float = 0.0):
+        import time as _t
+        m = MagicMock()
+        m._last_activity_ts = _t.time() - idle_seconds
+        return m
+
+    def test_cap_skips_active_lru_entry(self, monkeypatch):
+        """Active LRU entry is skipped; cache stays over cap rather than
+        compensating by evicting a newer entry.
+
+        Rationale: evicting a more-recent entry just because the oldest
+        slot is temporarily locked would punish the most recently-
+        inserted session (which has no cache to preserve) to protect
+        one that happens to be mid-turn.  Better to let the cache stay
+        transiently over cap and re-check on the next insert.
+        """
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 2)
+        runner = self._runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        active = self._fake_agent()
+        idle_a = self._fake_agent()
+        idle_b = self._fake_agent()
+
+        # Insertion order: active (oldest), idle_a, idle_b.
+        runner._agent_cache["session-active"] = (active, "sig")
+        runner._agent_cache["session-idle-a"] = (idle_a, "sig")
+        runner._agent_cache["session-idle-b"] = (idle_b, "sig")
+
+        # Mark `active` as mid-turn — it's LRU, but protected.
+        runner._running_agents["session-active"] = active
+
+        with runner._agent_cache_lock:
+            runner._enforce_agent_cache_cap()
+
+        # All three remain; no eviction ran, no cleanup dispatched.
+        assert "session-active" in runner._agent_cache
+        assert "session-idle-a" in runner._agent_cache
+        assert "session-idle-b" in runner._agent_cache
+        assert runner._cleanup_agent_resources.call_count == 0
+
+    def test_cap_evicts_when_multiple_excess_and_some_inactive(self, monkeypatch):
+        """Mixed active/idle in the LRU excess window: only the idle ones go.
+
+        With CAP=2 and 4 entries, excess=2 (the two oldest).  If the
+        oldest is active and the next is idle, we evict exactly one.
+        Cache ends at CAP+1, which is still better than unbounded.
+        """
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 2)
+        runner = self._runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        oldest_active = self._fake_agent()
+        idle_second = self._fake_agent()
+        idle_third = self._fake_agent()
+        idle_fourth = self._fake_agent()
+
+        runner._agent_cache["s1"] = (oldest_active, "sig")
+        runner._agent_cache["s2"] = (idle_second, "sig")  # in excess window, idle
+        runner._agent_cache["s3"] = (idle_third, "sig")
+        runner._agent_cache["s4"] = (idle_fourth, "sig")
+
+        runner._running_agents["s1"] = oldest_active  # oldest is mid-turn
+
+        with runner._agent_cache_lock:
+            runner._enforce_agent_cache_cap()
+
+        # s1 protected (active), s2 evicted (idle + in excess window),
+        # s3 and s4 untouched (outside excess window).
+        assert "s1" in runner._agent_cache
+        assert "s2" not in runner._agent_cache
+        assert "s3" in runner._agent_cache
+        assert "s4" in runner._agent_cache
+
+    def test_cap_leaves_cache_over_limit_if_all_active(self, monkeypatch, caplog):
+        """If every over-cap entry is mid-turn, the cache stays over cap.
+
+        Better to temporarily exceed the cap than to crash an in-flight
+        turn by tearing down its clients.
+        """
+        from gateway import run as gw_run
+        import logging as _logging
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1)
+        runner = self._runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        a1 = self._fake_agent()
+        a2 = self._fake_agent()
+        a3 = self._fake_agent()
+        runner._agent_cache["s1"] = (a1, "sig")
+        runner._agent_cache["s2"] = (a2, "sig")
+        runner._agent_cache["s3"] = (a3, "sig")
+
+        # All three are mid-turn.
+        runner._running_agents["s1"] = a1
+        runner._running_agents["s2"] = a2
+        runner._running_agents["s3"] = a3
+
+        with caplog.at_level(_logging.WARNING, logger="gateway.run"):
+            with runner._agent_cache_lock:
+                runner._enforce_agent_cache_cap()
+
+        # Cache unchanged because eviction had to skip every candidate.
+        assert len(runner._agent_cache) == 3
+        # _cleanup_agent_resources must NOT have been scheduled.
+        assert runner._cleanup_agent_resources.call_count == 0
+        # And we logged a warning so operators can see the condition.
+        assert any("mid-turn" in r.message for r in caplog.records)
+
+    def test_cap_pending_sentinel_does_not_block_eviction(self, monkeypatch):
+        """_AGENT_PENDING_SENTINEL in _running_agents is treated as 'not active'.
+
+        The sentinel is set while an agent is being CONSTRUCTED, before the
+        real AIAgent instance exists.  Cached agents from other sessions
+        can still be evicted safely.
+        """
+        from gateway import run as gw_run
+        from gateway.run import _AGENT_PENDING_SENTINEL
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1)
+        runner = self._runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        a1 = self._fake_agent()
+        a2 = self._fake_agent()
+        runner._agent_cache["s1"] = (a1, "sig")
+        runner._agent_cache["s2"] = (a2, "sig")
+        # Another session is mid-creation — sentinel, no real agent yet.
+        runner._running_agents["s3-being-created"] = _AGENT_PENDING_SENTINEL
+
+        with runner._agent_cache_lock:
+            runner._enforce_agent_cache_cap()
+
+        assert "s1" not in runner._agent_cache  # evicted normally
+        assert "s2" in runner._agent_cache
+
+    def test_idle_sweep_skips_active_agent(self, monkeypatch):
+        """Idle-TTL sweep must not tear down an active agent even if 'stale'."""
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.01)
+        runner = self._runner()
+        runner._cleanup_agent_resources = MagicMock()
+
+        old_but_active = self._fake_agent(idle_seconds=10.0)
+        runner._agent_cache["s1"] = (old_but_active, "sig")
+        runner._running_agents["s1"] = old_but_active
+
+        evicted = runner._sweep_idle_cached_agents()
+
+        assert evicted == 0
+        assert "s1" in runner._agent_cache
+        assert runner._cleanup_agent_resources.call_count == 0
+
+    def test_eviction_does_not_close_active_agent_client(self, monkeypatch):
+        """Live test: evicting an active agent does NOT null its .client.
+
+        This reproduces the original concern — if eviction fired while an
+        agent was mid-turn, `agent.close()` would set `self.client = None`
+        and the next API call inside the loop would crash.  With the
+        active-agent skip, the client stays intact.
+        """
+        from gateway import run as gw_run
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1)
+        runner = self._runner()
+
+        # Build a proper fake agent whose close() matches AIAgent's contract.
+        active = MagicMock()
+        active._last_activity_ts = __import__("time").time()
+        active.client = MagicMock()  # simulate an OpenAI client
+        def _real_close():
+            active.client = None  # mirrors run_agent.py:3299
+        active.close = _real_close
+        active.shutdown_memory_provider = MagicMock()
+
+        idle = self._fake_agent()
+
+        runner._agent_cache["active-session"] = (active, "sig")
+        runner._agent_cache["idle-session"] = (idle, "sig")
+        runner._running_agents["active-session"] = active
+
+        # Real cleanup function, not mocked — we want to see whether close()
+        # runs on the active agent.  (It shouldn't.)
+        with runner._agent_cache_lock:
+            runner._enforce_agent_cache_cap()
+
+        # Let any eviction cleanup threads drain.
+        import time as _t
+        _t.sleep(0.2)
+
+        # The ACTIVE agent's client must still be usable.
+        assert active.client is not None, (
+            "Active agent's client was closed by eviction — "
+            "running turn would crash on its next API call."
+        )
+
+
+class TestAgentCacheSpilloverLive:
+    """Live E2E: fill cache with real AIAgent instances and stress it."""
+
+    def _runner(self):
+        from collections import OrderedDict
+        from gateway.run import GatewayRunner
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner._agent_cache = OrderedDict()
+        runner._agent_cache_lock = threading.Lock()
+        runner._running_agents = {}
+        return runner
+
+    def _real_agent(self):
+        """A genuine AIAgent; no API calls are made during these tests."""
+        from run_agent import AIAgent
+        return AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            platform="telegram",
+        )
+
+    def test_fill_to_cap_then_spillover(self, monkeypatch):
+        """Fill to cap with real agents, insert one more, oldest evicted."""
+        from gateway import run as gw_run
+
+        CAP = 8
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP)
+        runner = self._runner()
+
+        agents = [self._real_agent() for _ in range(CAP)]
+        for i, a in enumerate(agents):
+            with runner._agent_cache_lock:
+                runner._agent_cache[f"s{i}"] = (a, "sig")
+                runner._enforce_agent_cache_cap()
+        assert len(runner._agent_cache) == CAP
+
+        # Spillover insertion.
+        newcomer = self._real_agent()
+        with runner._agent_cache_lock:
+            runner._agent_cache["new"] = (newcomer, "sig")
+            runner._enforce_agent_cache_cap()
+
+        # Oldest (s0) evicted, cap still CAP.
+        assert "s0" not in runner._agent_cache
+        assert "new" in runner._agent_cache
+        assert len(runner._agent_cache) == CAP
+
+        # Clean up so pytest doesn't leak resources.
+        for a in agents + [newcomer]:
+            try:
+                a.close()
+            except Exception:
+                pass
+
+    def test_spillover_all_active_keeps_cache_over_cap(self, monkeypatch, caplog):
+        """Every slot active: cache goes over cap, no one gets torn down."""
+        from gateway import run as gw_run
+        import logging as _logging
+
+        CAP = 4
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP)
+        runner = self._runner()
+
+        agents = [self._real_agent() for _ in range(CAP)]
+        for i, a in enumerate(agents):
+            runner._agent_cache[f"s{i}"] = (a, "sig")
+            runner._running_agents[f"s{i}"] = a  # every session mid-turn
+
+        newcomer = self._real_agent()
+        with caplog.at_level(_logging.WARNING, logger="gateway.run"):
+            with runner._agent_cache_lock:
+                runner._agent_cache["new"] = (newcomer, "sig")
+                runner._enforce_agent_cache_cap()
+
+        assert len(runner._agent_cache) == CAP + 1  # temporarily over cap
+        # All existing agents still usable.
+        for i, a in enumerate(agents):
+            assert a.client is not None, f"s{i} got closed while active!"
+        # And we warned operators.
+        assert any("mid-turn" in r.message for r in caplog.records)
+
+        for a in agents + [newcomer]:
+            try:
+                a.close()
+            except Exception:
+                pass
+
+    def test_concurrent_inserts_settle_at_cap(self, monkeypatch):
+        """Many threads inserting in parallel end with len(cache) == CAP."""
+        from gateway import run as gw_run
+
+        CAP = 16
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP)
+        runner = self._runner()
+
+        N_THREADS = 8
+        PER_THREAD = 20  # 8 * 20 = 160 inserts into a 16-slot cache
+
+        def worker(tid: int):
+            for j in range(PER_THREAD):
+                a = self._real_agent()
+                key = f"t{tid}-s{j}"
+                with runner._agent_cache_lock:
+                    runner._agent_cache[key] = (a, "sig")
+                    runner._enforce_agent_cache_cap()
+
+        threads = [
+            threading.Thread(target=worker, args=(t,), daemon=True)
+            for t in range(N_THREADS)
+        ]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join(timeout=30)
+            assert not t.is_alive(), "Worker thread hung — possible deadlock?"
+
+        # Let daemon cleanup threads settle.
+        import time as _t
+        _t.sleep(0.5)
+
+        assert len(runner._agent_cache) == CAP, (
+            f"Expected exactly {CAP} entries after concurrent inserts, "
+            f"got {len(runner._agent_cache)}."
+        )
+
+    def test_evicted_session_next_turn_gets_fresh_agent(self, monkeypatch):
+        """After eviction, the same session_key can insert a fresh agent.
+
+        Simulates the real spillover flow: evicted session sends another
+        message, which builds a new AIAgent and re-enters the cache.
+        """
+        from gateway import run as gw_run
+
+        CAP = 2
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP)
+        runner = self._runner()
+
+        a0 = self._real_agent()
+        a1 = self._real_agent()
+        runner._agent_cache["sA"] = (a0, "sig")
+        runner._agent_cache["sB"] = (a1, "sig")
+
+        # 3rd session forces sA (oldest) out.
+        a2 = self._real_agent()
+        with runner._agent_cache_lock:
+            runner._agent_cache["sC"] = (a2, "sig")
+            runner._enforce_agent_cache_cap()
+        assert "sA" not in runner._agent_cache
+
+        # Let the eviction cleanup thread run.
+        import time as _t
+        _t.sleep(0.3)
+
+        # Now sA's user sends another message → a fresh agent goes in.
+        a0_new = self._real_agent()
+        with runner._agent_cache_lock:
+            runner._agent_cache["sA"] = (a0_new, "sig")
+            runner._enforce_agent_cache_cap()
+
+        assert "sA" in runner._agent_cache
+        assert runner._agent_cache["sA"][0] is a0_new  # the new one, not stale
+        # Fresh agent is usable.
+        assert a0_new.client is not None
+
+        for a in (a0, a1, a2, a0_new):
+            try:
+                a.close()
+            except Exception:
+                pass
+
+
+class TestAgentCacheIdleResume:
+    """End-to-end: idle-TTL-evicted session resumes cleanly with task state.
+
+    Real-world scenario: user leaves a Telegram session open for 2+ hours.
+    Idle-TTL evicts their cached agent.  They come back and send a message.
+    The new agent built for the same session_id must inherit:
+      - Conversation history (from SessionStore — outside cache concern)
+      - Terminal sandbox (same task_id → same _active_environments entry)
+      - Browser daemon (same task_id → same browser session)
+      - Background processes (same task_id → same process_registry entries)
+    The ONLY thing that should reset is the LLM client pool (rebuilt fresh).
+    """
+
+    def _runner(self):
+        from collections import OrderedDict
+        from gateway.run import GatewayRunner
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner._agent_cache = OrderedDict()
+        runner._agent_cache_lock = threading.Lock()
+        runner._running_agents = {}
+        return runner
+
+    def test_release_clients_does_not_touch_process_registry(self, monkeypatch):
+        """release_clients must not call process_registry.kill_all for task_id."""
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            session_id="idle-resume-test-session",
+        )
+
+        # Spy on process_registry.kill_all — it MUST NOT be called.
+        from tools import process_registry as _pr
+        kill_all_calls: list = []
+        original_kill_all = _pr.process_registry.kill_all
+        _pr.process_registry.kill_all = lambda **kw: kill_all_calls.append(kw)
+        try:
+            agent.release_clients()
+        finally:
+            _pr.process_registry.kill_all = original_kill_all
+            try:
+                agent.close()
+            except Exception:
+                pass
+
+        assert kill_all_calls == [], (
+            f"release_clients() called process_registry.kill_all — would "
+            f"kill user's bg processes on cache eviction. Calls: {kill_all_calls}"
+        )
+
+    def test_release_clients_does_not_touch_terminal_or_browser(self, monkeypatch):
+        """release_clients must not call cleanup_vm or cleanup_browser."""
+        from run_agent import AIAgent
+        from tools import terminal_tool as _tt
+        from tools import browser_tool as _bt
+
+        agent = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            session_id="idle-resume-test-2",
+        )
+
+        vm_calls: list = []
+        browser_calls: list = []
+        original_vm = _tt.cleanup_vm
+        original_browser = _bt.cleanup_browser
+        _tt.cleanup_vm = lambda tid: vm_calls.append(tid)
+        _bt.cleanup_browser = lambda tid: browser_calls.append(tid)
+        try:
+            agent.release_clients()
+        finally:
+            _tt.cleanup_vm = original_vm
+            _bt.cleanup_browser = original_browser
+            try:
+                agent.close()
+            except Exception:
+                pass
+
+        assert vm_calls == [], (
+            f"release_clients() tore down terminal sandbox — user's cwd, "
+            f"env, and bg shells would be gone on resume. Calls: {vm_calls}"
+        )
+        assert browser_calls == [], (
+            f"release_clients() tore down browser session — user's open "
+            f"tabs and cookies gone on resume. Calls: {browser_calls}"
+        )
+
+    def test_release_clients_closes_llm_client(self):
+        """release_clients IS expected to close the OpenAI/httpx client."""
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+        )
+        # Clients are lazy-built; force one to exist so we can verify close.
+        assert agent.client is not None  # __init__ builds it
+
+        agent.release_clients()
+
+        # Post-release: client reference is dropped (memory freed).
+        assert agent.client is None
+
+    def test_close_vs_release_full_teardown_difference(self, monkeypatch):
+        """close() tears down task state; release_clients() does not.
+
+        This pins the semantic contract: session-expiry path uses close()
+        (full teardown — session is done), cache-eviction path uses
+        release_clients() (soft — session may resume).
+        """
+        from run_agent import AIAgent
+        from tools import terminal_tool as _tt
+
+        # Agent A: evicted from cache (soft) — terminal survives.
+        # Agent B: session expired (hard) — terminal torn down.
+        agent_a = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            session_id="soft-session",
+        )
+        agent_b = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            session_id="hard-session",
+        )
+
+        vm_calls: list = []
+        original_vm = _tt.cleanup_vm
+        _tt.cleanup_vm = lambda tid: vm_calls.append(tid)
+        try:
+            agent_a.release_clients()   # cache eviction
+            agent_b.close()              # session expiry
+        finally:
+            _tt.cleanup_vm = original_vm
+            try:
+                agent_a.close()
+            except Exception:
+                pass
+
+        # Only agent_b's task_id should appear in cleanup calls.
+        assert "hard-session" in vm_calls
+        assert "soft-session" not in vm_calls
+
+    def test_idle_evicted_session_rebuild_inherits_task_id(self, monkeypatch):
+        """After idle-TTL eviction, a fresh agent with the same session_id
+        gets the same task_id — so tool state (terminal/browser/bg procs)
+        that persisted across eviction is reachable via the new agent.
+        """
+        from gateway import run as gw_run
+        from run_agent import AIAgent
+
+        monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.01)
+        runner = self._runner()
+
+        # Build an agent representing a stale (idle) session.
+        SESSION_ID = "long-lived-user-session"
+        old = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            session_id=SESSION_ID,
+        )
+        old._last_activity_ts = 0.0  # force idle
+        runner._agent_cache["sKey"] = (old, "sig")
+
+        # Simulate the idle-TTL sweep firing.
+        runner._sweep_idle_cached_agents()
+        assert "sKey" not in runner._agent_cache
+
+        # Wait for the daemon thread doing release_clients() to finish.
+        import time as _t
+        _t.sleep(0.3)
+
+        # Old agent's client is gone (soft cleanup fired).
+        assert old.client is None
+
+        # User comes back — new agent built for the SAME session_id.
+        new_agent = AIAgent(
+            model="anthropic/claude-sonnet-4", api_key="test",
+            base_url="https://openrouter.ai/api/v1", provider="openrouter",
+            max_iterations=5, quiet_mode=True,
+            skip_context_files=True, skip_memory=True,
+            session_id=SESSION_ID,
+        )
+
+        # Same session_id means same task_id routed to tools.  The new
+        # agent inherits any per-task state (terminal sandbox etc.) that
+        # was preserved across eviction.
+        assert new_agent.session_id == old.session_id == SESSION_ID
+        # And it has a fresh working client.
+        assert new_agent.client is not None
+
+        try:
+            new_agent.close()
+        except Exception:
+            pass
@@ -20,11 +20,6 @@ def _make_adapter(monkeypatch, **extra):
    return BlueBubblesAdapter(cfg)


-class TestBlueBubblesPlatformEnum:
-    def test_bluebubbles_enum_exists(self):
-        assert Platform.BLUEBUBBLES.value == "bluebubbles"
-
-
 class TestBlueBubblesConfigLoading:
    def test_apply_env_overrides_bluebubbles(self, monkeypatch):
        monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
@@ -41,15 +36,6 @@ class TestBlueBubblesConfigLoading:
        assert bc.extra["password"] == "secret"
        assert bc.extra["webhook_port"] == 9999

-    def test_connected_platforms_includes_bluebubbles(self, monkeypatch):
-        monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
-        monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
-        from gateway.config import GatewayConfig, _apply_env_overrides
-
-        config = GatewayConfig()
-        _apply_env_overrides(config)
-        assert Platform.BLUEBUBBLES in config.get_connected_platforms()
-
    def test_home_channel_set_from_env(self, monkeypatch):
        monkeypatch.setenv("BLUEBUBBLES_SERVER_URL", "http://localhost:1234")
        monkeypatch.setenv("BLUEBUBBLES_PASSWORD", "secret")
@@ -273,29 +259,6 @@ class TestBlueBubblesGuidResolution:
        assert result is None


-class TestBlueBubblesToolsetIntegration:
-    def test_toolset_exists(self):
-        from toolsets import TOOLSETS
-
-        assert "hermes-bluebubbles" in TOOLSETS
-
-    def test_toolset_in_gateway_composite(self):
-        from toolsets import TOOLSETS
-
-        gateway = TOOLSETS["hermes-gateway"]
-        assert "hermes-bluebubbles" in gateway["includes"]
-
-
-class TestBlueBubblesPromptHint:
-    def test_platform_hint_exists(self):
-        from agent.prompt_builder import PLATFORM_HINTS
-
-        assert "bluebubbles" in PLATFORM_HINTS
-        hint = PLATFORM_HINTS["bluebubbles"]
-        assert "iMessage" in hint
-        assert "plain text" in hint
-
-
 class TestBlueBubblesAttachmentDownload:
    """Verify _download_attachment routes to the correct cache helper."""

@@ -71,6 +71,51 @@ class TestGetConnectedPlatforms:
        config = GatewayConfig()
        assert config.get_connected_platforms() == []

+    def test_dingtalk_recognised_via_extras(self):
+        config = GatewayConfig(
+            platforms={
+                Platform.DINGTALK: PlatformConfig(
+                    enabled=True,
+                    extra={"client_id": "cid", "client_secret": "sec"},
+                ),
+            },
+        )
+        assert Platform.DINGTALK in config.get_connected_platforms()
+
+    def test_dingtalk_recognised_via_env_vars(self, monkeypatch):
+        """DingTalk configured via env vars (no extras) should still be
+        recognised as connected — covers the case where _apply_env_overrides
+        hasn't populated extras yet."""
+        monkeypatch.setenv("DINGTALK_CLIENT_ID", "env_cid")
+        monkeypatch.setenv("DINGTALK_CLIENT_SECRET", "env_sec")
+        config = GatewayConfig(
+            platforms={
+                Platform.DINGTALK: PlatformConfig(enabled=True, extra={}),
+            },
+        )
+        assert Platform.DINGTALK in config.get_connected_platforms()
+
+    def test_dingtalk_missing_creds_not_connected(self, monkeypatch):
+        monkeypatch.delenv("DINGTALK_CLIENT_ID", raising=False)
+        monkeypatch.delenv("DINGTALK_CLIENT_SECRET", raising=False)
+        config = GatewayConfig(
+            platforms={
+                Platform.DINGTALK: PlatformConfig(enabled=True, extra={}),
+            },
+        )
+        assert Platform.DINGTALK not in config.get_connected_platforms()
+
+    def test_dingtalk_disabled_not_connected(self):
+        config = GatewayConfig(
+            platforms={
+                Platform.DINGTALK: PlatformConfig(
+                    enabled=False,
+                    extra={"client_id": "cid", "client_secret": "sec"},
+                ),
+            },
+        )
+        assert Platform.DINGTALK not in config.get_connected_platforms()
+

 class TestSessionResetPolicy:
    def test_roundtrip(self):
@@ -2,6 +2,7 @@
 import asyncio
 import json
 from datetime import datetime, timezone
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock

 import pytest
@@ -230,6 +231,29 @@ class TestSend:

 class TestConnect:

+    @pytest.mark.asyncio
+    async def test_disconnect_closes_session_websocket(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        websocket = AsyncMock()
+        blocker = asyncio.Event()
+
+        async def _run_forever():
+            try:
+                await blocker.wait()
+            except asyncio.CancelledError:
+                return
+
+        adapter._stream_client = SimpleNamespace(websocket=websocket)
+        adapter._stream_task = asyncio.create_task(_run_forever())
+        adapter._running = True
+
+        await adapter.disconnect()
+
+        websocket.close.assert_awaited_once()
+        assert adapter._stream_task is None
+
    @pytest.mark.asyncio
    async def test_connect_fails_without_sdk(self, monkeypatch):
        monkeypatch.setattr(
@@ -269,7 +293,391 @@ class TestConnect:
 # ---------------------------------------------------------------------------


-class TestPlatformEnum:
+# ---------------------------------------------------------------------------
+# SDK compatibility regression tests (dingtalk-stream >= 0.20 / 0.24)
+# ---------------------------------------------------------------------------
+
+
+class TestWebhookDomainAllowlist:
+    """Guard the webhook origin allowlist against regression.
+
+    The SDK started returning reply webhooks on ``oapi.dingtalk.com`` in
+    addition to ``api.dingtalk.com``. Both must be accepted, and hostile
+    lookalikes must still be rejected (SSRF defence-in-depth).
+    """
+
+    def test_api_domain_accepted(self):
+        from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
+        assert _DINGTALK_WEBHOOK_RE.match(
+            "https://api.dingtalk.com/robot/send?access_token=x"
+        )
+
+    def test_oapi_domain_accepted(self):
+        from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
+        assert _DINGTALK_WEBHOOK_RE.match(
+            "https://oapi.dingtalk.com/robot/send?access_token=x"
+        )
+
+    def test_http_rejected(self):
+        from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
+        assert not _DINGTALK_WEBHOOK_RE.match("http://api.dingtalk.com/robot/send")
+
+    def test_suffix_attack_rejected(self):
+        from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
+        assert not _DINGTALK_WEBHOOK_RE.match(
+            "https://api.dingtalk.com.evil.example/"
+        )
+
+    def test_unsanctioned_subdomain_rejected(self):
+        from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
+        # Only api.* and oapi.* are allowed — e.g. eapi.dingtalk.com must not slip through
+        assert not _DINGTALK_WEBHOOK_RE.match("https://eapi.dingtalk.com/robot/send")
+
+
+class TestHandlerProcessIsAsync:
+    """dingtalk-stream >= 0.20 requires ``process`` to be a coroutine."""
+
+    def test_process_is_coroutine_function(self):
+        from gateway.platforms.dingtalk import _IncomingHandler
+        assert asyncio.iscoroutinefunction(_IncomingHandler.process)
+
+
+class TestExtractText:
+    """_extract_text must handle both legacy and current SDK payload shapes.
+
+    Before SDK 0.20 ``message.text`` was a ``dict`` with a ``content`` key.
+    From 0.20 onward it is a ``TextContent`` dataclass whose ``__str__``
+    returns ``"TextContent(content=...)"`` — falling back to ``str(text)``
+    leaks that repr into the agent's input.
+    """
+
+    def test_text_as_dict_legacy(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = {"content": "hello world"}
+        msg.rich_text_content = None
+        msg.rich_text = None
+        assert DingTalkAdapter._extract_text(msg) == "hello world"
+
+    def test_text_as_textcontent_object(self):
+        """SDK >= 0.20 shape: object with ``.content`` attribute."""
+        from gateway.platforms.dingtalk import DingTalkAdapter
+
+        class FakeTextContent:
+            content = "hello from new sdk"
+
+            def __str__(self):  # mimic real SDK repr
+                return f"TextContent(content={self.content})"
+
+        msg = MagicMock()
+        msg.text = FakeTextContent()
+        msg.rich_text_content = None
+        msg.rich_text = None
+        result = DingTalkAdapter._extract_text(msg)
+        assert result == "hello from new sdk"
+        assert "TextContent(" not in result
+
+    def test_text_content_attr_with_empty_string(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+
+        class FakeTextContent:
+            content = ""
+
+        msg = MagicMock()
+        msg.text = FakeTextContent()
+        msg.rich_text_content = None
+        msg.rich_text = None
+        assert DingTalkAdapter._extract_text(msg) == ""
+
+    def test_rich_text_content_new_shape(self):
+        """SDK >= 0.20 exposes rich text as ``message.rich_text_content.rich_text_list``."""
+        from gateway.platforms.dingtalk import DingTalkAdapter
+
+        class FakeRichText:
+            rich_text_list = [{"text": "hello "}, {"text": "world"}]
+
+        msg = MagicMock()
+        msg.text = None
+        msg.rich_text_content = FakeRichText()
+        msg.rich_text = None
+        result = DingTalkAdapter._extract_text(msg)
+        assert "hello" in result and "world" in result
+
+    def test_rich_text_legacy_shape(self):
+        """Legacy ``message.rich_text`` list remains supported."""
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = None
+        msg.rich_text_content = None
+        msg.rich_text = [{"text": "legacy "}, {"text": "rich"}]
+        result = DingTalkAdapter._extract_text(msg)
+        assert "legacy" in result and "rich" in result
+
+    def test_empty_message(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = None
+        msg.rich_text_content = None
+        msg.rich_text = None
+        assert DingTalkAdapter._extract_text(msg) == ""
+
+
+# ---------------------------------------------------------------------------
+# Group gating — require_mention + allowed_users (parity with other platforms)
+# ---------------------------------------------------------------------------
+
+
+def _make_gating_adapter(monkeypatch, *, extra=None, env=None):
+    """Build a DingTalkAdapter with only the gating fields populated.
+
+    Clears every DINGTALK_* gating env var before applying the caller's
+    overrides so individual tests stay isolated.
+    """
+    for key in (
+        "DINGTALK_REQUIRE_MENTION",
+        "DINGTALK_MENTION_PATTERNS",
+        "DINGTALK_FREE_RESPONSE_CHATS",
+        "DINGTALK_ALLOWED_USERS",
+    ):
+        monkeypatch.delenv(key, raising=False)
+    for key, value in (env or {}).items():
+        monkeypatch.setenv(key, value)
+    from gateway.platforms.dingtalk import DingTalkAdapter
+    return DingTalkAdapter(PlatformConfig(enabled=True, extra=extra or {}))
+
+
+class TestAllowedUsersGate:
+
+    def test_empty_allowlist_allows_everyone(self, monkeypatch):
+        adapter = _make_gating_adapter(monkeypatch)
+        assert adapter._is_user_allowed("anyone", "any-staff") is True
+
+    def test_wildcard_allowlist_allows_everyone(self, monkeypatch):
+        adapter = _make_gating_adapter(monkeypatch, extra={"allowed_users": ["*"]})
+        assert adapter._is_user_allowed("anyone", "any-staff") is True
+
+    def test_matches_sender_id_case_insensitive(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"allowed_users": ["SenderABC"]}
+        )
+        assert adapter._is_user_allowed("senderabc", "") is True
+
+    def test_matches_staff_id(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"allowed_users": ["staff_1234"]}
+        )
+        assert adapter._is_user_allowed("", "staff_1234") is True
+
+    def test_rejects_unknown_user(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"allowed_users": ["staff_1234"]}
+        )
+        assert adapter._is_user_allowed("other-sender", "other-staff") is False
+
+    def test_env_var_csv_populates_allowlist(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, env={"DINGTALK_ALLOWED_USERS": "alice,bob,carol"}
+        )
+        assert adapter._is_user_allowed("alice", "") is True
+        assert adapter._is_user_allowed("dave", "") is False
+
+
+class TestMentionPatterns:
+
+    def test_empty_patterns_list(self, monkeypatch):
+        adapter = _make_gating_adapter(monkeypatch)
+        assert adapter._mention_patterns == []
+        assert adapter._message_matches_mention_patterns("anything") is False
+
+    def test_pattern_matches_text(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"mention_patterns": ["^hermes"]}
+        )
+        assert adapter._message_matches_mention_patterns("hermes please help") is True
+        assert adapter._message_matches_mention_patterns("please hermes help") is False
+
+    def test_pattern_is_case_insensitive(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"mention_patterns": ["^hermes"]}
+        )
+        assert adapter._message_matches_mention_patterns("HERMES help") is True
+
+    def test_invalid_regex_is_skipped_not_raised(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch,
+            extra={"mention_patterns": ["[unclosed", "^valid"]},
+        )
+        # Invalid pattern dropped, valid one kept
+        assert len(adapter._mention_patterns) == 1
+        assert adapter._message_matches_mention_patterns("valid trigger") is True
+
+    def test_env_var_json_populates_patterns(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch,
+            env={"DINGTALK_MENTION_PATTERNS": '["^bot", "^assistant"]'},
+        )
+        assert len(adapter._mention_patterns) == 2
+        assert adapter._message_matches_mention_patterns("bot ping") is True
+
+    def test_env_var_newline_fallback_when_not_json(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch,
+            env={"DINGTALK_MENTION_PATTERNS": "^bot\n^assistant"},
+        )
+        assert len(adapter._mention_patterns) == 2
+
+
+class TestShouldProcessMessage:
+
+    def test_dm_always_accepted(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"require_mention": True}
+        )
+        msg = MagicMock(is_in_at_list=False)
+        assert adapter._should_process_message(msg, "hi", is_group=False, chat_id="dm1") is True
+
+    def test_group_rejected_when_require_mention_and_no_trigger(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"require_mention": True}
+        )
+        msg = MagicMock(is_in_at_list=False)
+        assert adapter._should_process_message(msg, "hi", is_group=True, chat_id="grp1") is False
+
+    def test_group_accepted_when_require_mention_disabled(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"require_mention": False}
+        )
+        msg = MagicMock(is_in_at_list=False)
+        assert adapter._should_process_message(msg, "hi", is_group=True, chat_id="grp1") is True
+
+    def test_group_accepted_when_bot_is_mentioned(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch, extra={"require_mention": True}
+        )
+        msg = MagicMock(is_in_at_list=True)
+        assert adapter._should_process_message(msg, "hi", is_group=True, chat_id="grp1") is True
+
+    def test_group_accepted_when_text_matches_wake_word(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch,
+            extra={"require_mention": True, "mention_patterns": ["^hermes"]},
+        )
+        msg = MagicMock(is_in_at_list=False)
+        assert adapter._should_process_message(msg, "hermes help", is_group=True, chat_id="grp1") is True
+
+    def test_group_accepted_when_chat_in_free_response_list(self, monkeypatch):
+        adapter = _make_gating_adapter(
+            monkeypatch,
+            extra={"require_mention": True, "free_response_chats": ["grp1"]},
+        )
+        msg = MagicMock(is_in_at_list=False)
+        assert adapter._should_process_message(msg, "hi", is_group=True, chat_id="grp1") is True
+        # Different group still blocked
+        assert adapter._should_process_message(msg, "hi", is_group=True, chat_id="grp2") is False
+
+
+# ---------------------------------------------------------------------------
+# _IncomingHandler.process — session_webhook extraction & fire-and-forget
+# ---------------------------------------------------------------------------
+
+
+class TestIncomingHandlerProcess:
+    """Verify that _IncomingHandler.process correctly converts callback data
+    and dispatches message processing as a background task (fire-and-forget)
+    so the SDK ACK is returned immediately."""
+
+    @pytest.mark.asyncio
+    async def test_process_extracts_session_webhook(self):
+        """session_webhook must be populated from callback data."""
+        from gateway.platforms.dingtalk import _IncomingHandler, DingTalkAdapter
+
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._on_message = AsyncMock()
+        handler = _IncomingHandler(adapter, asyncio.get_running_loop())
+
+        callback = MagicMock()
+        callback.data = {
+            "msgtype": "text",
+            "text": {"content": "hello"},
+            "senderId": "user1",
+            "conversationId": "conv1",
+            "sessionWebhook": "https://oapi.dingtalk.com/robot/sendBySession?session=abc",
+            "msgId": "msg-001",
+        }
+
+        result = await handler.process(callback)
+        # Should return ACK immediately (STATUS_OK = 200)
+        assert result[0] == 200
+
+        # Let the background task run
+        await asyncio.sleep(0.05)
+
+        # _on_message should have been called with a ChatbotMessage
+        adapter._on_message.assert_called_once()
+        chatbot_msg = adapter._on_message.call_args[0][0]
+        assert chatbot_msg.session_webhook == "https://oapi.dingtalk.com/robot/sendBySession?session=abc"
+
+    @pytest.mark.asyncio
+    async def test_process_fallback_session_webhook_when_from_dict_misses_it(self):
+        """If ChatbotMessage.from_dict does not map sessionWebhook (e.g. SDK
+        version mismatch), the handler should fall back to extracting it
+        directly from the raw data dict."""
+        from gateway.platforms.dingtalk import _IncomingHandler, DingTalkAdapter
+
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._on_message = AsyncMock()
+        handler = _IncomingHandler(adapter, asyncio.get_running_loop())
+
+        callback = MagicMock()
+        # Use a key that from_dict might not recognise in some SDK versions
+        callback.data = {
+            "msgtype": "text",
+            "text": {"content": "hi"},
+            "senderId": "user2",
+            "conversationId": "conv2",
+            "session_webhook": "https://oapi.dingtalk.com/robot/sendBySession?session=def",
+            "msgId": "msg-002",
+        }
+
+        await handler.process(callback)
+        await asyncio.sleep(0.05)
+
+        adapter._on_message.assert_called_once()
+        chatbot_msg = adapter._on_message.call_args[0][0]
+        assert chatbot_msg.session_webhook == "https://oapi.dingtalk.com/robot/sendBySession?session=def"
+
+    @pytest.mark.asyncio
+    async def test_process_returns_ack_immediately(self):
+        """process() must not block on _on_message — it should return
+        the ACK tuple before the message is fully processed."""
+        from gateway.platforms.dingtalk import _IncomingHandler, DingTalkAdapter
+
+        processing_started = asyncio.Event()
+        processing_gate = asyncio.Event()
+
+        async def slow_on_message(msg):
+            processing_started.set()
+            await processing_gate.wait()  # Block until we release
+
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._on_message = slow_on_message
+        handler = _IncomingHandler(adapter, asyncio.get_running_loop())
+
+        callback = MagicMock()
+        callback.data = {
+            "msgtype": "text",
+            "text": {"content": "test"},
+            "senderId": "u",
+            "conversationId": "c",
+            "sessionWebhook": "https://oapi.dingtalk.com/x",
+            "msgId": "m",
+        }
+
+        # process() should return immediately even though _on_message blocks
+        result = await handler.process(callback)
+        assert result[0] == 200
+
+        # Clean up: release the gate so the background task finishes
+        processing_gate.set()
+        await asyncio.sleep(0.05)

-    def test_dingtalk_in_platform_enum(self):
-        assert Platform.DINGTALK.value == "dingtalk"
@@ -0,0 +1,155 @@
+"""Tests for the Discord ``allowed_mentions`` safe-default helper.
+
+Ensures the bot defaults to blocking ``@everyone`` / ``@here`` / role pings
+so an LLM response (or echoed user content) can't spam a whole server —
+and that the four ``DISCORD_ALLOW_MENTION_*`` env vars correctly opt back
+in when an operator explicitly wants a different policy.
+"""
+
+import sys
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+
+class _FakeAllowedMentions:
+    """Stand-in for ``discord.AllowedMentions`` that exposes the same four
+    boolean flags as real attributes so the test can assert on them.
+    """
+
+    def __init__(self, *, everyone=True, roles=True, users=True, replied_user=True):
+        self.everyone = everyone
+        self.roles = roles
+        self.users = users
+        self.replied_user = replied_user
+
+    def __repr__(self) -> str:  # pragma: no cover - debug helper
+        return (
+            f"AllowedMentions(everyone={self.everyone}, roles={self.roles}, "
+            f"users={self.users}, replied_user={self.replied_user})"
+        )
+
+
+def _ensure_discord_mock():
+    """Install (or augment) a mock ``discord`` module.
+
+    Other test modules in this directory stub ``discord`` via
+    ``sys.modules.setdefault`` — whichever test file imports first wins and
+    our full module is then silently dropped. We therefore ALWAYS force
+    ``AllowedMentions`` onto whatever is currently in ``sys.modules["discord"]``;
+    that's the only attribute this test file actually needs real behavior from.
+    """
+    if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
+        sys.modules["discord"].AllowedMentions = _FakeAllowedMentions
+        return
+
+    if sys.modules.get("discord") is None:
+        discord_mod = MagicMock()
+        discord_mod.Intents.default.return_value = MagicMock()
+        discord_mod.Client = MagicMock
+        discord_mod.File = MagicMock
+        discord_mod.DMChannel = type("DMChannel", (), {})
+        discord_mod.Thread = type("Thread", (), {})
+        discord_mod.ForumChannel = type("ForumChannel", (), {})
+        discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object)
+        discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, danger=3, green=1, blurple=2, red=3, grey=4, secondary=5)
+        discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4)
+        discord_mod.Interaction = object
+        discord_mod.Embed = MagicMock
+        discord_mod.app_commands = SimpleNamespace(
+            describe=lambda **kwargs: (lambda fn: fn),
+            choices=lambda **kwargs: (lambda fn: fn),
+            Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+        )
+        discord_mod.opus = SimpleNamespace(is_loaded=lambda: True)
+
+        ext_mod = MagicMock()
+        commands_mod = MagicMock()
+        commands_mod.Bot = MagicMock
+        ext_mod.commands = commands_mod
+
+        sys.modules["discord"] = discord_mod
+        sys.modules.setdefault("discord.ext", ext_mod)
+        sys.modules.setdefault("discord.ext.commands", commands_mod)
+
+    # Whether we just installed the mock OR the mock was already installed
+    # by another test's _ensure_discord_mock, force the AllowedMentions
+    # stand-in onto it — _build_allowed_mentions() reads this attribute.
+    sys.modules["discord"].AllowedMentions = _FakeAllowedMentions
+
+
+_ensure_discord_mock()
+
+from gateway.platforms.discord import _build_allowed_mentions  # noqa: E402
+
+
+# The four DISCORD_ALLOW_MENTION_* env vars that _build_allowed_mentions reads.
+# Cleared before each test so env leakage from other tests never masks a regression.
+_ENV_VARS = (
+    "DISCORD_ALLOW_MENTION_EVERYONE",
+    "DISCORD_ALLOW_MENTION_ROLES",
+    "DISCORD_ALLOW_MENTION_USERS",
+    "DISCORD_ALLOW_MENTION_REPLIED_USER",
+)
+
+
+@pytest.fixture(autouse=True)
+def _clear_allowed_mention_env(monkeypatch):
+    for name in _ENV_VARS:
+        monkeypatch.delenv(name, raising=False)
+
+
+def test_safe_defaults_block_everyone_and_roles():
+    am = _build_allowed_mentions()
+    assert am.everyone is False, "default must NOT allow @everyone/@here pings"
+    assert am.roles is False, "default must NOT allow role pings"
+    assert am.users is True, "default must allow user pings so replies work"
+    assert am.replied_user is True, "default must allow reply-reference pings"
+
+
+def test_env_var_opts_back_into_everyone(monkeypatch):
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_EVERYONE", "true")
+    am = _build_allowed_mentions()
+    assert am.everyone is True
+    # other defaults unaffected
+    assert am.roles is False
+    assert am.users is True
+    assert am.replied_user is True
+
+
+def test_env_var_can_disable_users(monkeypatch):
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_USERS", "false")
+    am = _build_allowed_mentions()
+    assert am.users is False
+    # safe defaults elsewhere remain
+    assert am.everyone is False
+    assert am.roles is False
+    assert am.replied_user is True
+
+
+@pytest.mark.parametrize("raw, expected", [
+    ("true", True), ("True", True), ("TRUE", True),
+    ("1", True), ("yes", True), ("YES", True), ("on", True),
+    ("false", False), ("False", False), ("0", False),
+    ("no", False), ("off", False),
+    ("", False),                 # empty falls back to default (False for everyone)
+    ("garbage", False),          # unknown falls back to default
+    (" true ", True),            # whitespace tolerated
+])
+def test_everyone_boolean_parsing(monkeypatch, raw, expected):
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_EVERYONE", raw)
+    am = _build_allowed_mentions()
+    assert am.everyone is expected
+
+
+def test_all_four_knobs_together(monkeypatch):
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_EVERYONE", "true")
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_ROLES", "true")
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_USERS", "false")
+    monkeypatch.setenv("DISCORD_ALLOW_MENTION_REPLIED_USER", "false")
+    am = _build_allowed_mentions()
+    assert am.everyone is True
+    assert am.roles is True
+    assert am.users is False
+    assert am.replied_user is False
@@ -0,0 +1,360 @@
+"""Tests for Discord attachment downloads via the authenticated bot session.
+
+Covers the three download paths (image / audio / document) in
+``DiscordAdapter._handle_message()`` and the shared ``_cache_discord_*``
+helpers. Verifies that:
+
+- ``att.read()`` is preferred over the legacy URL-based downloaders so
+  that Discord's CDN auth (and user-environment DNS quirks) can't block
+  media caching. (issues #8242 image 403s, #6587 CDN SSRF false-positives)
+- Falls back cleanly to the SSRF-gated ``cache_*_from_url`` helpers
+  (image/audio) or SSRF-gated aiohttp (documents) when ``att.read()``
+  isn't available or fails.
+- The document fallback path now runs through the SSRF gate for
+  defense-in-depth. (issue #11345)
+"""
+
+import sys
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import PlatformConfig
+
+
+def _ensure_discord_mock():
+    """Install a mock discord module when discord.py isn't available."""
+    if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
+        return
+
+    discord_mod = MagicMock()
+    discord_mod.Intents.default.return_value = MagicMock()
+    discord_mod.Client = MagicMock
+    discord_mod.File = MagicMock
+    discord_mod.DMChannel = type("DMChannel", (), {})
+    discord_mod.Thread = type("Thread", (), {})
+    discord_mod.ForumChannel = type("ForumChannel", (), {})
+    discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object)
+    discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, secondary=2, danger=3, green=1, grey=2, blurple=2, red=3)
+    discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4, purple=lambda: 5)
+    discord_mod.Interaction = object
+    discord_mod.Embed = MagicMock
+    discord_mod.app_commands = SimpleNamespace(
+        describe=lambda **kwargs: (lambda fn: fn),
+        choices=lambda **kwargs: (lambda fn: fn),
+        Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+    )
+
+    ext_mod = MagicMock()
+    commands_mod = MagicMock()
+    commands_mod.Bot = MagicMock
+    ext_mod.commands = commands_mod
+
+    sys.modules.setdefault("discord", discord_mod)
+    sys.modules.setdefault("discord.ext", ext_mod)
+    sys.modules.setdefault("discord.ext.commands", commands_mod)
+
+
+_ensure_discord_mock()
+
+from gateway.platforms.discord import DiscordAdapter  # noqa: E402
+
+
+# Minimal valid image / audio / PDF bytes so the cache_*_from_bytes
+# validators accept them. cache_image_from_bytes runs _looks_like_image()
+# which checks for magic bytes; PNG's magic is sufficient.
+_PNG_BYTES = b"\x89PNG\r\n\x1a\n" + b"\x00" * 64
+_OGG_BYTES = b"OggS" + b"\x00" * 60
+_PDF_BYTES = b"%PDF-1.4\n" + b"fake pdf body" + b"\n%%EOF"
+
+
+def _make_adapter() -> DiscordAdapter:
+    return DiscordAdapter(PlatformConfig(enabled=True, token="***"))
+
+
+def _make_attachment_with_read(payload: bytes) -> SimpleNamespace:
+    """Attachment stub that exposes .read() — the happy-path primary."""
+    return SimpleNamespace(
+        url="https://cdn.discordapp.com/attachments/fake/file.png",
+        filename="file.png",
+        size=len(payload),
+        read=AsyncMock(return_value=payload),
+    )
+
+
+def _make_attachment_without_read() -> SimpleNamespace:
+    """Attachment stub that has no .read() — exercises the URL fallback."""
+    return SimpleNamespace(
+        url="https://cdn.discordapp.com/attachments/fake/file.png",
+        filename="file.png",
+        size=1024,
+    )
+
+
+# ---------------------------------------------------------------------------
+# _read_attachment_bytes
+# ---------------------------------------------------------------------------
+
+class TestReadAttachmentBytes:
+    """Unit tests for the low-level att.read() wrapper."""
+
+    @pytest.mark.asyncio
+    async def test_returns_bytes_on_successful_read(self):
+        adapter = _make_adapter()
+        att = _make_attachment_with_read(b"hello world")
+
+        result = await adapter._read_attachment_bytes(att)
+
+        assert result == b"hello world"
+        att.read.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_read_missing(self):
+        adapter = _make_adapter()
+        att = _make_attachment_without_read()
+
+        result = await adapter._read_attachment_bytes(att)
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_read_raises(self):
+        """Bot-session fetch failures are swallowed so callers fall back."""
+        adapter = _make_adapter()
+        att = SimpleNamespace(
+            url="https://cdn.discordapp.com/attachments/fake/file.png",
+            filename="file.png",
+            read=AsyncMock(side_effect=RuntimeError("403 Forbidden")),
+        )
+
+        result = await adapter._read_attachment_bytes(att)
+
+        assert result is None
+
+
+# ---------------------------------------------------------------------------
+# _cache_discord_image
+# ---------------------------------------------------------------------------
+
+class TestCacheDiscordImage:
+    @pytest.mark.asyncio
+    async def test_prefers_att_read_over_url(self):
+        """Primary path: att.read() bytes → cache_image_from_bytes, no URL fetch."""
+        adapter = _make_adapter()
+        att = _make_attachment_with_read(_PNG_BYTES)
+
+        with patch(
+            "gateway.platforms.discord.cache_image_from_bytes",
+            return_value="/tmp/cached.png",
+        ) as mock_bytes, patch(
+            "gateway.platforms.discord.cache_image_from_url",
+            new_callable=AsyncMock,
+        ) as mock_url:
+            result = await adapter._cache_discord_image(att, ".png")
+
+        assert result == "/tmp/cached.png"
+        mock_bytes.assert_called_once_with(_PNG_BYTES, ext=".png")
+        mock_url.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_url_when_no_read(self):
+        """No .read() → URL path is used (existing SSRF-gated behavior)."""
+        adapter = _make_adapter()
+        att = _make_attachment_without_read()
+
+        with patch(
+            "gateway.platforms.discord.cache_image_from_bytes",
+        ) as mock_bytes, patch(
+            "gateway.platforms.discord.cache_image_from_url",
+            new_callable=AsyncMock,
+            return_value="/tmp/from_url.png",
+        ) as mock_url:
+            result = await adapter._cache_discord_image(att, ".png")
+
+        assert result == "/tmp/from_url.png"
+        mock_bytes.assert_not_called()
+        mock_url.assert_awaited_once_with(att.url, ext=".png")
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_url_when_bytes_validator_rejects(self):
+        """If att.read() returns garbage that cache_image_from_bytes rejects
+        (e.g. an HTML error page), fall back to the URL downloader instead
+        of surfacing the validation error to the caller."""
+        adapter = _make_adapter()
+        att = _make_attachment_with_read(b"<html>forbidden</html>")
+
+        with patch(
+            "gateway.platforms.discord.cache_image_from_bytes",
+            side_effect=ValueError("not a valid image"),
+        ), patch(
+            "gateway.platforms.discord.cache_image_from_url",
+            new_callable=AsyncMock,
+            return_value="/tmp/fallback.png",
+        ) as mock_url:
+            result = await adapter._cache_discord_image(att, ".png")
+
+        assert result == "/tmp/fallback.png"
+        mock_url.assert_awaited_once()
+
+
+# ---------------------------------------------------------------------------
+# _cache_discord_audio
+# ---------------------------------------------------------------------------
+
+class TestCacheDiscordAudio:
+    @pytest.mark.asyncio
+    async def test_prefers_att_read_over_url(self):
+        adapter = _make_adapter()
+        att = _make_attachment_with_read(_OGG_BYTES)
+
+        with patch(
+            "gateway.platforms.discord.cache_audio_from_bytes",
+            return_value="/tmp/voice.ogg",
+        ) as mock_bytes, patch(
+            "gateway.platforms.discord.cache_audio_from_url",
+            new_callable=AsyncMock,
+        ) as mock_url:
+            result = await adapter._cache_discord_audio(att, ".ogg")
+
+        assert result == "/tmp/voice.ogg"
+        mock_bytes.assert_called_once_with(_OGG_BYTES, ext=".ogg")
+        mock_url.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_url_when_no_read(self):
+        adapter = _make_adapter()
+        att = _make_attachment_without_read()
+
+        with patch(
+            "gateway.platforms.discord.cache_audio_from_url",
+            new_callable=AsyncMock,
+            return_value="/tmp/from_url.ogg",
+        ) as mock_url:
+            result = await adapter._cache_discord_audio(att, ".ogg")
+
+        assert result == "/tmp/from_url.ogg"
+        mock_url.assert_awaited_once_with(att.url, ext=".ogg")
+
+
+# ---------------------------------------------------------------------------
+# _cache_discord_document
+# ---------------------------------------------------------------------------
+
+class TestCacheDiscordDocument:
+    @pytest.mark.asyncio
+    async def test_prefers_att_read_returns_bytes_directly(self):
+        """Primary path: att.read() → raw bytes, no aiohttp involvement."""
+        adapter = _make_adapter()
+        att = _make_attachment_with_read(_PDF_BYTES)
+
+        with patch("aiohttp.ClientSession") as mock_session:
+            result = await adapter._cache_discord_document(att, ".pdf")
+
+        assert result == _PDF_BYTES
+        mock_session.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_fallback_blocked_by_ssrf_guard(self):
+        """Document fallback path now honors is_safe_url — was missing before.
+
+        Regression guard for #11345: the old aiohttp block skipped the
+        SSRF check entirely; a non-CDN ``att.url`` could have reached
+        internal-looking hosts. The fallback must now refuse unsafe URLs.
+        """
+        adapter = _make_adapter()
+        att = _make_attachment_without_read()  # no .read → forces fallback
+
+        with patch(
+            "gateway.platforms.discord.is_safe_url", return_value=False
+        ) as mock_safe, patch("aiohttp.ClientSession") as mock_session:
+            with pytest.raises(ValueError, match="SSRF"):
+                await adapter._cache_discord_document(att, ".pdf")
+
+        mock_safe.assert_called_once_with(att.url)
+        # aiohttp must NOT be contacted when the URL is blocked.
+        mock_session.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_fallback_aiohttp_when_safe_url(self):
+        """Safe URL + no att.read() → aiohttp fallback executes."""
+        adapter = _make_adapter()
+        att = _make_attachment_without_read()
+
+        # Build an aiohttp session mock that returns 200 + payload.
+        resp = AsyncMock()
+        resp.status = 200
+        resp.read = AsyncMock(return_value=_PDF_BYTES)
+        resp.__aenter__ = AsyncMock(return_value=resp)
+        resp.__aexit__ = AsyncMock(return_value=False)
+
+        session = AsyncMock()
+        session.get = MagicMock(return_value=resp)
+        session.__aenter__ = AsyncMock(return_value=session)
+        session.__aexit__ = AsyncMock(return_value=False)
+
+        with patch(
+            "gateway.platforms.discord.is_safe_url", return_value=True
+        ), patch("aiohttp.ClientSession", return_value=session):
+            result = await adapter._cache_discord_document(att, ".pdf")
+
+        assert result == _PDF_BYTES
+
+
+# ---------------------------------------------------------------------------
+# Integration: end-to-end via _handle_message
+# ---------------------------------------------------------------------------
+
+class TestHandleMessageUsesAuthenticatedRead:
+    """E2E: verify _handle_message routes image/audio downloads through
+    att.read() so cdn.discordapp.com 403s (#8242) and SSRF false-positives
+    on mangled DNS (#6587) no longer block media caching.
+    """
+
+    @pytest.mark.asyncio
+    async def test_image_downloads_via_att_read_not_url(self, monkeypatch):
+        """Image attachments with .read() never call cache_image_from_url."""
+        adapter = _make_adapter()
+        adapter._client = SimpleNamespace(user=SimpleNamespace(id=999))
+        adapter.handle_message = AsyncMock()
+
+        with patch(
+            "gateway.platforms.discord.cache_image_from_bytes",
+            return_value="/tmp/img_from_read.png",
+        ), patch(
+            "gateway.platforms.discord.cache_image_from_url",
+            new_callable=AsyncMock,
+        ) as mock_url_download:
+            att = SimpleNamespace(
+                url="https://cdn.discordapp.com/attachments/fake/file.png",
+                filename="file.png",
+                content_type="image/png",
+                size=len(_PNG_BYTES),
+                read=AsyncMock(return_value=_PNG_BYTES),
+            )
+            # Minimal Discord message stub for _handle_message.
+            from datetime import datetime, timezone
+
+            class _FakeDMChannel:
+                id = 100
+                name = "dm"
+
+            # Patch the DMChannel isinstance check so our fake counts as DM.
+            monkeypatch.setattr(
+                "gateway.platforms.discord.discord.DMChannel",
+                _FakeDMChannel,
+            )
+            chan = _FakeDMChannel()
+            msg = SimpleNamespace(
+                id=1, content="", attachments=[att], mentions=[],
+                reference=None,
+                created_at=datetime.now(timezone.utc),
+                channel=chan,
+                author=SimpleNamespace(id=42, display_name="U", name="U"),
+            )
+            await adapter._handle_message(msg)
+
+        mock_url_download.assert_not_called()
+        event = adapter.handle_message.call_args[0][0]
+        assert event.media_urls == ["/tmp/img_from_read.png"]
+        assert event.media_types == ["image/png"]
@@ -0,0 +1,226 @@
+"""Regression guard for #4466: DISCORD_ALLOW_BOTS works without DISCORD_ALLOWED_USERS.
+
+The bug had two sequential gates both rejecting bot messages:
+
+  Gate 1 — `on_message` in gateway/platforms/discord.py ran the user-allowlist
+  check BEFORE the bot filter, so bot senders were dropped with a warning
+  before the DISCORD_ALLOW_BOTS policy was ever evaluated.
+
+  Gate 2 — `_is_user_authorized` in gateway/run.py rejected bots at the
+  gateway level even if they somehow reached that layer.
+
+These tests assert both gates now pass a bot message through when
+DISCORD_ALLOW_BOTS permits it AND no user allowlist entry exists.
+"""
+
+import os
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+from gateway.session import Platform, SessionSource
+
+
+@pytest.fixture(autouse=True)
+def _isolate_discord_env(monkeypatch):
+    """Make every test start with a clean Discord env so prior tests in the
+    session (or CI setups) can't leak DISCORD_ALLOWED_ROLES / DISCORD_ALLOWED_USERS
+    / DISCORD_ALLOW_BOTS and silently flip the auth result.
+    """
+    for var in (
+        "DISCORD_ALLOW_BOTS",
+        "DISCORD_ALLOWED_USERS",
+        "DISCORD_ALLOWED_ROLES",
+        "DISCORD_ALLOW_ALL_USERS",
+        "GATEWAY_ALLOW_ALL_USERS",
+        "GATEWAY_ALLOWED_USERS",
+    ):
+        monkeypatch.delenv(var, raising=False)
+
+
+# -----------------------------------------------------------------------------
+# Gate 2: _is_user_authorized bypasses allowlist for permitted bots
+# -----------------------------------------------------------------------------
+
+
+def _make_bare_runner():
+    """Build a GatewayRunner skeleton with just enough wiring for the auth test.
+
+    Uses ``object.__new__`` to skip the heavy __init__ — many gateway tests
+    use this pattern (see AGENTS.md pitfall #17).
+    """
+    from gateway.run import GatewayRunner
+    runner = object.__new__(GatewayRunner)
+    # _is_user_authorized reads self.pairing_store.is_approved(...) before
+    # any allowlist check succeeds; stub it to never approve so we exercise
+    # the real allowlist path.
+    runner.pairing_store = SimpleNamespace(is_approved=lambda *_a, **_kw: False)
+    return runner
+
+
+def _make_discord_bot_source(bot_id: str = "999888777"):
+    return SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="123",
+        chat_type="channel",
+        user_id=bot_id,
+        user_name="SomeBot",
+        is_bot=True,
+    )
+
+
+def _make_discord_human_source(user_id: str = "100200300"):
+    return SessionSource(
+        platform=Platform.DISCORD,
+        chat_id="123",
+        chat_type="channel",
+        user_id=user_id,
+        user_name="SomeHuman",
+        is_bot=False,
+    )
+
+
+def test_discord_bot_authorized_when_allow_bots_mentions(monkeypatch):
+    """DISCORD_ALLOW_BOTS=mentions must authorize a bot sender even when
+    DISCORD_ALLOWED_USERS is set and the bot's ID is NOT in it.
+
+    This is the exact scenario from #4466 — a Cloudflare Worker webhook
+    posts Notion events to Discord, the Hermes bot gets @mentioned, and
+    the webhook's bot ID is not (and shouldn't be) on the human
+    allowlist.
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "mentions")
+    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")  # human-only allowlist
+
+    source = _make_discord_bot_source(bot_id="999888777")
+    assert runner._is_user_authorized(source) is True
+
+
+def test_discord_bot_authorized_when_allow_bots_all(monkeypatch):
+    """DISCORD_ALLOW_BOTS=all is a superset of =mentions — should also bypass."""
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
+    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")
+
+    source = _make_discord_bot_source()
+    assert runner._is_user_authorized(source) is True
+
+
+def test_discord_bot_NOT_authorized_when_allow_bots_none(monkeypatch):
+    """DISCORD_ALLOW_BOTS=none (default) must still reject bots that aren't
+    in DISCORD_ALLOWED_USERS — preserves the original security behavior.
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "none")
+    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")
+
+    source = _make_discord_bot_source(bot_id="999888777")
+    assert runner._is_user_authorized(source) is False
+
+
+def test_discord_bot_NOT_authorized_when_allow_bots_unset(monkeypatch):
+    """Unset DISCORD_ALLOW_BOTS must behave like 'none'."""
+    runner = _make_bare_runner()
+
+    monkeypatch.delenv("DISCORD_ALLOW_BOTS", raising=False)
+    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")
+
+    source = _make_discord_bot_source(bot_id="999888777")
+    assert runner._is_user_authorized(source) is False
+
+
+def test_discord_human_still_checked_against_allowlist_when_bot_policy_set(monkeypatch):
+    """DISCORD_ALLOW_BOTS=all must NOT open the gate for humans — they
+    still need to be in DISCORD_ALLOWED_USERS (or a pairing approval).
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
+    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")
+
+    # Human NOT on the allowlist → must be rejected.
+    source = _make_discord_human_source(user_id="999999999")
+    assert runner._is_user_authorized(source) is False
+
+    # Human ON the allowlist → accepted.
+    source_allowed = _make_discord_human_source(user_id="100200300")
+    assert runner._is_user_authorized(source_allowed) is True
+
+
+def test_bot_bypass_does_not_leak_to_other_platforms(monkeypatch):
+    """The is_bot bypass is Discord-specific — a Telegram bot source with
+    is_bot=True must NOT be authorized just because DISCORD_ALLOW_BOTS=all.
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
+    monkeypatch.setenv("TELEGRAM_ALLOWED_USERS", "100200300")
+
+    telegram_bot = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="123",
+        chat_type="channel",
+        user_id="999888777",
+        is_bot=True,
+    )
+    assert runner._is_user_authorized(telegram_bot) is False
+
+
+# -----------------------------------------------------------------------------
+# DISCORD_ALLOWED_ROLES gateway-layer bypass (#7871)
+# -----------------------------------------------------------------------------
+
+
+def test_discord_role_config_bypasses_gateway_allowlist(monkeypatch):
+    """When DISCORD_ALLOWED_ROLES is set, _is_user_authorized must trust
+    the adapter's pre-filter and authorize. Without this, role-only setups
+    (DISCORD_ALLOWED_ROLES populated, DISCORD_ALLOWED_USERS empty) would
+    hit the 'no allowlists configured' branch and get rejected.
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOWED_ROLES", "1493705176387948674")
+    # Note: DISCORD_ALLOWED_USERS is NOT set — the entire point.
+
+    source = _make_discord_human_source(user_id="999888777")
+    assert runner._is_user_authorized(source) is True
+
+
+def test_discord_role_config_still_authorizes_alongside_users(monkeypatch):
+    """Sanity: setting both DISCORD_ALLOWED_ROLES and DISCORD_ALLOWED_USERS
+    doesn't break the user-id path. Users in the allowlist should still be
+    authorized even if they don't have a role. (OR semantics.)
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOWED_ROLES", "1493705176387948674")
+    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")
+
+    # User on the user allowlist, no role → still authorized at gateway
+    # level via the role bypass (adapter already approved them).
+    source = _make_discord_human_source(user_id="100200300")
+    assert runner._is_user_authorized(source) is True
+
+
+def test_discord_role_bypass_does_not_leak_to_other_platforms(monkeypatch):
+    """DISCORD_ALLOWED_ROLES must only affect Discord. Setting it should
+    not suddenly start authorizing Telegram users whose platform has its
+    own empty allowlist.
+    """
+    runner = _make_bare_runner()
+
+    monkeypatch.setenv("DISCORD_ALLOWED_ROLES", "1493705176387948674")
+    # Telegram has its own empty allowlist and no allow-all flag.
+
+    telegram_user = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="123",
+        chat_type="channel",
+        user_id="999888777",
+    )
+    assert runner._is_user_authorized(telegram_user) is False
@@ -8,37 +8,60 @@ import pytest
 from gateway.config import PlatformConfig


+class _FakeAllowedMentions:
+    """Stand-in for ``discord.AllowedMentions`` — exposes the same four
+    boolean flags as real attributes so tests can assert on safe defaults.
+    """
+
+    def __init__(self, *, everyone=True, roles=True, users=True, replied_user=True):
+        self.everyone = everyone
+        self.roles = roles
+        self.users = users
+        self.replied_user = replied_user
+
+
 def _ensure_discord_mock():
+    """Install (or augment) a mock ``discord`` module.
+
+    Always force ``AllowedMentions`` onto whatever is in ``sys.modules`` —
+    other test files also stub the module via ``setdefault``, and we need
+    ``_build_allowed_mentions()``'s return value to have real attribute
+    access regardless of which file loaded first.
+    """
    if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
+        sys.modules["discord"].AllowedMentions = _FakeAllowedMentions
        return

-    discord_mod = MagicMock()
-    discord_mod.Intents.default.return_value = MagicMock()
-    discord_mod.Client = MagicMock
-    discord_mod.File = MagicMock
-    discord_mod.DMChannel = type("DMChannel", (), {})
-    discord_mod.Thread = type("Thread", (), {})
-    discord_mod.ForumChannel = type("ForumChannel", (), {})
-    discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object)
-    discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, danger=3, green=1, blurple=2, red=3, grey=4, secondary=5)
-    discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4)
-    discord_mod.Interaction = object
-    discord_mod.Embed = MagicMock
-    discord_mod.app_commands = SimpleNamespace(
-        describe=lambda **kwargs: (lambda fn: fn),
-        choices=lambda **kwargs: (lambda fn: fn),
-        Choice=lambda **kwargs: SimpleNamespace(**kwargs),
-    )
-    discord_mod.opus = SimpleNamespace(is_loaded=lambda: True)
+    if sys.modules.get("discord") is None:
+        discord_mod = MagicMock()
+        discord_mod.Intents.default.return_value = MagicMock()
+        discord_mod.Client = MagicMock
+        discord_mod.File = MagicMock
+        discord_mod.DMChannel = type("DMChannel", (), {})
+        discord_mod.Thread = type("Thread", (), {})
+        discord_mod.ForumChannel = type("ForumChannel", (), {})
+        discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object)
+        discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, danger=3, green=1, blurple=2, red=3, grey=4, secondary=5)
+        discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4)
+        discord_mod.Interaction = object
+        discord_mod.Embed = MagicMock
+        discord_mod.app_commands = SimpleNamespace(
+            describe=lambda **kwargs: (lambda fn: fn),
+            choices=lambda **kwargs: (lambda fn: fn),
+            Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+        )
+        discord_mod.opus = SimpleNamespace(is_loaded=lambda: True)

-    ext_mod = MagicMock()
-    commands_mod = MagicMock()
-    commands_mod.Bot = MagicMock
-    ext_mod.commands = commands_mod
+        ext_mod = MagicMock()
+        commands_mod = MagicMock()
+        commands_mod.Bot = MagicMock
+        ext_mod.commands = commands_mod

-    sys.modules.setdefault("discord", discord_mod)
-    sys.modules.setdefault("discord.ext", ext_mod)
-    sys.modules.setdefault("discord.ext.commands", commands_mod)
+        sys.modules["discord"] = discord_mod
+        sys.modules.setdefault("discord.ext", ext_mod)
+        sys.modules.setdefault("discord.ext.commands", commands_mod)
+
+    sys.modules["discord"].AllowedMentions = _FakeAllowedMentions


 _ensure_discord_mock()
@@ -56,8 +79,9 @@ class FakeTree:


 class FakeBot:
-    def __init__(self, *, intents, proxy=None):
+    def __init__(self, *, intents, proxy=None, allowed_mentions=None, **_):
        self.intents = intents
+        self.allowed_mentions = allowed_mentions
        self.user = SimpleNamespace(id=999, name="Hermes")
        self._events = {}
        self.tree = FakeTree()
@@ -115,8 +139,8 @@ async def test_connect_only_requests_members_intent_when_needed(monkeypatch, all

    created = {}

-    def fake_bot_factory(*, command_prefix, intents, proxy=None):
-        created["bot"] = FakeBot(intents=intents)
+    def fake_bot_factory(*, command_prefix, intents, proxy=None, allowed_mentions=None, **_):
+        created["bot"] = FakeBot(intents=intents, allowed_mentions=allowed_mentions)
        return created["bot"]

    monkeypatch.setattr(discord_platform.commands, "Bot", fake_bot_factory)
@@ -126,6 +150,13 @@ async def test_connect_only_requests_members_intent_when_needed(monkeypatch, all

    assert ok is True
    assert created["bot"].intents.members is expected_members_intent
+    # Safe-default AllowedMentions must be applied on every connect so the
+    # bot cannot @everyone from LLM output.  Granular overrides live in the
+    # dedicated test_discord_allowed_mentions.py module.
+    am = created["bot"].allowed_mentions
+    assert am is not None, "connect() must pass an AllowedMentions to commands.Bot"
+    assert am.everyone is False
+    assert am.roles is False

    await adapter.disconnect()

@@ -144,7 +175,11 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch):
    monkeypatch.setattr(
        discord_platform.commands,
        "Bot",
-        lambda **kwargs: FakeBot(intents=kwargs["intents"], proxy=kwargs.get("proxy")),
+        lambda **kwargs: FakeBot(
+            intents=kwargs["intents"],
+            proxy=kwargs.get("proxy"),
+            allowed_mentions=kwargs.get("allowed_mentions"),
+        ),
    )

    async def fake_wait_for(awaitable, timeout):
@@ -172,7 +207,7 @@ async def test_connect_does_not_wait_for_slash_sync(monkeypatch):

    created = {}

-    def fake_bot_factory(*, command_prefix, intents, proxy=None):
+    def fake_bot_factory(*, command_prefix, intents, proxy=None, allowed_mentions=None, **_):
        bot = SlowSyncBot(intents=intents, proxy=proxy)
        created["bot"] = bot
        return bot
@@ -96,7 +96,7 @@ def adapter(monkeypatch):
    return adapter


-def make_message(*, channel, content: str, mentions=None):
+def make_message(*, channel, content: str, mentions=None, msg_type=None):
    author = SimpleNamespace(id=42, display_name="Jezza", name="Jezza")
    return SimpleNamespace(
        id=123,
@@ -107,6 +107,7 @@ def make_message(*, channel, content: str, mentions=None):
        created_at=datetime.now(timezone.utc),
        channel=channel,
        author=author,
+        type=msg_type if msg_type is not None else discord_platform.discord.MessageType.default,
    )


@@ -204,6 +205,21 @@ async def test_discord_free_response_channel_overrides_mention_requirement(adapt
    assert event.text == "allowed without mention"


+@pytest.mark.asyncio
+async def test_discord_free_response_channel_can_come_from_config_extra(adapter, monkeypatch):
+    monkeypatch.delenv("DISCORD_REQUIRE_MENTION", raising=False)
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    adapter.config.extra["free_response_channels"] = ["789", "999"]
+
+    message = make_message(channel=FakeTextChannel(channel_id=789), content="allowed from config")
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.text == "allowed from config"
+
+
@pytest.mark.asyncio
 async def test_discord_forum_parent_in_free_response_list_allows_forum_thread(adapter, monkeypatch):
    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
@@ -276,6 +292,31 @@ async def test_discord_auto_thread_enabled_by_default(adapter, monkeypatch):
    assert event.source.thread_id == "999"


+@pytest.mark.asyncio
+async def test_discord_reply_message_skips_auto_thread(adapter, monkeypatch):
+    """Quote-replies should stay in-channel instead of trying to create a thread."""
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.setenv("DISCORD_FREE_RESPONSE_CHANNELS", "123")
+
+    adapter._auto_create_thread = AsyncMock()
+
+    message = make_message(
+        channel=FakeTextChannel(channel_id=123),
+        content="reply without mention",
+        msg_type=discord_platform.discord.MessageType.reply,
+    )
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_not_awaited()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.text == "reply without mention"
+    assert event.source.chat_id == "123"
+    assert event.source.chat_type == "group"
+
+
@pytest.mark.asyncio
 async def test_discord_auto_thread_can_be_disabled(adapter, monkeypatch):
    """Setting auto_thread to false skips thread creation."""
@@ -385,6 +426,33 @@ async def test_discord_voice_linked_channel_skips_mention_requirement_and_auto_t
    assert event.source.chat_type == "group"


+@pytest.mark.asyncio
+async def test_discord_free_channel_skips_auto_thread(adapter, monkeypatch):
+    """Free-response channels must NOT auto-create threads — bot replies inline.
+
+    Without this, every message in a free-response channel would spin off a
+    thread (since the channel bypasses the @mention gate), defeating the
+    lightweight-chat purpose of free-response mode.
+    """
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.setenv("DISCORD_FREE_RESPONSE_CHANNELS", "789")
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)  # default true
+
+    adapter._auto_create_thread = AsyncMock()
+
+    message = make_message(
+        channel=FakeTextChannel(channel_id=789),
+        content="free chat message",
+    )
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_not_awaited()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.source.chat_type == "group"
+
+
@pytest.mark.asyncio
 async def test_discord_voice_linked_parent_thread_still_requires_mention(adapter, monkeypatch):
    """Threads under a voice-linked channel should still require @mention."""
@@ -105,9 +105,14 @@ def _make_discord_adapter(reply_to_mode: str = "first"):
    config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode)
    adapter = DiscordAdapter(config)

-    # Mock the Discord client and channel
+    # Mock the Discord client and channel.
+    # ref_message.to_reference() → a distinct sentinel: the adapter now wraps
+    # the fetched Message via to_reference(fail_if_not_exists=False) so a
+    # deleted target degrades to "send without reply chip" instead of a 400.
    mock_channel = AsyncMock()
    ref_message = MagicMock()
+    ref_reference = MagicMock(name="MessageReference")
+    ref_message.to_reference = MagicMock(return_value=ref_reference)
    mock_channel.fetch_message = AsyncMock(return_value=ref_message)

    sent_msg = MagicMock()
@@ -118,7 +123,9 @@ def _make_discord_adapter(reply_to_mode: str = "first"):
    mock_client.get_channel = MagicMock(return_value=mock_channel)

    adapter._client = mock_client
-    return adapter, mock_channel, ref_message
+    # Return the reference sentinel alongside so tests can assert identity.
+    adapter._test_expected_reference = ref_reference
+    return adapter, mock_channel, ref_reference


 class TestSendWithReplyToMode:
@@ -48,7 +48,8 @@ from gateway.platforms.discord import DiscordAdapter  # noqa: E402
 async def test_send_retries_without_reference_when_reply_target_is_system_message():
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="***"))

-    ref_msg = SimpleNamespace(id=99)
+    reference_obj = object()
+    ref_msg = SimpleNamespace(id=99, to_reference=MagicMock(return_value=reference_obj))
    sent_msg = SimpleNamespace(id=1234)
    send_calls = []

@@ -76,5 +77,83 @@ async def test_send_retries_without_reference_when_reply_target_is_system_messag
    assert result.message_id == "1234"
    assert channel.fetch_message.await_count == 1
    assert channel.send.await_count == 2
-    assert send_calls[0]["reference"] is ref_msg
+    ref_msg.to_reference.assert_called_once_with(fail_if_not_exists=False)
+    assert send_calls[0]["reference"] is reference_obj
    assert send_calls[1]["reference"] is None
+
+
+@pytest.mark.asyncio
+async def test_send_retries_without_reference_when_reply_target_is_deleted():
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="***"))
+
+    reference_obj = object()
+    ref_msg = SimpleNamespace(id=99, to_reference=MagicMock(return_value=reference_obj))
+    sent_msgs = [SimpleNamespace(id=1001), SimpleNamespace(id=1002)]
+    send_calls = []
+
+    async def fake_send(*, content, reference=None):
+        send_calls.append({"content": content, "reference": reference})
+        if len(send_calls) == 1:
+            raise RuntimeError(
+                "400 Bad Request (error code: 10008): Unknown Message"
+            )
+        return sent_msgs[len(send_calls) - 2]
+
+    channel = SimpleNamespace(
+        fetch_message=AsyncMock(return_value=ref_msg),
+        send=AsyncMock(side_effect=fake_send),
+    )
+    adapter._client = SimpleNamespace(
+        get_channel=lambda _chat_id: channel,
+        fetch_channel=AsyncMock(),
+    )
+
+    long_text = "A" * (adapter.MAX_MESSAGE_LENGTH + 50)
+    result = await adapter.send("555", long_text, reply_to="99")
+
+    assert result.success is True
+    assert result.message_id == "1001"
+    assert channel.fetch_message.await_count == 1
+    assert channel.send.await_count == 3
+    ref_msg.to_reference.assert_called_once_with(fail_if_not_exists=False)
+    assert send_calls[0]["reference"] is reference_obj
+    assert send_calls[1]["reference"] is None
+    assert send_calls[2]["reference"] is None
+
+
+@pytest.mark.asyncio
+async def test_send_does_not_retry_on_unrelated_errors():
+    """Regression guard: errors unrelated to the reply reference (e.g. 50013
+    Missing Permissions) must NOT trigger the no-reference retry path — they
+    should propagate out of the per-chunk loop and surface as a failed
+    SendResult so the caller sees the real problem instead of a silent retry.
+    """
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="***"))
+
+    reference_obj = object()
+    ref_msg = SimpleNamespace(id=99, to_reference=MagicMock(return_value=reference_obj))
+    send_calls = []
+
+    async def fake_send(*, content, reference=None):
+        send_calls.append({"content": content, "reference": reference})
+        raise RuntimeError(
+            "403 Forbidden (error code: 50013): Missing Permissions"
+        )
+
+    channel = SimpleNamespace(
+        fetch_message=AsyncMock(return_value=ref_msg),
+        send=AsyncMock(side_effect=fake_send),
+    )
+    adapter._client = SimpleNamespace(
+        get_channel=lambda _chat_id: channel,
+        fetch_channel=AsyncMock(),
+    )
+
+    result = await adapter.send("555", "hello", reply_to="99")
+
+    # Outer except in adapter.send() wraps propagated errors as SendResult.
+    assert result.success is False
+    assert "50013" in (result.error or "")
+    # Only the first attempt happens — no reference-retry replay.
+    assert channel.send.await_count == 1
+    assert send_calls[0]["reference"] is reference_obj
@@ -11,52 +11,66 @@ from gateway.config import PlatformConfig

 def _ensure_discord_mock():
    if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
+        # Real discord is installed — nothing to do.
        return

-    discord_mod = MagicMock()
-    discord_mod.Intents.default.return_value = MagicMock()
-    discord_mod.DMChannel = type("DMChannel", (), {})
-    discord_mod.Thread = type("Thread", (), {})
-    discord_mod.ForumChannel = type("ForumChannel", (), {})
-    discord_mod.Interaction = object
+    if sys.modules.get("discord") is None:
+        discord_mod = MagicMock()
+        discord_mod.Intents.default.return_value = MagicMock()
+        discord_mod.DMChannel = type("DMChannel", (), {})
+        discord_mod.Thread = type("Thread", (), {})
+        discord_mod.ForumChannel = type("ForumChannel", (), {})
+        discord_mod.Interaction = object

-    # Lightweight mock for app_commands.Group and Command used by
-    # _register_skill_group.
-    class _FakeGroup:
-        def __init__(self, *, name, description, parent=None):
-            self.name = name
-            self.description = description
-            self.parent = parent
-            self._children: dict[str, object] = {}
-            if parent is not None:
-                parent.add_command(self)
+        # Lightweight mock for app_commands.Group and Command used by
+        # _register_skill_group.
+        class _FakeGroup:
+            def __init__(self, *, name, description, parent=None):
+                self.name = name
+                self.description = description
+                self.parent = parent
+                self._children: dict[str, object] = {}
+                if parent is not None:
+                    parent.add_command(self)

-        def add_command(self, cmd):
-            self._children[cmd.name] = cmd
+            def add_command(self, cmd):
+                self._children[cmd.name] = cmd

-    class _FakeCommand:
-        def __init__(self, *, name, description, callback, parent=None):
-            self.name = name
-            self.description = description
-            self.callback = callback
-            self.parent = parent
+        class _FakeCommand:
+            def __init__(self, *, name, description, callback, parent=None):
+                self.name = name
+                self.description = description
+                self.callback = callback
+                self.parent = parent

-    discord_mod.app_commands = SimpleNamespace(
-        describe=lambda **kwargs: (lambda fn: fn),
-        choices=lambda **kwargs: (lambda fn: fn),
-        Choice=lambda **kwargs: SimpleNamespace(**kwargs),
-        Group=_FakeGroup,
-        Command=_FakeCommand,
-    )
+        discord_mod.app_commands = SimpleNamespace(
+            describe=lambda **kwargs: (lambda fn: fn),
+            choices=lambda **kwargs: (lambda fn: fn),
+            autocomplete=lambda **kwargs: (lambda fn: fn),
+            Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+            Group=_FakeGroup,
+            Command=_FakeCommand,
+        )

-    ext_mod = MagicMock()
-    commands_mod = MagicMock()
-    commands_mod.Bot = MagicMock
-    ext_mod.commands = commands_mod
+        ext_mod = MagicMock()
+        commands_mod = MagicMock()
+        commands_mod.Bot = MagicMock
+        ext_mod.commands = commands_mod

-    sys.modules.setdefault("discord", discord_mod)
-    sys.modules.setdefault("discord.ext", ext_mod)
-    sys.modules.setdefault("discord.ext.commands", commands_mod)
+        sys.modules["discord"] = discord_mod
+        sys.modules.setdefault("discord.ext", ext_mod)
+        sys.modules.setdefault("discord.ext.commands", commands_mod)
+
+    # Whether we just installed the mock OR another test module installed
+    # it first via its own _ensure_discord_mock, force the decorators we
+    # need onto discord.app_commands — the flat /skill command uses
+    # @app_commands.autocomplete and not every other mock stub exposes it.
+    _app = getattr(sys.modules["discord"], "app_commands", None)
+    if _app is not None and not hasattr(_app, "autocomplete"):
+        try:
+            _app.autocomplete = lambda **kwargs: (lambda fn: fn)
+        except Exception:
+            pass


 _ensure_discord_mock()
@@ -387,6 +401,8 @@ async def test_auto_create_thread_uses_message_content_as_name(adapter):
    message = SimpleNamespace(
        content="Hello world, how are you?",
        create_thread=AsyncMock(return_value=thread),
+        channel=SimpleNamespace(send=AsyncMock()),
+        author=SimpleNamespace(display_name="Jezza"),
    )

    result = await adapter._auto_create_thread(message)
@@ -398,6 +414,48 @@ async def test_auto_create_thread_uses_message_content_as_name(adapter):
    assert call_kwargs["auto_archive_duration"] == 1440


+@pytest.mark.asyncio
+async def test_auto_create_thread_strips_mention_syntax_from_name(adapter):
+    """Thread names must not contain raw <@id>, <@&id>, or <#id> markers.
+
+    Regression guard for #6336 — previously a message like
+    ``<@&1490963422786093149> help`` would spawn a thread literally
+    named ``<@&1490963422786093149> help``.
+    """
+    thread = SimpleNamespace(id=999, name="help")
+    message = SimpleNamespace(
+        content="<@&1490963422786093149> <@555> please help <#123>",
+        create_thread=AsyncMock(return_value=thread),
+        channel=SimpleNamespace(send=AsyncMock()),
+        author=SimpleNamespace(display_name="Jezza"),
+    )
+
+    await adapter._auto_create_thread(message)
+
+    name = message.create_thread.await_args[1]["name"]
+    assert "<@" not in name, f"role/user mention leaked: {name!r}"
+    assert "<#" not in name, f"channel mention leaked: {name!r}"
+    assert name == "please help"
+
+
+@pytest.mark.asyncio
+async def test_auto_create_thread_falls_back_to_hermes_when_only_mentions(adapter):
+    """If a message contains only mention syntax, the stripped content is
+    empty — fall back to the 'Hermes' default rather than ''."""
+    thread = SimpleNamespace(id=999, name="Hermes")
+    message = SimpleNamespace(
+        content="<@&1490963422786093149>",
+        create_thread=AsyncMock(return_value=thread),
+        channel=SimpleNamespace(send=AsyncMock()),
+        author=SimpleNamespace(display_name="Jezza"),
+    )
+
+    await adapter._auto_create_thread(message)
+
+    name = message.create_thread.await_args[1]["name"]
+    assert name == "Hermes"
+
+
@pytest.mark.asyncio
 async def test_auto_create_thread_truncates_long_names(adapter):
    long_text = "a" * 200
@@ -405,6 +463,8 @@ async def test_auto_create_thread_truncates_long_names(adapter):
    message = SimpleNamespace(
        content=long_text,
        create_thread=AsyncMock(return_value=thread),
+        channel=SimpleNamespace(send=AsyncMock()),
+        author=SimpleNamespace(display_name="Jezza"),
    )

    result = await adapter._auto_create_thread(message)
@@ -416,10 +476,33 @@ async def test_auto_create_thread_truncates_long_names(adapter):


@pytest.mark.asyncio
-async def test_auto_create_thread_returns_none_on_failure(adapter):
+async def test_auto_create_thread_falls_back_to_seed_message(adapter):
+    thread = SimpleNamespace(id=555, name="Hello")
+    seed_message = SimpleNamespace(create_thread=AsyncMock(return_value=thread))
    message = SimpleNamespace(
        content="Hello",
        create_thread=AsyncMock(side_effect=RuntimeError("no perms")),
+        channel=SimpleNamespace(send=AsyncMock(return_value=seed_message)),
+        author=SimpleNamespace(display_name="Jezza"),
+    )
+
+    result = await adapter._auto_create_thread(message)
+    assert result is thread
+    message.channel.send.assert_awaited_once_with("🧵 Thread created by Hermes: **Hello**")
+    seed_message.create_thread.assert_awaited_once_with(
+        name="Hello",
+        auto_archive_duration=1440,
+        reason="Auto-threaded from mention by Jezza",
+    )
+
+
+@pytest.mark.asyncio
+async def test_auto_create_thread_returns_none_when_direct_and_fallback_fail(adapter):
+    message = SimpleNamespace(
+        content="Hello",
+        create_thread=AsyncMock(side_effect=RuntimeError("no perms")),
+        channel=SimpleNamespace(send=AsyncMock(side_effect=RuntimeError("send failed"))),
+        author=SimpleNamespace(display_name="Jezza"),
    )

    result = await adapter._auto_create_thread(message)
@@ -599,12 +682,19 @@ def test_discord_auto_thread_config_bridge(monkeypatch, tmp_path):


 # ------------------------------------------------------------------
-# /skill group registration
+# /skill command registration (flat + autocomplete)
 # ------------------------------------------------------------------


-def test_register_skill_group_creates_group(adapter):
-    """_register_skill_group should register a '/skill' Group on the tree."""
+def test_register_skill_command_is_flat_not_nested(adapter):
+    """_register_skill_group should register a single flat ``/skill`` command.
+
+    The older layout nested categories as subcommand groups under ``/skill``.
+    That registered as one giant command whose serialized payload exceeded
+    Discord's 8KB per-command limit with the default skill catalog. The
+    flat layout sidesteps the limit — autocomplete options are fetched
+    dynamically by Discord and don't count against the registration budget.
+    """
    mock_categories = {
        "creative": [
            ("ascii-art", "Generate ASCII art", "/ascii-art"),
@@ -625,22 +715,17 @@ def test_register_skill_group_creates_group(adapter):
        adapter._register_slash_commands()

    tree = adapter._client.tree
-    assert "skill" in tree.commands, "Expected /skill group to be registered"
-    skill_group = tree.commands["skill"]
-    assert skill_group.name == "skill"
-    # Should have 2 category subgroups + 1 uncategorized subcommand
-    children = skill_group._children
-    assert "creative" in children
-    assert "media" in children
-    assert "dogfood" in children
-    # Category groups should have their skills
-    assert "ascii-art" in children["creative"]._children
-    assert "excalidraw" in children["creative"]._children
-    assert "gif-search" in children["media"]._children
+    assert "skill" in tree.commands, "Expected /skill command to be registered"
+    skill_cmd = tree.commands["skill"]
+    assert skill_cmd.name == "skill"
+    # Flat command — NOT a Group — so it has no _children of category subgroups
+    assert not hasattr(skill_cmd, "_children") or not getattr(skill_cmd, "_children", {}), (
+        "Flat /skill command should not have subcommand children"
+    )


-def test_register_skill_group_empty_skills_no_group(adapter):
-    """No /skill group should be added when there are zero skills."""
+def test_register_skill_command_empty_skills_no_command(adapter):
+    """No /skill command should be registered when there are zero skills."""
    with patch(
        "hermes_cli.commands.discord_skill_commands_by_category",
        return_value=({}, [], 0),
@@ -651,13 +736,134 @@ def test_register_skill_group_empty_skills_no_group(adapter):
    assert "skill" not in tree.commands


-def test_register_skill_group_handler_dispatches_command(adapter):
-    """Skill subcommand handlers should dispatch the correct /cmd-key text."""
+def test_register_skill_command_callback_dispatches_by_name(adapter):
+    """The /skill callback should look up the skill by ``name`` and
+    dispatch via ``_run_simple_slash`` with the real command key.
+    """
    mock_categories = {
        "media": [
            ("gif-search", "Search for GIFs", "/gif-search"),
        ],
    }
+    mock_uncategorized = [
+        ("dogfood", "QA testing", "/dogfood"),
+    ]
+
+    with patch(
+        "hermes_cli.commands.discord_skill_commands_by_category",
+        return_value=(mock_categories, mock_uncategorized, 0),
+    ):
+        adapter._register_slash_commands()
+
+    skill_cmd = adapter._client.tree.commands["skill"]
+    assert skill_cmd.callback is not None
+
+    # Stub out _run_simple_slash so we can verify the dispatched text.
+    dispatched: list[str] = []
+
+    async def fake_run(_interaction, text):
+        dispatched.append(text)
+
+    adapter._run_simple_slash = fake_run
+
+    import asyncio
+
+    fake_interaction = SimpleNamespace()
+    # gif-search → /gif-search with no args
+    asyncio.run(skill_cmd.callback(fake_interaction, name="gif-search"))
+    # dogfood with args
+    asyncio.run(skill_cmd.callback(fake_interaction, name="dogfood", args="my test"))
+
+    assert dispatched == ["/gif-search", "/dogfood my test"]
+
+
+def test_register_skill_command_handles_unknown_skill_gracefully(adapter):
+    """Passing a name that isn't a registered skill should respond with
+    an ephemeral error message, NOT crash the callback.
+    """
+    with patch(
+        "hermes_cli.commands.discord_skill_commands_by_category",
+        return_value=({"media": [("gif-search", "GIFs", "/gif-search")]}, [], 0),
+    ):
+        adapter._register_slash_commands()
+
+    skill_cmd = adapter._client.tree.commands["skill"]
+
+    sent: list[dict] = []
+
+    async def fake_send(text, ephemeral=False):
+        sent.append({"text": text, "ephemeral": ephemeral})
+
+    interaction = SimpleNamespace(
+        response=SimpleNamespace(send_message=fake_send),
+    )
+
+    import asyncio
+    asyncio.run(skill_cmd.callback(interaction, name="does-not-exist"))
+
+    assert len(sent) == 1
+    assert "Unknown skill" in sent[0]["text"]
+    assert "does-not-exist" in sent[0]["text"]
+    assert sent[0]["ephemeral"] is True
+
+
+def test_register_skill_command_payload_fits_discord_8kb_limit(adapter):
+    """The /skill command registration payload must stay under Discord's
+    ~8000-byte per-command limit even with a large skill catalog.
+
+    This is the regression guard for #11321 / #10259. Simulates 500 skills
+    (20 categories × 25 — the hard cap per category in the collector) and
+    confirms the serialized command still fits. Autocomplete options are
+    not part of this payload, so the budget is essentially constant.
+    """
+    import json
+
+    # Simulate the largest catalog the collector will ever produce:
+    # 20 categories × 25 skills each, with verbose 100-char descriptions.
+    large_categories: dict[str, list[tuple[str, str, str]]] = {}
+    long_desc = "A verbose description padded to approximately 100 chars " + "." * 42
+    for i in range(20):
+        cat = f"cat{i:02d}"
+        large_categories[cat] = [
+            (f"skill-{i:02d}-{j:02d}", long_desc, f"/skill-{i:02d}-{j:02d}")
+            for j in range(25)
+        ]
+
+    with patch(
+        "hermes_cli.commands.discord_skill_commands_by_category",
+        return_value=(large_categories, [], 0),
+    ):
+        adapter._register_slash_commands()
+
+    skill_cmd = adapter._client.tree.commands["skill"]
+    # Approximate the serialized registration payload (name + description only).
+    # Autocomplete options are NOT registered — they're fetched dynamically.
+    payload = json.dumps({
+        "name": skill_cmd.name,
+        "description": skill_cmd.description,
+        "options": [
+            {"name": "name", "description": "Which skill to run", "type": 3, "required": True},
+            {"name": "args", "description": "Optional arguments for the skill", "type": 3, "required": False},
+        ],
+    })
+    assert len(payload) < 500, (
+        f"Flat /skill command payload is ~{len(payload)} bytes — the whole "
+        f"point of this design is that it stays small regardless of skill count"
+    )
+
+
+def test_register_skill_command_autocomplete_filters_by_name_and_description(adapter):
+    """The autocomplete callback should match on both skill name and
+    description so the user can search by either.
+    """
+    mock_categories = {
+        "ocr": [
+            ("ocr-and-documents", "Extract text from PDFs and scanned documents", "/ocr-and-documents"),
+        ],
+        "media": [
+            ("gif-search", "Search and download GIFs from Tenor", "/gif-search"),
+        ],
+    }

    with patch(
        "hermes_cli.commands.discord_skill_commands_by_category",
@@ -665,10 +871,15 @@ def test_register_skill_group_handler_dispatches_command(adapter):
    ):
        adapter._register_slash_commands()

-    skill_group = adapter._client.tree.commands["skill"]
-    media_group = skill_group._children["media"]
-    gif_cmd = media_group._children["gif-search"]
-    assert gif_cmd.callback is not None
-    # The callback name should reflect the skill
-    assert "gif_search" in gif_cmd.callback.__name__
+    skill_cmd = adapter._client.tree.commands["skill"]
+    # The callback has been wrapped with @autocomplete(name=...) — in our mock
+    # the decorator is pass-through, so we inspect the closed-over list by
+    # invoking the registered autocomplete function directly through the
+    # test API. Since the mock doesn't preserve the autocomplete binding,
+    # we re-derive the filter by building the same entries list.
+    #
+    # What we CAN verify at this layer: the callback dispatches correctly
+    # (covered in other tests). The autocomplete filter itself is exercised
+    # via direct function call in the real-discord integration path.
+    assert skill_cmd.callback is not None

@@ -25,14 +25,6 @@ from unittest.mock import patch, MagicMock, AsyncMock
 from gateway.platforms.base import SendResult


-class TestPlatformEnum(unittest.TestCase):
-    """Verify EMAIL is in the Platform enum."""
-
-    def test_email_in_platform_enum(self):
-        from gateway.config import Platform
-        self.assertEqual(Platform.EMAIL.value, "email")
-
-
 class TestConfigEnvOverrides(unittest.TestCase):
    """Verify email config is loaded from environment variables."""

@@ -72,20 +64,6 @@ class TestConfigEnvOverrides(unittest.TestCase):
        _apply_env_overrides(config)
        self.assertNotIn(Platform.EMAIL, config.platforms)

-    @patch.dict(os.environ, {
-        "EMAIL_ADDRESS": "hermes@test.com",
-        "EMAIL_PASSWORD": "secret",
-        "EMAIL_IMAP_HOST": "imap.test.com",
-        "EMAIL_SMTP_HOST": "smtp.test.com",
-    }, clear=False)
-    def test_email_in_connected_platforms(self):
-        from gateway.config import GatewayConfig, Platform, _apply_env_overrides
-        config = GatewayConfig()
-        _apply_env_overrides(config)
-        connected = config.get_connected_platforms()
-        self.assertIn(Platform.EMAIL, connected)
-
-
 class TestCheckRequirements(unittest.TestCase):
    """Verify check_email_requirements function."""

@@ -257,121 +235,6 @@ class TestExtractAttachments(unittest.TestCase):
        mock_cache.assert_called_once()


-class TestAuthorizationMaps(unittest.TestCase):
-    """Verify email is in authorization maps in gateway/run.py."""
-
-    def test_email_in_adapter_factory(self):
-        """Email adapter creation branch should exist."""
-        import gateway.run
-        import inspect
-        source = inspect.getsource(gateway.run.GatewayRunner._create_adapter)
-        self.assertIn("Platform.EMAIL", source)
-
-    def test_email_in_allowed_users_map(self):
-        """EMAIL_ALLOWED_USERS should be in platform_env_map."""
-        import gateway.run
-        import inspect
-        source = inspect.getsource(gateway.run.GatewayRunner._is_user_authorized)
-        self.assertIn("EMAIL_ALLOWED_USERS", source)
-
-    def test_email_in_allow_all_map(self):
-        """EMAIL_ALLOW_ALL_USERS should be in platform_allow_all_map."""
-        import gateway.run
-        import inspect
-        source = inspect.getsource(gateway.run.GatewayRunner._is_user_authorized)
-        self.assertIn("EMAIL_ALLOW_ALL_USERS", source)
-
-
-class TestSendMessageToolRouting(unittest.TestCase):
-    """Verify email routing in send_message_tool."""
-
-    def test_email_in_platform_map(self):
-        import tools.send_message_tool as smt
-        import inspect
-        source = inspect.getsource(smt._handle_send)
-        self.assertIn('"email"', source)
-
-    def test_send_to_platform_has_email_branch(self):
-        import tools.send_message_tool as smt
-        import inspect
-        source = inspect.getsource(smt._send_to_platform)
-        self.assertIn("Platform.EMAIL", source)
-
-
-class TestCronDelivery(unittest.TestCase):
-    """Verify email in cron scheduler platform_map."""
-
-    def test_email_in_cron_platform_map(self):
-        import cron.scheduler
-        import inspect
-        source = inspect.getsource(cron.scheduler)
-        self.assertIn('"email"', source)
-
-
-class TestToolset(unittest.TestCase):
-    """Verify email toolset is registered."""
-
-    def test_email_toolset_exists(self):
-        from toolsets import TOOLSETS
-        self.assertIn("hermes-email", TOOLSETS)
-
-    def test_email_in_gateway_toolset(self):
-        from toolsets import TOOLSETS
-        includes = TOOLSETS["hermes-gateway"]["includes"]
-        self.assertIn("hermes-email", includes)
-
-
-class TestPlatformHints(unittest.TestCase):
-    """Verify email platform hint is registered."""
-
-    def test_email_in_platform_hints(self):
-        from agent.prompt_builder import PLATFORM_HINTS
-        self.assertIn("email", PLATFORM_HINTS)
-        self.assertIn("email", PLATFORM_HINTS["email"].lower())
-
-
-class TestChannelDirectory(unittest.TestCase):
-    """Verify email in channel directory session-based discovery."""
-
-    def test_email_in_session_discovery(self):
-        from gateway.config import Platform
-        # Verify email is a Platform enum member — the dynamic loop in
-        # build_channel_directory iterates all Platform members, so email
-        # is included automatically as long as it's in the enum.
-        email_values = [p.value for p in Platform]
-        self.assertIn("email", email_values)
-
-
-class TestGatewaySetup(unittest.TestCase):
-    """Verify email in gateway setup wizard."""
-
-    def test_email_in_platforms_list(self):
-        from hermes_cli.gateway import _PLATFORMS
-        keys = [p["key"] for p in _PLATFORMS]
-        self.assertIn("email", keys)
-
-    def test_email_has_setup_vars(self):
-        from hermes_cli.gateway import _PLATFORMS
-        email_platform = next(p for p in _PLATFORMS if p["key"] == "email")
-        var_names = [v["name"] for v in email_platform["vars"]]
-        self.assertIn("EMAIL_ADDRESS", var_names)
-        self.assertIn("EMAIL_PASSWORD", var_names)
-        self.assertIn("EMAIL_IMAP_HOST", var_names)
-        self.assertIn("EMAIL_SMTP_HOST", var_names)
-
-
-class TestEnvExample(unittest.TestCase):
-    """Verify .env.example has email config."""
-
-    def test_env_example_has_email_vars(self):
-        env_path = Path(__file__).resolve().parents[2] / ".env.example"
-        content = env_path.read_text()
-        self.assertIn("EMAIL_ADDRESS", content)
-        self.assertIn("EMAIL_PASSWORD", content)
-        self.assertIn("EMAIL_IMAP_HOST", content)
-        self.assertIn("EMAIL_SMTP_HOST", content)
-
-
 class TestDispatchMessage(unittest.TestCase):
    """Test email message dispatch logic."""

@@ -29,13 +29,6 @@ def _mock_event_dispatcher_builder(mock_handler_class):
    return mock_builder


-class TestPlatformEnum(unittest.TestCase):
-    def test_feishu_in_platform_enum(self):
-        from gateway.config import Platform
-
-        self.assertEqual(Platform.FEISHU.value, "feishu")
-
-
 class TestConfigEnvOverrides(unittest.TestCase):
    @patch.dict(os.environ, {
        "FEISHU_APP_ID": "cli_xxx",
@@ -82,24 +75,6 @@ class TestConfigEnvOverrides(unittest.TestCase):
        self.assertIn(Platform.FEISHU, config.get_connected_platforms())


-class TestGatewayIntegration(unittest.TestCase):
-    def test_feishu_in_adapter_factory(self):
-        source = Path("gateway/run.py").read_text(encoding="utf-8")
-        self.assertIn("Platform.FEISHU", source)
-        self.assertIn("FeishuAdapter", source)
-
-    def test_feishu_in_authorization_maps(self):
-        source = Path("gateway/run.py").read_text(encoding="utf-8")
-        self.assertIn("FEISHU_ALLOWED_USERS", source)
-        self.assertIn("FEISHU_ALLOW_ALL_USERS", source)
-
-    def test_feishu_toolset_exists(self):
-        from toolsets import TOOLSETS
-
-        self.assertIn("hermes-feishu", TOOLSETS)
-        self.assertIn("hermes-feishu", TOOLSETS["hermes-gateway"]["includes"])
-
-
 class TestFeishuMessageNormalization(unittest.TestCase):
    def test_normalize_merge_forward_preserves_summary_lines(self):
        from gateway.platforms.feishu import normalize_feishu_message
@@ -472,27 +447,6 @@ class TestFeishuAdapterMessaging(unittest.TestCase):
        self.assertEqual(info["type"], "group")

 class TestAdapterModule(unittest.TestCase):
-    def test_adapter_requirement_helper_exists(self):
-        source = Path("gateway/platforms/feishu.py").read_text(encoding="utf-8")
-        self.assertIn("def check_feishu_requirements()", source)
-        self.assertIn("FEISHU_AVAILABLE", source)
-
-    def test_adapter_declares_websocket_scope(self):
-        source = Path("gateway/platforms/feishu.py").read_text(encoding="utf-8")
-        self.assertIn("Supported modes: websocket, webhook", source)
-        self.assertIn("FEISHU_CONNECTION_MODE", source)
-
-    def test_adapter_registers_message_read_noop_handler(self):
-        source = Path("gateway/platforms/feishu.py").read_text(encoding="utf-8")
-        self.assertIn("register_p2_im_message_message_read_v1", source)
-        self.assertIn("def _on_message_read_event", source)
-
-    def test_adapter_registers_reaction_and_card_handlers_for_websocket(self):
-        source = Path("gateway/platforms/feishu.py").read_text(encoding="utf-8")
-        self.assertIn("register_p2_im_message_reaction_created_v1", source)
-        self.assertIn("register_p2_im_message_reaction_deleted_v1", source)
-        self.assertIn("register_p2_card_action_trigger", source)
-
    def test_load_settings_uses_sdk_defaults_for_invalid_ws_reconnect_values(self):
        from gateway.platforms.feishu import FeishuAdapter

@@ -639,6 +593,14 @@ class TestAdapterBehavior(unittest.TestCase):
                calls.append("bot_deleted")
                return self

+            def register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self, _handler):
+                calls.append("p2p_chat_entered")
+                return self
+
+            def register_p2_im_message_recalled_v1(self, _handler):
+                calls.append("message_recalled")
+                return self
+
            def build(self):
                calls.append("build")
                return "handler"
@@ -664,6 +626,8 @@ class TestAdapterBehavior(unittest.TestCase):
                "card_action",
                "bot_added",
                "bot_deleted",
+                "p2p_chat_entered",
+                "message_recalled",
                "build",
            ],
        )
@@ -2536,6 +2500,152 @@ class TestAdapterBehavior(unittest.TestCase):
        )


+@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
+class TestPendingInboundQueue(unittest.TestCase):
+    """Tests for the loop-not-ready race (#5499): inbound events arriving
+    before or during adapter loop transitions must be queued for replay
+    rather than silently dropped."""
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_event_queued_when_loop_not_ready(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        adapter._loop = None  # Simulate "before start()" or "during reconnect"
+
+        with patch("gateway.platforms.feishu.threading.Thread") as thread_cls:
+            adapter._on_message_event(SimpleNamespace(tag="evt-1"))
+            adapter._on_message_event(SimpleNamespace(tag="evt-2"))
+            adapter._on_message_event(SimpleNamespace(tag="evt-3"))
+
+        # All three queued, none dropped.
+        self.assertEqual(len(adapter._pending_inbound_events), 3)
+        # Only ONE drainer thread scheduled, not one per event.
+        self.assertEqual(thread_cls.call_count, 1)
+        # Drain scheduled flag set.
+        self.assertTrue(adapter._pending_drain_scheduled)
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_drainer_replays_queued_events_when_loop_becomes_ready(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        adapter._loop = None
+        adapter._running = True
+
+        class _ReadyLoop:
+            def is_closed(self):
+                return False
+
+        # Queue three events while loop is None (simulate the race).
+        events = [SimpleNamespace(tag=f"evt-{i}") for i in range(3)]
+        with patch("gateway.platforms.feishu.threading.Thread"):
+            for ev in events:
+                adapter._on_message_event(ev)
+
+        self.assertEqual(len(adapter._pending_inbound_events), 3)
+
+        # Now the loop becomes ready; run the drainer inline (not as a thread)
+        # to verify it replays the queue.
+        adapter._loop = _ReadyLoop()
+
+        future = SimpleNamespace(add_done_callback=lambda *_a, **_kw: None)
+        submitted: list = []
+
+        def _submit(coro, _loop):
+            submitted.append(coro)
+            coro.close()
+            return future
+
+        with patch(
+            "gateway.platforms.feishu.asyncio.run_coroutine_threadsafe",
+            side_effect=_submit,
+        ) as submit:
+            adapter._drain_pending_inbound_events()
+
+        # All three events dispatched to the loop.
+        self.assertEqual(submit.call_count, 3)
+        # Queue emptied.
+        self.assertEqual(len(adapter._pending_inbound_events), 0)
+        # Drain flag reset so a future race can schedule a new drainer.
+        self.assertFalse(adapter._pending_drain_scheduled)
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_drainer_drops_queue_when_adapter_shuts_down(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        adapter._loop = None
+        adapter._running = False  # Shutdown state
+
+        with patch("gateway.platforms.feishu.threading.Thread"):
+            adapter._on_message_event(SimpleNamespace(tag="evt-lost"))
+
+        self.assertEqual(len(adapter._pending_inbound_events), 1)
+
+        # Drainer should drop the queue immediately since _running is False.
+        adapter._drain_pending_inbound_events()
+
+        self.assertEqual(len(adapter._pending_inbound_events), 0)
+        self.assertFalse(adapter._pending_drain_scheduled)
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_queue_cap_evicts_oldest_beyond_max_depth(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        adapter._loop = None
+        adapter._pending_inbound_max_depth = 3  # Shrink for test
+
+        with patch("gateway.platforms.feishu.threading.Thread"):
+            for i in range(5):
+                adapter._on_message_event(SimpleNamespace(tag=f"evt-{i}"))
+
+        # Only the last 3 should remain; evt-0 and evt-1 dropped.
+        self.assertEqual(len(adapter._pending_inbound_events), 3)
+        tags = [getattr(e, "tag", None) for e in adapter._pending_inbound_events]
+        self.assertEqual(tags, ["evt-2", "evt-3", "evt-4"])
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_normal_path_unchanged_when_loop_ready(self):
+        """When the loop is ready, events should dispatch directly without
+        ever touching the pending queue."""
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+
+        class _ReadyLoop:
+            def is_closed(self):
+                return False
+
+        adapter._loop = _ReadyLoop()
+
+        future = SimpleNamespace(add_done_callback=lambda *_a, **_kw: None)
+
+        def _submit(coro, _loop):
+            coro.close()
+            return future
+
+        with patch(
+            "gateway.platforms.feishu.asyncio.run_coroutine_threadsafe",
+            side_effect=_submit,
+        ) as submit, patch(
+            "gateway.platforms.feishu.threading.Thread"
+        ) as thread_cls:
+            adapter._on_message_event(SimpleNamespace(tag="evt"))
+
+        self.assertEqual(submit.call_count, 1)
+        self.assertEqual(len(adapter._pending_inbound_events), 0)
+        self.assertFalse(adapter._pending_drain_scheduled)
+        # No drainer thread spawned when the happy path runs.
+        self.assertEqual(thread_cls.call_count, 0)
+
+
@unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed")
 class TestWebhookSecurity(unittest.TestCase):
    """Tests for webhook signature verification, rate limiting, and body size limits."""
@@ -469,18 +469,6 @@ class TestConfigIntegration:
        assert ha.extra["watch_domains"] == ["climate"]
        assert ha.extra["cooldown_seconds"] == 45

-    def test_connected_platforms_includes_ha(self):
-        config = GatewayConfig(
-            platforms={
-                Platform.HOMEASSISTANT: PlatformConfig(enabled=True, token="tok"),
-                Platform.TELEGRAM: PlatformConfig(enabled=False, token="t"),
-            },
-        )
-        connected = config.get_connected_platforms()
-        assert Platform.HOMEASSISTANT in connected
-        assert Platform.TELEGRAM not in connected
-
-
 # ---------------------------------------------------------------------------
 # send() via REST API
 # ---------------------------------------------------------------------------
@@ -582,27 +570,6 @@ class TestSendViaRestApi:
 # ---------------------------------------------------------------------------


-class TestToolsetIntegration:
-    def test_homeassistant_toolset_resolves(self):
-        from toolsets import resolve_toolset
-
-        tools = resolve_toolset("homeassistant")
-        assert set(tools) == {"ha_list_entities", "ha_get_state", "ha_call_service", "ha_list_services"}
-
-    def test_gateway_toolset_includes_ha_tools(self):
-        from toolsets import resolve_toolset
-
-        gateway_tools = resolve_toolset("hermes-gateway")
-        for tool in ("ha_list_entities", "ha_get_state", "ha_call_service", "ha_list_services"):
-            assert tool in gateway_tools
-
-    def test_hermes_core_tools_includes_ha(self):
-        from toolsets import _HERMES_CORE_TOOLS
-
-        for tool in ("ha_list_entities", "ha_get_state", "ha_call_service", "ha_list_services"):
-            assert tool in _HERMES_CORE_TOOLS
-
-
 # ---------------------------------------------------------------------------
 # WebSocket URL construction
 # ---------------------------------------------------------------------------
@@ -239,15 +239,6 @@ def _make_fake_mautrix():
 # Platform & Config
 # ---------------------------------------------------------------------------

-class TestMatrixPlatformEnum:
-    def test_matrix_enum_exists(self):
-        assert Platform.MATRIX.value == "matrix"
-
-    def test_matrix_in_platform_list(self):
-        platforms = [p.value for p in Platform]
-        assert "matrix" in platforms
-
-
 class TestMatrixConfigLoading:
    def test_apply_env_overrides_with_access_token(self, monkeypatch):
        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123")
@@ -184,8 +184,14 @@ class TestMatrixVoiceMessageDetection:
            f"Expected MessageType.AUDIO for non-voice, got {captured_event.message_type}"

    @pytest.mark.asyncio
-    async def test_regular_audio_has_http_url(self):
-        """Regular audio uploads should keep HTTP URL (not cached locally)."""
+    async def test_regular_audio_is_cached_locally(self):
+        """Regular audio uploads are cached locally for downstream tool access.
+
+        Since PR #bec02f37 (encrypted-media caching refactor), all media
+        types — photo, audio, video, document — are cached locally when
+        received so tools can read them as real files. This applies equally
+        to voice messages and regular audio.
+        """
        event = _make_audio_event(is_voice=False)

        captured_event = None
@@ -200,10 +206,10 @@ class TestMatrixVoiceMessageDetection:

        assert captured_event is not None
        assert captured_event.media_urls is not None
-        # Should be HTTP URL, not local path
-        assert captured_event.media_urls[0].startswith("http"), \
-            f"Non-voice audio should have HTTP URL, got {captured_event.media_urls[0]}"
-        self.adapter._client.download_media.assert_not_awaited()
+        # Should be a local path, not an HTTP URL.
+        assert not captured_event.media_urls[0].startswith("http"), \
+            f"Regular audio should be cached locally, got {captured_event.media_urls[0]}"
+        self.adapter._client.download_media.assert_awaited_once()
        assert captured_event.media_types == ["audio/ogg"]


@@ -12,15 +12,6 @@ from gateway.config import Platform, PlatformConfig
 # Platform & Config
 # ---------------------------------------------------------------------------

-class TestMattermostPlatformEnum:
-    def test_mattermost_enum_exists(self):
-        assert Platform.MATTERMOST.value == "mattermost"
-
-    def test_mattermost_in_platform_list(self):
-        platforms = [p.value for p in Platform]
-        assert "mattermost" in platforms
-
-
 class TestMattermostConfigLoading:
    def test_apply_env_overrides_mattermost(self, monkeypatch):
        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
@@ -46,17 +37,6 @@ class TestMattermostConfigLoading:

        assert Platform.MATTERMOST not in config.platforms

-    def test_connected_platforms_includes_mattermost(self, monkeypatch):
-        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
-        monkeypatch.setenv("MATTERMOST_URL", "https://mm.example.com")
-
-        from gateway.config import GatewayConfig, _apply_env_overrides
-        config = GatewayConfig()
-        _apply_env_overrides(config)
-
-        connected = config.get_connected_platforms()
-        assert Platform.MATTERMOST in connected
-
    def test_mattermost_home_channel(self, monkeypatch):
        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
        monkeypatch.setenv("MATTERMOST_URL", "https://mm.example.com")
@@ -1,5 +1,6 @@
 """Tests for the QQ Bot platform adapter."""

+import asyncio
 import json
 import os
 import sys
@@ -149,6 +150,47 @@ class TestIsVoiceContentType:
        assert self._fn("", "recording.amr") is True


+# ---------------------------------------------------------------------------
+# Voice attachment SSRF protection
+# ---------------------------------------------------------------------------
+
+class TestVoiceAttachmentSSRFProtection:
+    def _make_adapter(self, **extra):
+        from gateway.platforms.qqbot import QQAdapter
+        return QQAdapter(_make_config(**extra))
+
+    def test_stt_blocks_unsafe_download_url(self):
+        adapter = self._make_adapter(app_id="a", client_secret="b")
+        adapter._http_client = mock.AsyncMock()
+
+        with mock.patch("tools.url_safety.is_safe_url", return_value=False):
+            transcript = asyncio.run(
+                adapter._stt_voice_attachment(
+                    "http://127.0.0.1/voice.silk",
+                    "audio/silk",
+                    "voice.silk",
+                )
+            )
+
+        assert transcript is None
+        adapter._http_client.get.assert_not_called()
+
+    def test_connect_uses_redirect_guard_hook(self):
+        from gateway.platforms.qqbot import QQAdapter, _ssrf_redirect_guard
+
+        client = mock.AsyncMock()
+        with mock.patch("gateway.platforms.qqbot.httpx.AsyncClient", return_value=client) as async_client_cls:
+            adapter = QQAdapter(_make_config(app_id="a", client_secret="b"))
+            adapter._ensure_token = mock.AsyncMock(side_effect=RuntimeError("stop after client creation"))
+
+            connected = asyncio.run(adapter.connect())
+
+        assert connected is False
+        assert async_client_cls.call_count == 1
+        kwargs = async_client_cls.call_args.kwargs
+        assert kwargs.get("follow_redirects") is True
+        assert kwargs.get("event_hooks", {}).get("response") == [_ssrf_redirect_guard]
+
 # ---------------------------------------------------------------------------
 # _strip_at_mention
 # ---------------------------------------------------------------------------
@@ -458,3 +500,85 @@ class TestBuildTextBody:
        adapter = self._make_adapter(app_id="a", client_secret="b", markdown_support=False)
        body = adapter._build_text_body("reply text", reply_to="msg_123")
        assert body.get("message_reference", {}).get("message_id") == "msg_123"
+
+
+# ---------------------------------------------------------------------------
+# _wait_for_reconnection / send reconnection wait
+# ---------------------------------------------------------------------------
+
+class TestWaitForReconnection:
+    """Test that send() waits for reconnection instead of silently dropping."""
+
+    def _make_adapter(self, **extra):
+        from gateway.platforms.qqbot import QQAdapter
+        return QQAdapter(_make_config(**extra))
+
+    @pytest.mark.asyncio
+    async def test_send_waits_and_succeeds_on_reconnect(self):
+        """send() should wait for reconnection and then deliver the message."""
+        adapter = self._make_adapter(app_id="a", client_secret="b")
+        # Initially disconnected
+        adapter._running = False
+        adapter._http_client = mock.MagicMock()
+
+        # Simulate reconnection after 0.3s (faster than real interval)
+        async def fake_api_request(*args, **kwargs):
+            return {"id": "msg_123"}
+
+        adapter._api_request = fake_api_request
+        adapter._ensure_token = mock.AsyncMock()
+        adapter._RECONNECT_POLL_INTERVAL = 0.1
+        adapter._RECONNECT_WAIT_SECONDS = 5.0
+
+        # Schedule reconnection after a short delay
+        async def reconnect_after_delay():
+            await asyncio.sleep(0.3)
+            adapter._running = True
+
+        asyncio.get_event_loop().create_task(reconnect_after_delay())
+
+        result = await adapter.send("test_openid", "Hello, world!")
+        assert result.success
+        assert result.message_id == "msg_123"
+
+    @pytest.mark.asyncio
+    async def test_send_returns_retryable_after_timeout(self):
+        """send() should return retryable=True if reconnection takes too long."""
+        adapter = self._make_adapter(app_id="a", client_secret="b")
+        adapter._running = False
+        adapter._RECONNECT_POLL_INTERVAL = 0.05
+        adapter._RECONNECT_WAIT_SECONDS = 0.2
+
+        result = await adapter.send("test_openid", "Hello, world!")
+        assert not result.success
+        assert result.retryable is True
+        assert "Not connected" in result.error
+
+    @pytest.mark.asyncio
+    async def test_send_succeeds_immediately_when_connected(self):
+        """send() should not wait when already connected."""
+        adapter = self._make_adapter(app_id="a", client_secret="b")
+        adapter._running = True
+        adapter._http_client = mock.MagicMock()
+
+        async def fake_api_request(*args, **kwargs):
+            return {"id": "msg_immediate"}
+
+        adapter._api_request = fake_api_request
+
+        result = await adapter.send("test_openid", "Hello!")
+        assert result.success
+        assert result.message_id == "msg_immediate"
+
+    @pytest.mark.asyncio
+    async def test_send_media_waits_for_reconnect(self):
+        """_send_media should also wait for reconnection."""
+        adapter = self._make_adapter(app_id="a", client_secret="b")
+        adapter._running = False
+        adapter._RECONNECT_POLL_INTERVAL = 0.05
+        adapter._RECONNECT_WAIT_SECONDS = 0.2
+
+        result = await adapter._send_media("test_openid", "http://example.com/img.jpg", 1, "image")
+        assert not result.success
+        assert result.retryable is True
+        assert "Not connected" in result.error
@@ -42,15 +42,6 @@ def _stub_rpc(return_value):
 # Platform & Config
 # ---------------------------------------------------------------------------

-class TestSignalPlatformEnum:
-    def test_signal_enum_exists(self):
-        assert Platform.SIGNAL.value == "signal"
-
-    def test_signal_in_platform_list(self):
-        platforms = [p.value for p in Platform]
-        assert "signal" in platforms
-
-
 class TestSignalConfigLoading:
    def test_apply_env_overrides_signal(self, monkeypatch):
        monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:9090")
@@ -76,18 +67,6 @@ class TestSignalConfigLoading:

        assert Platform.SIGNAL not in config.platforms

-    def test_connected_platforms_includes_signal(self, monkeypatch):
-        monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:8080")
-        monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567")
-
-        from gateway.config import GatewayConfig, _apply_env_overrides
-        config = GatewayConfig()
-        _apply_env_overrides(config)
-
-        connected = config.get_connected_platforms()
-        assert Platform.SIGNAL in connected
-
-
 # ---------------------------------------------------------------------------
 # Adapter Init & Helpers
 # ---------------------------------------------------------------------------
@@ -362,15 +341,6 @@ class TestSignalAuthorization:
 # Send Message Tool
 # ---------------------------------------------------------------------------

-class TestSignalSendMessage:
-    def test_signal_in_platform_map(self):
-        """Signal should be in the send_message tool's platform map."""
-        from tools.send_message_tool import send_message_tool
-        # Just verify the import works and Signal is a valid platform
-        from gateway.config import Platform
-        assert Platform.SIGNAL.value == "signal"
-
-
 # ---------------------------------------------------------------------------
 # send_image_file method (#5105)
 # ---------------------------------------------------------------------------
@@ -20,9 +20,6 @@ from gateway.config import Platform, PlatformConfig, HomeChannel
 class TestSmsConfigLoading:
    """Verify _apply_env_overrides wires SMS correctly."""

-    def test_sms_platform_enum_exists(self):
-        assert Platform.SMS.value == "sms"
-
    def test_env_overrides_create_sms_config(self):
        from gateway.config import load_gateway_config

@@ -56,19 +53,6 @@ class TestSmsConfigLoading:
            assert hc.name == "My Phone"
            assert hc.platform == Platform.SMS

-    def test_sms_in_connected_platforms(self):
-        from gateway.config import load_gateway_config
-
-        env = {
-            "TWILIO_ACCOUNT_SID": "ACtest123",
-            "TWILIO_AUTH_TOKEN": "token_abc",
-        }
-        with patch.dict(os.environ, env, clear=False):
-            config = load_gateway_config()
-            connected = config.get_connected_platforms()
-            assert Platform.SMS in connected
-
-
 # ── Format / truncate ───────────────────────────────────────────────

 class TestSmsFormatAndTruncate:
@@ -180,44 +164,6 @@ class TestSmsRequirements:

 # ── Toolset verification ───────────────────────────────────────────

-class TestSmsToolset:
-    def test_hermes_sms_toolset_exists(self):
-        from toolsets import get_toolset
-
-        ts = get_toolset("hermes-sms")
-        assert ts is not None
-        assert "tools" in ts
-
-    def test_hermes_sms_in_gateway_includes(self):
-        from toolsets import get_toolset
-
-        gw = get_toolset("hermes-gateway")
-        assert gw is not None
-        assert "hermes-sms" in gw["includes"]
-
-    def test_sms_platform_hint_exists(self):
-        from agent.prompt_builder import PLATFORM_HINTS
-
-        assert "sms" in PLATFORM_HINTS
-        assert "concise" in PLATFORM_HINTS["sms"].lower()
-
-    def test_sms_in_scheduler_platform_map(self):
-        """Verify cron scheduler recognizes 'sms' as a valid platform."""
-        # Just check the Platform enum has SMS — the scheduler imports it dynamically
-        assert Platform.SMS.value == "sms"
-
-    def test_sms_in_send_message_platform_map(self):
-        """Verify send_message_tool recognizes 'sms'."""
-        # The platform_map is built inside _handle_send; verify SMS enum exists
-        assert hasattr(Platform, "SMS")
-
-    def test_sms_in_cronjob_deliver_description(self):
-        """Verify cronjob_tools mentions sms in deliver description."""
-        from tools.cronjob_tools import CRONJOB_SCHEMA
-        deliver_desc = CRONJOB_SCHEMA["parameters"]["properties"]["deliver"]["description"]
-        assert "sms" in deliver_desc.lower()
-
-
 # ── Webhook host configuration ─────────────────────────────────────

 class TestWebhookHostConfig:
@@ -21,6 +21,7 @@ def _clear_auth_env(monkeypatch) -> None:
        "MATTERMOST_ALLOWED_USERS",
        "MATRIX_ALLOWED_USERS",
        "DINGTALK_ALLOWED_USERS", "FEISHU_ALLOWED_USERS", "WECOM_ALLOWED_USERS",
+        "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS",
        "GATEWAY_ALLOWED_USERS",
        "TELEGRAM_ALLOW_ALL_USERS",
        "DISCORD_ALLOW_ALL_USERS",
@@ -32,6 +33,7 @@ def _clear_auth_env(monkeypatch) -> None:
        "MATTERMOST_ALLOW_ALL_USERS",
        "MATRIX_ALLOW_ALL_USERS",
        "DINGTALK_ALLOW_ALL_USERS", "FEISHU_ALLOW_ALL_USERS", "WECOM_ALLOW_ALL_USERS",
+        "QQ_ALLOW_ALL_USERS",
        "GATEWAY_ALLOW_ALL_USERS",
    ):
        monkeypatch.delenv(key, raising=False)
@@ -130,6 +132,46 @@ def test_star_wildcard_works_for_any_platform(monkeypatch):
    assert runner._is_user_authorized(source) is True


+def test_qq_group_allowlist_authorizes_group_chat_without_user_allowlist(monkeypatch):
+    _clear_auth_env(monkeypatch)
+    monkeypatch.setenv("QQ_GROUP_ALLOWED_USERS", "group-openid-1")
+
+    runner, _adapter = _make_runner(
+        Platform.QQBOT,
+        GatewayConfig(platforms={Platform.QQBOT: PlatformConfig(enabled=True)}),
+    )
+
+    source = SessionSource(
+        platform=Platform.QQBOT,
+        user_id="member-openid-999",
+        chat_id="group-openid-1",
+        user_name="tester",
+        chat_type="group",
+    )
+
+    assert runner._is_user_authorized(source) is True
+
+
+def test_qq_group_allowlist_does_not_authorize_other_groups(monkeypatch):
+    _clear_auth_env(monkeypatch)
+    monkeypatch.setenv("QQ_GROUP_ALLOWED_USERS", "group-openid-1")
+
+    runner, _adapter = _make_runner(
+        Platform.QQBOT,
+        GatewayConfig(platforms={Platform.QQBOT: PlatformConfig(enabled=True)}),
+    )
+
+    source = SessionSource(
+        platform=Platform.QQBOT,
+        user_id="member-openid-999",
+        chat_id="group-openid-2",
+        user_name="tester",
+        chat_type="group",
+    )
+
+    assert runner._is_user_authorized(source) is False
+
+
@pytest.mark.asyncio
 async def test_unauthorized_dm_pairs_by_default(monkeypatch):
    _clear_auth_env(monkeypatch)
@@ -593,7 +593,3 @@ class TestInboundMessages:
        await adapter._on_message(payload)
        adapter.handle_message.assert_not_awaited()

-
-class TestPlatformEnum:
-    def test_wecom_in_platform_enum(self):
-        assert Platform.WECOM.value == "wecom"
@@ -1,12 +1,15 @@
 """Tests for the Weixin platform adapter."""

 import asyncio
+import base64
 import json
 import os
+from pathlib import Path
 from unittest.mock import AsyncMock, patch

 from gateway.config import PlatformConfig
 from gateway.config import GatewayConfig, HomeChannel, Platform, _apply_env_overrides
+from gateway.platforms.base import SendResult
 from gateway.platforms import weixin
 from gateway.platforms.weixin import ContextTokenStore, WeixinAdapter
 from tools.send_message_tool import _parse_target_ref, _send_to_platform
@@ -23,17 +26,14 @@ def _make_adapter() -> WeixinAdapter:


 class TestWeixinFormatting:
-    def test_format_message_preserves_markdown_and_rewrites_headers(self):
+    def test_format_message_preserves_markdown(self):
        adapter = _make_adapter()

        content = "# Title\n\n## Plan\n\nUse **bold** and [docs](https://example.com)."

-        assert (
-            adapter.format_message(content)
-            == "【Title】\n\n**Plan**\n\nUse **bold** and docs (https://example.com)."
-        )
+        assert adapter.format_message(content) == content

-    def test_format_message_rewrites_markdown_tables(self):
+    def test_format_message_preserves_markdown_tables(self):
        adapter = _make_adapter()

        content = (
@@ -43,19 +43,14 @@ class TestWeixinFormatting:
            "| Retries | 3 |\n"
        )

-        assert adapter.format_message(content) == (
-            "- Setting: Timeout\n"
-            "  Value: 30s\n"
-            "- Setting: Retries\n"
-            "  Value: 3"
-        )
+        assert adapter.format_message(content) == content.strip()

    def test_format_message_preserves_fenced_code_blocks(self):
        adapter = _make_adapter()

        content = "## Snippet\n\n```python\nprint('hi')\n```"

-        assert adapter.format_message(content) == "**Snippet**\n\n```python\nprint('hi')\n```"
+        assert adapter.format_message(content) == content

    def test_format_message_returns_empty_string_for_none(self):
        adapter = _make_adapter()
@@ -101,7 +96,7 @@ class TestWeixinChunking:
        content = adapter.format_message("## 结论\n这是正文")
        chunks = adapter._split_text(content)

-        assert chunks == ["**结论**\n这是正文"]
+        assert chunks == ["## 结论\n这是正文"]

    def test_split_text_keeps_short_reformatted_table_in_single_chunk(self):
        adapter = _make_adapter()
@@ -318,6 +313,7 @@ class TestWeixinChunkDelivery:
    def _connected_adapter(self) -> WeixinAdapter:
        adapter = _make_adapter()
        adapter._session = object()
+        adapter._send_session = adapter._session
        adapter._token = "test-token"
        adapter._base_url = "https://weixin.example.com"
        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
@@ -363,6 +359,115 @@ class TestWeixinChunkDelivery:
        assert first_try["client_id"] == retry["client_id"]


+class TestWeixinOutboundMedia:
+    def test_send_image_file_accepts_keyword_image_path(self):
+        adapter = _make_adapter()
+        expected = SendResult(success=True, message_id="msg-1")
+        adapter.send_document = AsyncMock(return_value=expected)
+
+        result = asyncio.run(
+            adapter.send_image_file(
+                chat_id="wxid_test123",
+                image_path="/tmp/demo.png",
+                caption="截图说明",
+                reply_to="reply-1",
+                metadata={"thread_id": "t-1"},
+            )
+        )
+
+        assert result == expected
+        adapter.send_document.assert_awaited_once_with(
+            chat_id="wxid_test123",
+            file_path="/tmp/demo.png",
+            caption="截图说明",
+            metadata={"thread_id": "t-1"},
+        )
+
+    def test_send_document_accepts_keyword_file_path(self):
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._send_session = adapter._session
+        adapter._token = "test-token"
+        adapter._send_file = AsyncMock(return_value="msg-2")
+
+        result = asyncio.run(
+            adapter.send_document(
+                chat_id="wxid_test123",
+                file_path="/tmp/report.pdf",
+                caption="报告请看",
+                file_name="renamed.pdf",
+                reply_to="reply-1",
+                metadata={"thread_id": "t-1"},
+            )
+        )
+
+        assert result.success is True
+        assert result.message_id == "msg-2"
+        adapter._send_file.assert_awaited_once_with("wxid_test123", "/tmp/report.pdf", "报告请看")
+
+    def test_send_file_uses_post_for_upload_full_url_and_hex_encoded_aes_key(self, tmp_path):
+        class _UploadResponse:
+            def __init__(self):
+                self.status = 200
+                self.headers = {"x-encrypted-param": "enc-param"}
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, exc_type, exc, tb):
+                return False
+
+            async def read(self):
+                return b""
+
+            async def text(self):
+                return ""
+
+        class _RecordingSession:
+            def __init__(self):
+                self.post_calls = []
+
+            def post(self, url, **kwargs):
+                self.post_calls.append((url, kwargs))
+                return _UploadResponse()
+
+            def put(self, *_args, **_kwargs):
+                raise AssertionError("upload_full_url branch should use POST")
+
+        image_path = tmp_path / "demo.png"
+        image_path.write_bytes(b"fake-png-bytes")
+
+        adapter = _make_adapter()
+        session = _RecordingSession()
+        adapter._session = session
+        adapter._send_session = session
+        adapter._token = "test-token"
+        adapter._base_url = "https://weixin.example.com"
+        adapter._cdn_base_url = "https://cdn.example.com/c2c"
+        adapter._token_store.get = lambda account_id, chat_id: None
+
+        aes_key = bytes(range(16))
+        expected_aes_key = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
+
+        with patch("gateway.platforms.weixin._get_upload_url", new=AsyncMock(return_value={"upload_full_url": "https://upload.example.com/media"})), \
+             patch("gateway.platforms.weixin._api_post", new_callable=AsyncMock) as api_post_mock, \
+             patch("gateway.platforms.weixin.secrets.token_hex", return_value="filekey-123"), \
+             patch("gateway.platforms.weixin.secrets.token_bytes", return_value=aes_key):
+            message_id = asyncio.run(adapter._send_file("wxid_test123", str(image_path), ""))
+
+        assert message_id.startswith("hermes-weixin-")
+        assert len(session.post_calls) == 1
+        upload_url, upload_kwargs = session.post_calls[0]
+        assert upload_url == "https://upload.example.com/media"
+        assert upload_kwargs["headers"] == {"Content-Type": "application/octet-stream"}
+        assert upload_kwargs["data"]
+        assert upload_kwargs["timeout"].total == 120
+        payload = api_post_mock.await_args.kwargs["payload"]
+        media = payload["msg"]["item_list"][0]["image_item"]["media"]
+        assert media["encrypt_query_param"] == "enc-param"
+        assert media["aes_key"] == expected_aes_key
+
+
 class TestWeixinRemoteMediaSafety:
    def test_download_remote_media_blocks_unsafe_urls(self):
        adapter = _make_adapter()
@@ -377,16 +482,13 @@ class TestWeixinRemoteMediaSafety:


 class TestWeixinMarkdownLinks:
-    """Markdown links should be converted to plaintext since WeChat can't render them."""
+    """Markdown links should be preserved so WeChat can render them natively."""

-    def test_format_message_converts_markdown_links_to_plain_text(self):
+    def test_format_message_preserves_markdown_links(self):
        adapter = _make_adapter()

        content = "Check [the docs](https://example.com) and [GitHub](https://github.com) for details"
-        assert (
-            adapter.format_message(content)
-            == "Check the docs (https://example.com) and GitHub (https://github.com) for details"
-        )
+        assert adapter.format_message(content) == content

    def test_format_message_preserves_links_inside_code_blocks(self):
        adapter = _make_adapter()
@@ -430,6 +532,7 @@ class TestWeixinBlankMessagePrevention:
    def test_send_empty_content_does_not_call_send_message(self, send_message_mock):
        adapter = _make_adapter()
        adapter._session = object()
+        adapter._send_session = adapter._session
        adapter._token = "test-token"
        adapter._base_url = "https://weixin.example.com"
        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
@@ -500,10 +603,10 @@ class TestWeixinMediaBuilder:
        )
        assert item["video_item"]["video_md5"] == "deadbeef"

-    def test_voice_builder_for_audio_files(self):
+    def test_voice_builder_for_audio_files_uses_file_attachment_type(self):
        adapter = _make_adapter()
        media_type, builder = adapter._outbound_media_builder("note.mp3")
-        assert media_type == weixin.MEDIA_VOICE
+        assert media_type == weixin.MEDIA_FILE

        item = builder(
            encrypt_query_param="eq",
@@ -513,10 +616,145 @@ class TestWeixinMediaBuilder:
            filename="note.mp3",
            rawfilemd5="abc",
        )
-        assert item["type"] == weixin.ITEM_VOICE
-        assert "voice_item" in item
+        assert item["type"] == weixin.ITEM_FILE
+        assert item["file_item"]["file_name"] == "note.mp3"

    def test_voice_builder_for_silk_files(self):
        adapter = _make_adapter()
        media_type, builder = adapter._outbound_media_builder("recording.silk")
        assert media_type == weixin.MEDIA_VOICE
+
+
+class TestWeixinSendImageFileParameterName:
+    """Regression test for send_image_file parameter name mismatch.
+
+    The gateway calls send_image_file(chat_id=..., image_path=...) but the
+    WeixinAdapter previously used 'path' as the parameter name, causing
+    image sending to fail. This test ensures the interface stays correct.
+    """
+
+    @patch.object(WeixinAdapter, "send_document", new_callable=AsyncMock)
+    def test_send_image_file_uses_image_path_parameter(self, send_document_mock):
+        """Verify send_image_file accepts image_path and forwards to send_document."""
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._send_session = adapter._session
+        adapter._token = "test-token"
+
+        send_document_mock.return_value = weixin.SendResult(success=True, message_id="test-id")
+
+        # This is the call pattern used by gateway/run.py extract_media
+        result = asyncio.run(
+            adapter.send_image_file(
+                chat_id="wxid_test123",
+                image_path="/tmp/test_image.png",
+                caption="Test caption",
+                metadata={"thread_id": "thread-123"},
+            )
+        )
+
+        assert result.success is True
+        send_document_mock.assert_awaited_once_with(
+            chat_id="wxid_test123",
+            file_path="/tmp/test_image.png",
+            caption="Test caption",
+            metadata={"thread_id": "thread-123"},
+        )
+
+    @patch.object(WeixinAdapter, "send_document", new_callable=AsyncMock)
+    def test_send_image_file_works_without_optional_params(self, send_document_mock):
+        """Verify send_image_file works with minimal required params."""
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._send_session = adapter._session
+        adapter._token = "test-token"
+
+        send_document_mock.return_value = weixin.SendResult(success=True, message_id="test-id")
+
+        result = asyncio.run(
+            adapter.send_image_file(
+                chat_id="wxid_test123",
+                image_path="/tmp/test_image.jpg",
+            )
+        )
+
+        assert result.success is True
+        send_document_mock.assert_awaited_once_with(
+            chat_id="wxid_test123",
+            file_path="/tmp/test_image.jpg",
+            caption=None,
+            metadata=None,
+        )
+
+
+class TestWeixinVoiceSending:
+    def _connected_adapter(self) -> WeixinAdapter:
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._send_session = adapter._session
+        adapter._token = "test-token"
+        adapter._base_url = "https://weixin.example.com"
+        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
+        return adapter
+
+    @patch.object(WeixinAdapter, "_send_file", new_callable=AsyncMock)
+    def test_send_voice_downgrades_to_document_attachment(self, send_file_mock, tmp_path):
+        adapter = self._connected_adapter()
+        source = tmp_path / "voice.ogg"
+        source.write_bytes(b"ogg")
+        send_file_mock.return_value = "msg-1"
+
+        result = asyncio.run(adapter.send_voice("wxid_test123", str(source)))
+
+        assert result.success is True
+        send_file_mock.assert_awaited_once_with(
+            "wxid_test123",
+            str(source),
+            "[voice message as attachment]",
+            force_file_attachment=True,
+        )
+
+    def test_voice_builder_for_silk_files_can_be_forced_to_file_attachment(self):
+        adapter = _make_adapter()
+        media_type, builder = adapter._outbound_media_builder(
+            "recording.silk",
+            force_file_attachment=True,
+        )
+        assert media_type == weixin.MEDIA_FILE
+
+        item = builder(
+            encrypt_query_param="eq",
+            aes_key_for_api="fakekey",
+            ciphertext_size=512,
+            plaintext_size=500,
+            filename="recording.silk",
+            rawfilemd5="abc",
+        )
+        assert item["type"] == weixin.ITEM_FILE
+        assert item["file_item"]["file_name"] == "recording.silk"
+
+    @patch.object(weixin, "_api_post", new_callable=AsyncMock)
+    @patch.object(weixin, "_upload_ciphertext", new_callable=AsyncMock)
+    @patch.object(weixin, "_get_upload_url", new_callable=AsyncMock)
+    def test_send_file_sets_voice_metadata_for_silk_payload(
+        self,
+        get_upload_url_mock,
+        upload_ciphertext_mock,
+        api_post_mock,
+        tmp_path,
+    ):
+        adapter = self._connected_adapter()
+        silk = tmp_path / "voice.silk"
+        silk.write_bytes(b"\x02#!SILK_V3\x01\x00")
+        get_upload_url_mock.return_value = {"upload_full_url": "https://cdn.example.com/upload"}
+        upload_ciphertext_mock.return_value = "enc-q"
+        api_post_mock.return_value = {"success": True}
+
+        asyncio.run(adapter._send_file("wxid_test123", str(silk), ""))
+
+        payload = api_post_mock.await_args.kwargs["payload"]
+        voice_item = payload["msg"]["item_list"][0]["voice_item"]
+        assert voice_item.get("playtime", 0) == 0
+        assert voice_item["encode_type"] == 6
+        assert voice_item["sample_rate"] == 24000
+        assert voice_item["bits_per_sample"] == 16
@@ -1,17 +1,9 @@
 """Tests for API-key provider support (z.ai/GLM, Kimi, MiniMax, AI Gateway)."""

 import os
-import sys
-import types

 import pytest

-# Ensure dotenv doesn't interfere
-if "dotenv" not in sys.modules:
-    fake_dotenv = types.ModuleType("dotenv")
-    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
-    sys.modules["dotenv"] = fake_dotenv
-
 from hermes_cli.auth import (
    PROVIDER_REGISTRY,
    ProviderConfig,
@@ -1,15 +1,9 @@
 """Tests for Arcee AI provider support — standard direct API provider."""

-import sys
 import types

 import pytest

-if "dotenv" not in sys.modules:
-    fake_dotenv = types.ModuleType("dotenv")
-    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
-    sys.modules["dotenv"] = fake_dotenv
-
 from hermes_cli.auth import (
    PROVIDER_REGISTRY,
    resolve_provider,
@@ -57,85 +57,6 @@ def _build_parser():
    return parser


-class TestFlagBeforeSubcommand:
-    """Flags placed before 'chat' must propagate through."""
-
-    def test_yolo_before_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["--yolo", "chat"])
-        assert getattr(args, "yolo", False) is True
-
-    def test_worktree_before_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["-w", "chat"])
-        assert getattr(args, "worktree", False) is True
-
-    def test_skills_before_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["-s", "myskill", "chat"])
-        assert getattr(args, "skills", None) == ["myskill"]
-
-    def test_pass_session_id_before_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["--pass-session-id", "chat"])
-        assert getattr(args, "pass_session_id", False) is True
-
-    def test_resume_before_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["-r", "abc123", "chat"])
-        assert getattr(args, "resume", None) == "abc123"
-
-
-class TestFlagAfterSubcommand:
-    """Flags placed after 'chat' must still work."""
-
-    def test_yolo_after_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["chat", "--yolo"])
-        assert getattr(args, "yolo", False) is True
-
-    def test_worktree_after_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["chat", "-w"])
-        assert getattr(args, "worktree", False) is True
-
-    def test_skills_after_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["chat", "-s", "myskill"])
-        assert getattr(args, "skills", None) == ["myskill"]
-
-    def test_resume_after_chat(self):
-        parser = _build_parser()
-        args = parser.parse_args(["chat", "-r", "abc123"])
-        assert getattr(args, "resume", None) == "abc123"
-
-
-class TestNoSubcommandDefaults:
-    """When no subcommand is given, flags must work and defaults must hold."""
-
-    def test_yolo_no_subcommand(self):
-        parser = _build_parser()
-        args = parser.parse_args(["--yolo"])
-        assert args.yolo is True
-        assert args.command is None
-
-    def test_defaults_no_flags(self):
-        parser = _build_parser()
-        args = parser.parse_args([])
-        assert getattr(args, "yolo", False) is False
-        assert getattr(args, "worktree", False) is False
-        assert getattr(args, "skills", None) is None
-        assert getattr(args, "resume", None) is None
-
-    def test_defaults_chat_no_flags(self):
-        parser = _build_parser()
-        args = parser.parse_args(["chat"])
-        # With SUPPRESS, these fall through to parent defaults
-        assert getattr(args, "yolo", False) is False
-        assert getattr(args, "worktree", False) is False
-        assert getattr(args, "skills", None) is None
-
-
 class TestYoloEnvVar:
    """Verify --yolo sets HERMES_YOLO_MODE regardless of flag position.

@@ -703,3 +703,231 @@ def test_auth_remove_claude_code_suppresses_reseed(tmp_path, monkeypatch):
    suppressed = updated.get("suppressed_sources", {})
    assert "anthropic" in suppressed
    assert "claude_code" in suppressed["anthropic"]
+
+
+def test_unsuppress_credential_source_clears_marker(tmp_path, monkeypatch):
+    """unsuppress_credential_source() removes a previously-set marker."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1})
+
+    from hermes_cli.auth import suppress_credential_source, unsuppress_credential_source, is_source_suppressed
+
+    suppress_credential_source("openai-codex", "device_code")
+    assert is_source_suppressed("openai-codex", "device_code") is True
+
+    cleared = unsuppress_credential_source("openai-codex", "device_code")
+    assert cleared is True
+    assert is_source_suppressed("openai-codex", "device_code") is False
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    # Empty suppressed_sources dict should be cleaned up entirely
+    assert "suppressed_sources" not in payload
+
+
+def test_unsuppress_credential_source_returns_false_when_absent(tmp_path, monkeypatch):
+    """unsuppress_credential_source() returns False if no marker exists."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1})
+
+    from hermes_cli.auth import unsuppress_credential_source
+
+    assert unsuppress_credential_source("openai-codex", "device_code") is False
+    assert unsuppress_credential_source("nonexistent", "whatever") is False
+
+
+def test_unsuppress_credential_source_preserves_other_markers(tmp_path, monkeypatch):
+    """Clearing one marker must not affect unrelated markers."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1})
+
+    from hermes_cli.auth import (
+        suppress_credential_source,
+        unsuppress_credential_source,
+        is_source_suppressed,
+    )
+
+    suppress_credential_source("openai-codex", "device_code")
+    suppress_credential_source("anthropic", "claude_code")
+
+    assert unsuppress_credential_source("openai-codex", "device_code") is True
+    assert is_source_suppressed("anthropic", "claude_code") is True
+
+
+def test_auth_remove_codex_device_code_suppresses_reseed(tmp_path, monkeypatch):
+    """Removing an auto-seeded openai-codex credential must mark the source as suppressed."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, {"device_code"}),
+    )
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+
+    auth_store = {
+        "version": 1,
+        "providers": {
+            "openai-codex": {
+                "tokens": {
+                    "access_token": "acc-1",
+                    "refresh_token": "ref-1",
+                },
+            },
+        },
+        "credential_pool": {
+            "openai-codex": [{
+                "id": "cx1",
+                "label": "codex-auto",
+                "auth_type": "oauth",
+                "priority": 0,
+                "source": "device_code",
+                "access_token": "acc-1",
+                "refresh_token": "ref-1",
+            }]
+        },
+    }
+    (hermes_home / "auth.json").write_text(json.dumps(auth_store))
+
+    from types import SimpleNamespace
+    from hermes_cli.auth_commands import auth_remove_command
+
+    auth_remove_command(SimpleNamespace(provider="openai-codex", target="1"))
+
+    updated = json.loads((hermes_home / "auth.json").read_text())
+    suppressed = updated.get("suppressed_sources", {})
+    assert "openai-codex" in suppressed
+    assert "device_code" in suppressed["openai-codex"]
+    # Tokens in providers state should also be cleared
+    assert "openai-codex" not in updated.get("providers", {})
+
+
+def test_auth_remove_codex_manual_source_suppresses_reseed(tmp_path, monkeypatch):
+    """Removing a manually-added (`manual:device_code`) openai-codex credential must also suppress."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, set()),
+    )
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+
+    auth_store = {
+        "version": 1,
+        "providers": {
+            "openai-codex": {
+                "tokens": {
+                    "access_token": "acc-2",
+                    "refresh_token": "ref-2",
+                },
+            },
+        },
+        "credential_pool": {
+            "openai-codex": [{
+                "id": "cx2",
+                "label": "manual-codex",
+                "auth_type": "oauth",
+                "priority": 0,
+                "source": "manual:device_code",
+                "access_token": "acc-2",
+                "refresh_token": "ref-2",
+            }]
+        },
+    }
+    (hermes_home / "auth.json").write_text(json.dumps(auth_store))
+
+    from types import SimpleNamespace
+    from hermes_cli.auth_commands import auth_remove_command
+
+    auth_remove_command(SimpleNamespace(provider="openai-codex", target="1"))
+
+    updated = json.loads((hermes_home / "auth.json").read_text())
+    suppressed = updated.get("suppressed_sources", {})
+    # Critical: manual:device_code source must also trigger the suppression path
+    assert "openai-codex" in suppressed
+    assert "device_code" in suppressed["openai-codex"]
+    assert "openai-codex" not in updated.get("providers", {})
+
+
+def test_auth_add_codex_clears_suppression_marker(tmp_path, monkeypatch):
+    """Re-linking codex via `hermes auth add openai-codex` must clear any suppression marker."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, set()),
+    )
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+
+    # Pre-existing suppression (simulating a prior `hermes auth remove`)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1,
+        "providers": {},
+        "suppressed_sources": {"openai-codex": ["device_code"]},
+    }))
+
+    token = _jwt_with_email("codex@example.com")
+    monkeypatch.setattr(
+        "hermes_cli.auth._codex_device_code_login",
+        lambda: {
+            "tokens": {
+                "access_token": token,
+                "refresh_token": "refreshed",
+            },
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "last_refresh": "2026-01-01T00:00:00Z",
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_add_command
+
+    class _Args:
+        provider = "openai-codex"
+        auth_type = "oauth"
+        api_key = None
+        label = None
+
+    auth_add_command(_Args())
+
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    # Suppression marker must be cleared
+    assert "openai-codex" not in payload.get("suppressed_sources", {})
+    # New pool entry must be present
+    entries = payload["credential_pool"]["openai-codex"]
+    assert any(e["source"] == "manual:device_code" for e in entries)
+
+
+def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch):
+    """_seed_from_singletons() for openai-codex must skip auto-import when suppressed."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+
+    # Suppression marker in place
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1,
+        "providers": {},
+        "suppressed_sources": {"openai-codex": ["device_code"]},
+    }))
+
+    # Make _import_codex_cli_tokens return tokens — these would normally trigger
+    # a re-seed, but suppression must skip it.
+    def _fake_import():
+        return {
+            "access_token": "would-be-reimported",
+            "refresh_token": "would-be-reimported",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth._import_codex_cli_tokens", _fake_import)
+
+    from agent.credential_pool import _seed_from_singletons
+
+    entries = []
+    changed, active_sources = _seed_from_singletons("openai-codex", entries)
+
+    # With suppression in place: nothing changes, no entries added, no sources
+    assert changed is False
+    assert entries == []
+    assert active_sources == set()
+
+    # Verify the auth store was NOT modified (no auto-import happened)
+    after = json.loads((hermes_home / "auth.json").read_text())
+    assert "openai-codex" not in after.get("providers", {})
@@ -299,3 +299,160 @@ def test_mint_retry_uses_latest_rotated_refresh_token(tmp_path, monkeypatch):
    assert creds["api_key"] == "agent-key"
    assert refresh_calls == ["refresh-old", "refresh-1"]

+
+# =============================================================================
+# _login_nous: "Skip (keep current)" must preserve prior provider + model
+# =============================================================================
+
+
+class TestLoginNousSkipKeepsCurrent:
+    """When a user runs `hermes model` → Nous Portal → Skip (keep current) after
+    a successful OAuth login, the prior provider and model MUST be preserved.
+
+    Regression: previously, _update_config_for_provider was called
+    unconditionally after login, which flipped model.provider to "nous" while
+    keeping the old model.default (e.g. anthropic/claude-opus-4.6 from
+    OpenRouter), leaving the user with a mismatched provider/model pair.
+    """
+
+    def _setup_home_with_openrouter(self, tmp_path, monkeypatch):
+        import yaml
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(yaml.safe_dump({
+            "model": {
+                "provider": "openrouter",
+                "default": "anthropic/claude-opus-4.6",
+            },
+        }, sort_keys=False))
+
+        auth_path = hermes_home / "auth.json"
+        auth_path.write_text(json.dumps({
+            "version": 1,
+            "active_provider": "openrouter",
+            "providers": {"openrouter": {"api_key": "sk-or-fake"}},
+        }))
+        return hermes_home, config_path, auth_path
+
+    def _patch_login_internals(self, monkeypatch, *, prompt_returns):
+        """Patch OAuth + model-list + prompt so _login_nous doesn't hit network."""
+        import hermes_cli.auth as auth_mod
+        import hermes_cli.models as models_mod
+        import hermes_cli.nous_subscription as ns
+
+        fake_auth_state = {
+            "access_token": "fake-nous-token",
+            "agent_key": "fake-agent-key",
+            "inference_base_url": "https://inference-api.nousresearch.com",
+            "portal_base_url": "https://portal.nousresearch.com",
+            "refresh_token": "fake-refresh",
+            "token_expires_at": 9999999999,
+        }
+        monkeypatch.setattr(
+            auth_mod, "_nous_device_code_login",
+            lambda **kwargs: dict(fake_auth_state),
+        )
+        monkeypatch.setattr(
+            auth_mod, "_prompt_model_selection",
+            lambda *a, **kw: prompt_returns,
+        )
+        monkeypatch.setattr(models_mod, "get_pricing_for_provider", lambda p: {})
+        monkeypatch.setattr(models_mod, "filter_nous_free_models", lambda ids, p: ids)
+        monkeypatch.setattr(models_mod, "check_nous_free_tier", lambda: None)
+        monkeypatch.setattr(
+            models_mod, "partition_nous_models_by_tier",
+            lambda ids, p, free_tier=False: (ids, []),
+        )
+        monkeypatch.setattr(ns, "prompt_enable_tool_gateway", lambda cfg: None)
+
+    def test_skip_keep_current_preserves_provider_and_model(self, tmp_path, monkeypatch):
+        """User picks Skip → config.yaml untouched, Nous creds still saved."""
+        import argparse
+        import yaml
+        from hermes_cli.auth import PROVIDER_REGISTRY, _login_nous
+
+        hermes_home, config_path, auth_path = self._setup_home_with_openrouter(
+            tmp_path, monkeypatch,
+        )
+        self._patch_login_internals(monkeypatch, prompt_returns=None)
+
+        args = argparse.Namespace(
+            portal_url=None, inference_url=None, client_id=None, scope=None,
+            no_browser=True, timeout=15.0, ca_bundle=None, insecure=False,
+        )
+        _login_nous(args, PROVIDER_REGISTRY["nous"])
+
+        # config.yaml model section must be unchanged
+        cfg_after = yaml.safe_load(config_path.read_text())
+        assert cfg_after["model"]["provider"] == "openrouter"
+        assert cfg_after["model"]["default"] == "anthropic/claude-opus-4.6"
+        assert "base_url" not in cfg_after["model"]
+
+        # auth.json: active_provider restored to openrouter, but Nous creds saved
+        auth_after = json.loads(auth_path.read_text())
+        assert auth_after["active_provider"] == "openrouter"
+        assert "nous" in auth_after["providers"]
+        assert auth_after["providers"]["nous"]["access_token"] == "fake-nous-token"
+        # Existing openrouter creds still intact
+        assert auth_after["providers"]["openrouter"]["api_key"] == "sk-or-fake"
+
+    def test_picking_model_switches_to_nous(self, tmp_path, monkeypatch):
+        """User picks a Nous model → provider flips to nous with that model."""
+        import argparse
+        import yaml
+        from hermes_cli.auth import PROVIDER_REGISTRY, _login_nous
+
+        hermes_home, config_path, auth_path = self._setup_home_with_openrouter(
+            tmp_path, monkeypatch,
+        )
+        self._patch_login_internals(
+            monkeypatch, prompt_returns="xiaomi/mimo-v2-pro",
+        )
+
+        args = argparse.Namespace(
+            portal_url=None, inference_url=None, client_id=None, scope=None,
+            no_browser=True, timeout=15.0, ca_bundle=None, insecure=False,
+        )
+        _login_nous(args, PROVIDER_REGISTRY["nous"])
+
+        cfg_after = yaml.safe_load(config_path.read_text())
+        assert cfg_after["model"]["provider"] == "nous"
+        assert cfg_after["model"]["default"] == "xiaomi/mimo-v2-pro"
+
+        auth_after = json.loads(auth_path.read_text())
+        assert auth_after["active_provider"] == "nous"
+
+    def test_skip_with_no_prior_active_provider_clears_it(self, tmp_path, monkeypatch):
+        """Fresh install (no prior active_provider) → Skip clears active_provider
+        instead of leaving it as nous."""
+        import argparse
+        import yaml
+        from hermes_cli.auth import PROVIDER_REGISTRY, _login_nous
+
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(yaml.safe_dump({"model": {}}, sort_keys=False))
+
+        # No auth.json yet — simulates first-run before any OAuth
+        self._patch_login_internals(monkeypatch, prompt_returns=None)
+
+        args = argparse.Namespace(
+            portal_url=None, inference_url=None, client_id=None, scope=None,
+            no_browser=True, timeout=15.0, ca_bundle=None, insecure=False,
+        )
+        _login_nous(args, PROVIDER_REGISTRY["nous"])
+
+        auth_path = hermes_home / "auth.json"
+        auth_after = json.loads(auth_path.read_text())
+        # active_provider should NOT be set to "nous" after Skip
+        assert auth_after.get("active_provider") in (None, "")
+        # But Nous creds are still saved
+        assert "nous" in auth_after.get("providers", {})
+
+
@@ -449,20 +449,6 @@ class TestRunDebug:
 # Argparse integration
 # ---------------------------------------------------------------------------

-class TestArgparseIntegration:
-    def test_module_imports_clean(self):
-        from hermes_cli.debug import run_debug, run_debug_share
-        assert callable(run_debug)
-        assert callable(run_debug_share)
-
-    def test_cmd_debug_dispatches(self):
-        from hermes_cli.main import cmd_debug
-
-        args = MagicMock()
-        args.debug_command = None
-        cmd_debug(args)
-
-
 # ---------------------------------------------------------------------------
 # Delete / auto-delete
 # ---------------------------------------------------------------------------
@@ -0,0 +1,217 @@
+"""Unit tests for hermes_cli/dingtalk_auth.py (QR device-flow registration)."""
+from __future__ import annotations
+
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# API layer — _api_post + error mapping
+# ---------------------------------------------------------------------------
+
+
+class TestApiPost:
+
+    def test_raises_on_network_error(self):
+        import requests
+        from hermes_cli.dingtalk_auth import _api_post, RegistrationError
+
+        with patch("hermes_cli.dingtalk_auth.requests.post",
+                   side_effect=requests.ConnectionError("nope")):
+            with pytest.raises(RegistrationError, match="Network error"):
+                _api_post("/app/registration/init", {"source": "hermes"})
+
+    def test_raises_on_nonzero_errcode(self):
+        from hermes_cli.dingtalk_auth import _api_post, RegistrationError
+
+        mock_resp = MagicMock()
+        mock_resp.raise_for_status = MagicMock()
+        mock_resp.json.return_value = {"errcode": 42, "errmsg": "boom"}
+
+        with patch("hermes_cli.dingtalk_auth.requests.post", return_value=mock_resp):
+            with pytest.raises(RegistrationError, match=r"boom \(errcode=42\)"):
+                _api_post("/app/registration/init", {"source": "hermes"})
+
+    def test_returns_data_on_success(self):
+        from hermes_cli.dingtalk_auth import _api_post
+
+        mock_resp = MagicMock()
+        mock_resp.raise_for_status = MagicMock()
+        mock_resp.json.return_value = {"errcode": 0, "nonce": "abc"}
+
+        with patch("hermes_cli.dingtalk_auth.requests.post", return_value=mock_resp):
+            result = _api_post("/app/registration/init", {"source": "hermes"})
+            assert result["nonce"] == "abc"
+
+
+# ---------------------------------------------------------------------------
+# begin_registration — 2-step nonce → device_code chain
+# ---------------------------------------------------------------------------
+
+
+class TestBeginRegistration:
+
+    def test_chains_init_then_begin(self):
+        from hermes_cli.dingtalk_auth import begin_registration
+
+        responses = [
+            {"errcode": 0, "nonce": "nonce123"},
+            {
+                "errcode": 0,
+                "device_code": "dev-xyz",
+                "verification_uri_complete": "https://open-dev.dingtalk.com/openapp/registration/openClaw?user_code=ABCD",
+                "expires_in": 7200,
+                "interval": 2,
+            },
+        ]
+        with patch("hermes_cli.dingtalk_auth._api_post", side_effect=responses):
+            result = begin_registration()
+
+        assert result["device_code"] == "dev-xyz"
+        assert "verification_uri_complete" in result
+        assert result["interval"] == 2
+        assert result["expires_in"] == 7200
+
+    def test_missing_nonce_raises(self):
+        from hermes_cli.dingtalk_auth import begin_registration, RegistrationError
+
+        with patch("hermes_cli.dingtalk_auth._api_post",
+                   return_value={"errcode": 0, "nonce": ""}):
+            with pytest.raises(RegistrationError, match="missing nonce"):
+                begin_registration()
+
+    def test_missing_device_code_raises(self):
+        from hermes_cli.dingtalk_auth import begin_registration, RegistrationError
+
+        responses = [
+            {"errcode": 0, "nonce": "n1"},
+            {"errcode": 0, "verification_uri_complete": "http://x"},  # no device_code
+        ]
+        with patch("hermes_cli.dingtalk_auth._api_post", side_effect=responses):
+            with pytest.raises(RegistrationError, match="missing device_code"):
+                begin_registration()
+
+    def test_missing_verification_uri_raises(self):
+        from hermes_cli.dingtalk_auth import begin_registration, RegistrationError
+
+        responses = [
+            {"errcode": 0, "nonce": "n1"},
+            {"errcode": 0, "device_code": "dev"},  # no verification_uri_complete
+        ]
+        with patch("hermes_cli.dingtalk_auth._api_post", side_effect=responses):
+            with pytest.raises(RegistrationError,
+                               match="missing verification_uri_complete"):
+                begin_registration()
+
+
+# ---------------------------------------------------------------------------
+# wait_for_registration_success — polling loop
+# ---------------------------------------------------------------------------
+
+
+class TestWaitForSuccess:
+
+    def test_returns_credentials_on_success(self):
+        from hermes_cli.dingtalk_auth import wait_for_registration_success
+
+        responses = [
+            {"status": "WAITING"},
+            {"status": "WAITING"},
+            {"status": "SUCCESS", "client_id": "cid-1", "client_secret": "sec-1"},
+        ]
+        with patch("hermes_cli.dingtalk_auth.poll_registration", side_effect=responses), \
+             patch("hermes_cli.dingtalk_auth.time.sleep"):
+            cid, secret = wait_for_registration_success(
+                device_code="dev", interval=0, expires_in=60
+            )
+            assert cid == "cid-1"
+            assert secret == "sec-1"
+
+    def test_success_without_credentials_raises(self):
+        from hermes_cli.dingtalk_auth import wait_for_registration_success, RegistrationError
+
+        with patch("hermes_cli.dingtalk_auth.poll_registration",
+                   return_value={"status": "SUCCESS", "client_id": "", "client_secret": ""}), \
+             patch("hermes_cli.dingtalk_auth.time.sleep"):
+            with pytest.raises(RegistrationError, match="credentials are missing"):
+                wait_for_registration_success(
+                    device_code="dev", interval=0, expires_in=60
+                )
+
+    def test_invokes_waiting_callback(self):
+        from hermes_cli.dingtalk_auth import wait_for_registration_success
+
+        callback = MagicMock()
+        responses = [
+            {"status": "WAITING"},
+            {"status": "WAITING"},
+            {"status": "SUCCESS", "client_id": "cid", "client_secret": "sec"},
+        ]
+        with patch("hermes_cli.dingtalk_auth.poll_registration", side_effect=responses), \
+             patch("hermes_cli.dingtalk_auth.time.sleep"):
+            wait_for_registration_success(
+                device_code="dev", interval=0, expires_in=60, on_waiting=callback
+            )
+        assert callback.call_count == 2
+
+
+# ---------------------------------------------------------------------------
+# QR rendering — terminal output
+# ---------------------------------------------------------------------------
+
+
+class TestRenderQR:
+
+    def test_returns_false_when_qrcode_missing(self, monkeypatch):
+        from hermes_cli import dingtalk_auth
+
+        # Simulate qrcode import failure
+        monkeypatch.setitem(sys.modules, "qrcode", None)
+        assert dingtalk_auth.render_qr_to_terminal("https://example.com") is False
+
+    def test_prints_when_qrcode_available(self, capsys):
+        """End-to-end: render a real QR and verify SOMETHING got printed."""
+        try:
+            import qrcode  # noqa: F401
+        except ImportError:
+            pytest.skip("qrcode library not available")
+
+        from hermes_cli.dingtalk_auth import render_qr_to_terminal
+        result = render_qr_to_terminal("https://example.com/test")
+        captured = capsys.readouterr()
+        assert result is True
+        assert len(captured.out) > 100  # rendered matrix is non-trivial
+
+
+# ---------------------------------------------------------------------------
+# Configuration — env var overrides
+# ---------------------------------------------------------------------------
+
+
+class TestConfigOverrides:
+
+    def test_base_url_default(self, monkeypatch):
+        monkeypatch.delenv("DINGTALK_REGISTRATION_BASE_URL", raising=False)
+        # Force module reload to pick up current env
+        import importlib
+        import hermes_cli.dingtalk_auth as mod
+        importlib.reload(mod)
+        assert mod.REGISTRATION_BASE_URL == "https://oapi.dingtalk.com"
+
+    def test_base_url_override_via_env(self, monkeypatch):
+        monkeypatch.setenv("DINGTALK_REGISTRATION_BASE_URL",
+                           "https://test.example.com/")
+        import importlib
+        import hermes_cli.dingtalk_auth as mod
+        importlib.reload(mod)
+        # Trailing slash stripped
+        assert mod.REGISTRATION_BASE_URL == "https://test.example.com"
+
+    def test_source_default(self, monkeypatch):
+        monkeypatch.delenv("DINGTALK_REGISTRATION_SOURCE", raising=False)
+        import importlib
+        import hermes_cli.dingtalk_auth as mod
+        importlib.reload(mod)
+        assert mod.REGISTRATION_SOURCE == "openClaw"
@@ -539,3 +539,64 @@ class TestDispatcher:
        mcp_command(_make_args(mcp_action=None))
        out = capsys.readouterr().out
        assert "Commands:" in out or "No MCP servers" in out
+
+
+# ---------------------------------------------------------------------------
+# Tests: Task 7 consolidation — cmd_mcp_remove evicts manager cache,
+# cmd_mcp_login forces re-auth
+# ---------------------------------------------------------------------------
+
+
+class TestMcpRemoveEvictsManager:
+    def test_remove_evicts_in_memory_provider(self, tmp_path, capsys, monkeypatch):
+        """After cmd_mcp_remove, the MCPOAuthManager no longer caches the provider."""
+        _seed_config(tmp_path, {
+            "oauth-srv": {"url": "https://example.com/mcp", "auth": "oauth"},
+        })
+        monkeypatch.setattr("builtins.input", lambda _: "y")
+        monkeypatch.setattr(
+            "hermes_cli.mcp_config.get_hermes_home", lambda: tmp_path
+        )
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+        from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests
+        reset_manager_for_tests()
+
+        mgr = get_manager()
+        mgr.get_or_build_provider(
+            "oauth-srv", "https://example.com/mcp", None,
+        )
+        assert "oauth-srv" in mgr._entries
+
+        from hermes_cli.mcp_config import cmd_mcp_remove
+        cmd_mcp_remove(_make_args(name="oauth-srv"))
+
+        assert "oauth-srv" not in mgr._entries
+
+
+class TestMcpLogin:
+    def test_login_rejects_unknown_server(self, tmp_path, capsys):
+        _seed_config(tmp_path, {})
+        from hermes_cli.mcp_config import cmd_mcp_login
+        cmd_mcp_login(_make_args(name="ghost"))
+        out = capsys.readouterr().out
+        assert "not found" in out
+
+    def test_login_rejects_non_oauth_server(self, tmp_path, capsys):
+        _seed_config(tmp_path, {
+            "srv": {"url": "https://example.com/mcp", "auth": "header"},
+        })
+        from hermes_cli.mcp_config import cmd_mcp_login
+        cmd_mcp_login(_make_args(name="srv"))
+        out = capsys.readouterr().out
+        assert "not configured for OAuth" in out
+
+    def test_login_rejects_stdio_server(self, tmp_path, capsys):
+        _seed_config(tmp_path, {
+            "srv": {"command": "npx", "args": ["some-server"]},
+        })
+        from hermes_cli.mcp_config import cmd_mcp_login
+        cmd_mcp_login(_make_args(name="srv"))
+        out = capsys.readouterr().out
+        assert "no URL" in out or "not an OAuth" in out
+
@@ -93,6 +93,59 @@ class TestCopilotDotPreservation:
        assert result == expected


+# ── Copilot model-name normalization (issue #6879 regression) ──────────
+
+class TestCopilotModelNormalization:
+    """Copilot requires bare dot-notation model IDs.
+
+    Regression coverage for issue #6879 and the broken Copilot branch
+    that previously left vendor-prefixed Anthropic IDs (e.g.
+    ``anthropic/claude-sonnet-4.6``) and dash-notation Claude IDs (e.g.
+    ``claude-sonnet-4-6``) unchanged, causing the Copilot API to reject
+    the request with HTTP 400 "model_not_supported".
+    """
+
+    @pytest.mark.parametrize("model,expected", [
+        # Vendor-prefixed Anthropic IDs — prefix must be stripped.
+        ("anthropic/claude-opus-4.6",   "claude-opus-4.6"),
+        ("anthropic/claude-sonnet-4.6", "claude-sonnet-4.6"),
+        ("anthropic/claude-sonnet-4.5", "claude-sonnet-4.5"),
+        ("anthropic/claude-haiku-4.5",  "claude-haiku-4.5"),
+        # Vendor-prefixed OpenAI IDs — prefix must be stripped.
+        ("openai/gpt-5.4",              "gpt-5.4"),
+        ("openai/gpt-4o",               "gpt-4o"),
+        ("openai/gpt-4o-mini",          "gpt-4o-mini"),
+        # Dash-notation Claude IDs — must be converted to dot-notation.
+        ("claude-opus-4-6",             "claude-opus-4.6"),
+        ("claude-sonnet-4-6",           "claude-sonnet-4.6"),
+        ("claude-sonnet-4-5",           "claude-sonnet-4.5"),
+        ("claude-haiku-4-5",            "claude-haiku-4.5"),
+        # Combined: vendor-prefixed + dash-notation.
+        ("anthropic/claude-opus-4-6",   "claude-opus-4.6"),
+        ("anthropic/claude-sonnet-4-6", "claude-sonnet-4.6"),
+        # Already-canonical inputs pass through unchanged.
+        ("claude-sonnet-4.6",           "claude-sonnet-4.6"),
+        ("gpt-5.4",                     "gpt-5.4"),
+        ("gpt-5-mini",                  "gpt-5-mini"),
+    ])
+    def test_copilot_normalization(self, model, expected):
+        assert normalize_model_for_provider(model, "copilot") == expected
+
+    @pytest.mark.parametrize("model,expected", [
+        ("anthropic/claude-sonnet-4.6", "claude-sonnet-4.6"),
+        ("claude-sonnet-4-6",           "claude-sonnet-4.6"),
+        ("claude-opus-4-6",             "claude-opus-4.6"),
+        ("openai/gpt-5.4",              "gpt-5.4"),
+    ])
+    def test_copilot_acp_normalization(self, model, expected):
+        """Copilot ACP shares the same API expectations as HTTP Copilot."""
+        assert normalize_model_for_provider(model, "copilot-acp") == expected
+
+    def test_openai_codex_still_strips_openai_prefix(self):
+        """Regression: openai-codex must still strip the openai/ prefix."""
+        assert normalize_model_for_provider("openai/gpt-5.4", "openai-codex") == "gpt-5.4"
+
+
 # ── Aggregator providers (regression) ──────────────────────────────────

 class TestAggregatorProviders:
@@ -0,0 +1,62 @@
+"""Tests for the prompt_toolkit /model picker scroll viewport.
+
+Regression for: when a provider exposes many models (e.g. Ollama Cloud's
+36+), the picker rendered every choice into a Window with no max height,
+clipping the bottom border and any items past the terminal's last row.
+The viewport helper now caps visible items and slides the offset to keep
+the cursor on screen.
+"""
+from cli import HermesCLI
+
+
+_compute = HermesCLI._compute_model_picker_viewport
+
+
+class TestPickerViewport:
+    def test_short_list_no_scroll(self):
+        offset, visible = _compute(selected=0, scroll_offset=0, n=5, term_rows=30)
+        assert offset == 0
+        assert visible == 5
+
+    def test_long_list_caps_visible_to_chrome_budget(self):
+        # 30 rows minus reserved_below=6 minus panel_chrome=6 → max_visible=18.
+        offset, visible = _compute(selected=0, scroll_offset=0, n=36, term_rows=30)
+        assert visible == 18
+        assert offset == 0
+
+    def test_cursor_past_window_scrolls_down(self):
+        offset, visible = _compute(selected=22, scroll_offset=0, n=36, term_rows=30)
+        assert visible == 18
+        assert 22 in range(offset, offset + visible)
+
+    def test_cursor_above_window_scrolls_up(self):
+        offset, visible = _compute(selected=3, scroll_offset=15, n=36, term_rows=30)
+        assert offset == 3
+        assert 3 in range(offset, offset + visible)
+
+    def test_offset_clamped_to_bottom(self):
+        # Selected on the last item — offset must keep the visible window
+        # full, not walk past the end of the list.
+        offset, visible = _compute(selected=35, scroll_offset=0, n=36, term_rows=30)
+        assert offset + visible == 36
+        assert 35 in range(offset, offset + visible)
+
+    def test_tiny_terminal_uses_minimum_visible(self):
+        # term_rows below the chrome budget falls back to the floor of 3 rows.
+        _, visible = _compute(selected=0, scroll_offset=0, n=20, term_rows=10)
+        assert visible == 3
+
+    def test_offset_recovers_after_stage_switch(self):
+        # When the user backs out of the model stage and re-enters with
+        # selected=0, a stale offset from the previous stage must collapse.
+        offset, visible = _compute(selected=0, scroll_offset=25, n=36, term_rows=30)
+        assert offset == 0
+        assert 0 in range(offset, offset + visible)
+
+    def test_full_navigation_keeps_cursor_visible(self):
+        offset = 0
+        for cursor in list(range(36)) + list(range(35, -1, -1)):
+            offset, visible = _compute(cursor, offset, n=36, term_rows=30)
+            assert cursor in range(offset, offset + visible), (
+                f"cursor={cursor} out of view: offset={offset} visible={visible}"
+            )
@@ -0,0 +1,252 @@
+"""Regression tests for OpenCode /v1 stripping during /model switch.
+
+When switching to an Anthropic-routed OpenCode model mid-session (e.g.
+``/model minimax-m2.7`` on opencode-go, or ``/model claude-sonnet-4-6``
+on opencode-zen), the resolved base_url must have its trailing ``/v1``
+stripped before being handed to the Anthropic SDK.
+
+Without the strip, the SDK prepends its own ``/v1/messages`` path and
+requests hit ``https://opencode.ai/zen/go/v1/v1/messages`` — a double
+``/v1`` that returns OpenCode's website 404 page with HTML body.
+
+``hermes_cli.runtime_provider.resolve_runtime_provider`` already strips
+``/v1`` at fresh agent init (PR #4918), but the ``/model`` mid-session
+switch path in ``hermes_cli.model_switch.switch_model`` was missing the
+same logic — these tests guard against that regression.
+"""
+
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.model_switch import switch_model
+
+
+_MOCK_VALIDATION = {
+    "accepted": True,
+    "persist": True,
+    "recognized": True,
+    "message": None,
+}
+
+
+def _run_opencode_switch(
+    raw_input: str,
+    current_provider: str,
+    current_model: str,
+    current_base_url: str,
+    explicit_provider: str = "",
+    runtime_base_url: str = "",
+):
+    """Run switch_model with OpenCode mocks and return the result.
+
+    runtime_base_url defaults to current_base_url; tests can override it
+    to simulate the credential resolver returning a base_url different
+    from the session's current one.
+    """
+    effective_runtime_base = runtime_base_url or current_base_url
+    with (
+        patch("hermes_cli.model_switch.resolve_alias", return_value=None),
+        patch("hermes_cli.model_switch.list_provider_models", return_value=[]),
+        patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            return_value={
+                "api_key": "sk-opencode-fake",
+                "base_url": effective_runtime_base,
+                "api_mode": "chat_completions",
+            },
+        ),
+        patch(
+            "hermes_cli.models.validate_requested_model",
+            return_value=_MOCK_VALIDATION,
+        ),
+        patch("hermes_cli.model_switch.get_model_info", return_value=None),
+        patch("hermes_cli.model_switch.get_model_capabilities", return_value=None),
+        patch("hermes_cli.models.detect_provider_for_model", return_value=None),
+    ):
+        return switch_model(
+            raw_input=raw_input,
+            current_provider=current_provider,
+            current_model=current_model,
+            current_base_url=current_base_url,
+            current_api_key="sk-opencode-fake",
+            explicit_provider=explicit_provider,
+        )
+
+
+class TestOpenCodeGoV1Strip:
+    """OpenCode Go: ``/model minimax-*`` must strip /v1."""
+
+    def test_switch_to_minimax_m27_strips_v1(self):
+        """GLM-5 → MiniMax-M2.7: base_url loses trailing /v1."""
+        result = _run_opencode_switch(
+            raw_input="minimax-m2.7",
+            current_provider="opencode-go",
+            current_model="glm-5",
+            current_base_url="https://opencode.ai/zen/go/v1",
+        )
+
+        assert result.success, f"switch_model failed: {result.error_message}"
+        assert result.api_mode == "anthropic_messages"
+        assert result.base_url == "https://opencode.ai/zen/go", (
+            f"Expected /v1 stripped for anthropic_messages; got {result.base_url}"
+        )
+
+    def test_switch_to_minimax_m25_strips_v1(self):
+        """Same behavior for M2.5."""
+        result = _run_opencode_switch(
+            raw_input="minimax-m2.5",
+            current_provider="opencode-go",
+            current_model="kimi-k2.5",
+            current_base_url="https://opencode.ai/zen/go/v1",
+        )
+
+        assert result.success
+        assert result.api_mode == "anthropic_messages"
+        assert result.base_url == "https://opencode.ai/zen/go"
+
+    def test_switch_to_glm_leaves_v1_intact(self):
+        """OpenAI-compatible models (GLM, Kimi, MiMo) keep /v1."""
+        result = _run_opencode_switch(
+            raw_input="glm-5.1",
+            current_provider="opencode-go",
+            current_model="minimax-m2.7",
+            current_base_url="https://opencode.ai/zen/go",  # stripped from previous Anthropic model
+            runtime_base_url="https://opencode.ai/zen/go/v1",
+        )
+
+        assert result.success
+        assert result.api_mode == "chat_completions"
+        assert result.base_url == "https://opencode.ai/zen/go/v1", (
+            f"chat_completions must keep /v1; got {result.base_url}"
+        )
+
+    def test_switch_to_kimi_leaves_v1_intact(self):
+        result = _run_opencode_switch(
+            raw_input="kimi-k2.5",
+            current_provider="opencode-go",
+            current_model="glm-5",
+            current_base_url="https://opencode.ai/zen/go/v1",
+        )
+
+        assert result.success
+        assert result.api_mode == "chat_completions"
+        assert result.base_url == "https://opencode.ai/zen/go/v1"
+
+    def test_trailing_slash_also_stripped(self):
+        """``/v1/`` with trailing slash is also stripped cleanly."""
+        result = _run_opencode_switch(
+            raw_input="minimax-m2.7",
+            current_provider="opencode-go",
+            current_model="glm-5",
+            current_base_url="https://opencode.ai/zen/go/v1/",
+        )
+
+        assert result.success
+        assert result.api_mode == "anthropic_messages"
+        assert result.base_url == "https://opencode.ai/zen/go"
+
+
+class TestOpenCodeZenV1Strip:
+    """OpenCode Zen: ``/model claude-*`` must strip /v1."""
+
+    def test_switch_to_claude_sonnet_strips_v1(self):
+        """Gemini → Claude on opencode-zen: /v1 stripped."""
+        result = _run_opencode_switch(
+            raw_input="claude-sonnet-4-6",
+            current_provider="opencode-zen",
+            current_model="gemini-3-flash",
+            current_base_url="https://opencode.ai/zen/v1",
+        )
+
+        assert result.success
+        assert result.api_mode == "anthropic_messages"
+        assert result.base_url == "https://opencode.ai/zen"
+
+    def test_switch_to_gemini_leaves_v1_intact(self):
+        """Gemini on opencode-zen stays on chat_completions with /v1."""
+        result = _run_opencode_switch(
+            raw_input="gemini-3-flash",
+            current_provider="opencode-zen",
+            current_model="claude-sonnet-4-6",
+            current_base_url="https://opencode.ai/zen",  # stripped from previous Claude
+            runtime_base_url="https://opencode.ai/zen/v1",
+        )
+
+        assert result.success
+        assert result.api_mode == "chat_completions"
+        assert result.base_url == "https://opencode.ai/zen/v1"
+
+    def test_switch_to_gpt_uses_codex_responses_keeps_v1(self):
+        """GPT on opencode-zen uses codex_responses api_mode — /v1 kept."""
+        result = _run_opencode_switch(
+            raw_input="gpt-5.4",
+            current_provider="opencode-zen",
+            current_model="claude-sonnet-4-6",
+            current_base_url="https://opencode.ai/zen",
+            runtime_base_url="https://opencode.ai/zen/v1",
+        )
+
+        assert result.success
+        assert result.api_mode == "codex_responses"
+        assert result.base_url == "https://opencode.ai/zen/v1"
+
+
+class TestAgentSwitchModelDefenseInDepth:
+    """run_agent.AIAgent.switch_model() also strips /v1 as defense-in-depth."""
+
+    def test_agent_switch_model_strips_v1_for_anthropic_messages(self):
+        """Even if a caller hands in a /v1 URL, the agent strips it."""
+        from run_agent import AIAgent
+
+        # Build a bare agent instance without running __init__; we only want
+        # to exercise switch_model's base_url normalization logic.
+        agent = AIAgent.__new__(AIAgent)
+        agent.model = "glm-5"
+        agent.provider = "opencode-go"
+        agent.base_url = "https://opencode.ai/zen/go/v1"
+        agent.api_key = "sk-opencode-fake"
+        agent.api_mode = "chat_completions"
+        agent._client_kwargs = {}
+
+        # Intercept the expensive client rebuild — we only need to verify
+        # that base_url was normalized before it reached the Anthropic
+        # client factory.
+        captured = {}
+
+        def _fake_build_anthropic_client(api_key, base_url):
+            captured["api_key"] = api_key
+            captured["base_url"] = base_url
+            return object()  # placeholder client — no real calls expected
+
+        # The downstream cache/plumbing touches a bunch of private state
+        # that wasn't initialized above; we don't want to rebuild the full
+        # runtime for this single assertion, so short-circuit after the
+        # strip by raising inside the stubbed factory.
+        class _Sentinel(Exception):
+            pass
+
+        def _raise_after_capture(api_key, base_url):
+            captured["api_key"] = api_key
+            captured["base_url"] = base_url
+            raise _Sentinel("strip verified")
+
+        with patch(
+            "agent.anthropic_adapter.build_anthropic_client",
+            side_effect=_raise_after_capture,
+        ), patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=""), patch(
+            "agent.anthropic_adapter._is_oauth_token", return_value=False
+        ):
+            with pytest.raises(_Sentinel):
+                agent.switch_model(
+                    new_model="minimax-m2.7",
+                    new_provider="opencode-go",
+                    api_key="sk-opencode-fake",
+                    base_url="https://opencode.ai/zen/go/v1",
+                    api_mode="anthropic_messages",
+                )
+
+        assert captured.get("base_url") == "https://opencode.ai/zen/go", (
+            f"agent.switch_model did not strip /v1; passed {captured.get('base_url')} "
+            "to build_anthropic_client"
+        )
@@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set():
    opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
    
    assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
-    assert opencode_go["models"] == ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
+    assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
    # opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
    # models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
    # the API is unavailable, e.g. in CI).
@@ -173,60 +173,6 @@ class TestMemoryPluginCliDiscovery:
 # ── Honcho register_cli ──────────────────────────────────────────────────


-class TestHonchoRegisterCli:
-    def test_builds_subcommand_tree(self):
-        """register_cli creates the expected subparser tree."""
-        from plugins.memory.honcho.cli import register_cli
-
-        parser = argparse.ArgumentParser()
-        register_cli(parser)
-
-        # Verify key subcommands exist by parsing them
-        args = parser.parse_args(["status"])
-        assert args.honcho_command == "status"
-
-        args = parser.parse_args(["peer", "--user", "alice"])
-        assert args.honcho_command == "peer"
-        assert args.user == "alice"
-
-        args = parser.parse_args(["mode", "tools"])
-        assert args.honcho_command == "mode"
-        assert args.mode == "tools"
-
-        args = parser.parse_args(["tokens", "--context", "500"])
-        assert args.honcho_command == "tokens"
-        assert args.context == 500
-
-        args = parser.parse_args(["--target-profile", "coder", "status"])
-        assert args.target_profile == "coder"
-        assert args.honcho_command == "status"
-
-    def test_setup_redirects_to_memory_setup(self):
-        """hermes honcho setup redirects to memory setup."""
-        from plugins.memory.honcho.cli import register_cli
-
-        parser = argparse.ArgumentParser()
-        register_cli(parser)
-        args = parser.parse_args(["setup"])
-        assert args.honcho_command == "setup"
-
-    def test_mode_choices_are_recall_modes(self):
-        """Mode subcommand uses recall mode choices (hybrid/context/tools)."""
-        from plugins.memory.honcho.cli import register_cli
-
-        parser = argparse.ArgumentParser()
-        register_cli(parser)
-
-        # Valid recall modes should parse
-        for mode in ("hybrid", "context", "tools"):
-            args = parser.parse_args(["mode", mode])
-            assert args.mode == mode
-
-        # Old memoryMode values should fail
-        with pytest.raises(SystemExit):
-            parser.parse_args(["mode", "honcho"])
-
-
 # ── ProviderCollector no-op ──────────────────────────────────────────────


@@ -644,7 +644,7 @@ class TestPluginCommands:
        manifest = PluginManifest(name="test-plugin", source="user")
        ctx = PluginContext(manifest, mgr)

-        with caplog.at_level(logging.WARNING):
+        with caplog.at_level(logging.WARNING, logger="hermes_cli.plugins"):
            ctx.register_command("", lambda a: a)
        assert len(mgr._plugin_commands) == 0
        assert "empty name" in caplog.text
@@ -655,7 +655,7 @@ class TestPluginCommands:
        manifest = PluginManifest(name="test-plugin", source="user")
        ctx = PluginContext(manifest, mgr)

-        with caplog.at_level(logging.WARNING):
+        with caplog.at_level(logging.WARNING, logger="hermes_cli.plugins"):
            ctx.register_command("help", lambda a: a)
        assert "help" not in mgr._plugin_commands
        assert "conflicts" in caplog.text.lower()
@@ -126,59 +126,6 @@ class TestRepoNameFromUrl:
 # ── plugins_command dispatch ──────────────────────────────────────────────


-class TestPluginsCommandDispatch:
-    """Verify alias routing in plugins_command()."""
-
-    def _make_args(self, action, **extras):
-        args = MagicMock()
-        args.plugins_action = action
-        for k, v in extras.items():
-            setattr(args, k, v)
-        return args
-
-    @patch("hermes_cli.plugins_cmd.cmd_remove")
-    def test_rm_alias(self, mock_remove):
-        args = self._make_args("rm", name="some-plugin")
-        plugins_command(args)
-        mock_remove.assert_called_once_with("some-plugin")
-
-    @patch("hermes_cli.plugins_cmd.cmd_remove")
-    def test_uninstall_alias(self, mock_remove):
-        args = self._make_args("uninstall", name="some-plugin")
-        plugins_command(args)
-        mock_remove.assert_called_once_with("some-plugin")
-
-    @patch("hermes_cli.plugins_cmd.cmd_list")
-    def test_ls_alias(self, mock_list):
-        args = self._make_args("ls")
-        plugins_command(args)
-        mock_list.assert_called_once()
-
-    @patch("hermes_cli.plugins_cmd.cmd_toggle")
-    def test_none_falls_through_to_toggle(self, mock_toggle):
-        args = self._make_args(None)
-        plugins_command(args)
-        mock_toggle.assert_called_once()
-
-    @patch("hermes_cli.plugins_cmd.cmd_install")
-    def test_install_dispatches(self, mock_install):
-        args = self._make_args("install", identifier="owner/repo", force=False)
-        plugins_command(args)
-        mock_install.assert_called_once_with("owner/repo", force=False)
-
-    @patch("hermes_cli.plugins_cmd.cmd_update")
-    def test_update_dispatches(self, mock_update):
-        args = self._make_args("update", name="foo")
-        plugins_command(args)
-        mock_update.assert_called_once_with("foo")
-
-    @patch("hermes_cli.plugins_cmd.cmd_remove")
-    def test_remove_dispatches(self, mock_remove):
-        args = self._make_args("remove", name="bar")
-        plugins_command(args)
-        mock_remove.assert_called_once_with("bar")
-
-
 # ── _read_manifest ────────────────────────────────────────────────────────


@@ -2,7 +2,7 @@ from hermes_cli import setup as setup_mod


 def test_prompt_choice_uses_curses_helper(monkeypatch):
-    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0: 1)
+    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0, description=None: 1)

    idx = setup_mod.prompt_choice("Pick one", ["a", "b", "c"], default=0)

@@ -10,7 +10,7 @@ def test_prompt_choice_uses_curses_helper(monkeypatch):


 def test_prompt_choice_falls_back_to_numbered_input(monkeypatch):
-    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0: -1)
+    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0, description=None: -1)
    monkeypatch.setattr("builtins.input", lambda _prompt="": "2")

    idx = setup_mod.prompt_choice("Pick one", ["a", "b", "c"], default=0)
@@ -64,85 +64,3 @@ def _safe_parse(parser, subparsers, argv):
        subparsers.required = False
        return parser.parse_args(argv)

-
-class TestSubparserRoutingFallback:
-    """Verify the bpo-9338 defensive routing works for all key cases."""
-
-    def test_direct_subcommand(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["model"])
-        assert args.command == "model"
-
-    def test_subcommand_with_flags(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["--yolo", "model"])
-        assert args.command == "model"
-        assert args.yolo is True
-
-    def test_bare_hermes_defaults_to_none(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, [])
-        assert args.command is None
-
-    def test_flags_only_defaults_to_none(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["--yolo"])
-        assert args.command is None
-        assert args.yolo is True
-
-    def test_continue_flag_alone(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-c"])
-        assert args.command is None
-        assert args.continue_last is True
-
-    def test_continue_with_session_name(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-c", "myproject"])
-        assert args.command is None
-        assert args.continue_last == "myproject"
-
-    def test_continue_with_subcommand_name_as_session(self):
-        """Edge case: session named 'model' — should be treated as session name, not subcommand."""
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-c", "model"])
-        assert args.command is None
-        assert args.continue_last == "model"
-
-    def test_continue_with_session_then_subcommand(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-c", "myproject", "model"])
-        assert args.command == "model"
-        assert args.continue_last == "myproject"
-
-    def test_chat_with_query(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["chat", "-q", "hello"])
-        assert args.command == "chat"
-        assert args.query == "hello"
-
-    def test_resume_flag(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-r", "abc123"])
-        assert args.command is None
-        assert args.resume == "abc123"
-
-    def test_resume_with_subcommand(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-r", "abc123", "chat"])
-        assert args.command == "chat"
-        assert args.resume == "abc123"
-
-    def test_skills_flag_with_subcommand(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["-s", "myskill", "chat"])
-        assert args.command == "chat"
-        assert args.skills == ["myskill"]
-
-    def test_all_flags_with_subcommand(self):
-        parser, sub = _build_parser()
-        args = _safe_parse(parser, sub, ["--yolo", "-w", "-s", "myskill", "model"])
-        assert args.command == "model"
-        assert args.yolo is True
-        assert args.worktree is True
-        assert args.skills == ["myskill"]
@@ -40,6 +40,19 @@ def test_get_platform_tools_preserves_explicit_empty_selection():
    assert enabled == set()


+def test_get_platform_tools_handles_null_platform_toolsets():
+    """YAML `platform_toolsets:` with no value parses as None — the old
+    ``config.get("platform_toolsets", {})`` pattern would then crash with
+    ``NoneType has no attribute 'get'`` on the next line. Guard against that.
+    """
+    config = {"platform_toolsets": None}
+
+    enabled = _get_platform_tools(config, "cli")
+
+    # Falls through to defaults instead of raising
+    assert enabled
+
+
 def test_platform_toolset_summary_uses_explicit_platform_list():
    config = {}

@@ -466,3 +479,90 @@ def test_numeric_mcp_server_name_does_not_crash_sorted():

    # sorted() must not raise TypeError
    sorted(enabled)
+
+
+# ─── Imagegen Backend Picker Wiring ────────────────────────────────────────
+
+class TestImagegenBackendRegistry:
+    """IMAGEGEN_BACKENDS tags drive the model picker flow in tools_config."""
+
+    def test_fal_backend_registered(self):
+        from hermes_cli.tools_config import IMAGEGEN_BACKENDS
+        assert "fal" in IMAGEGEN_BACKENDS
+
+    def test_fal_catalog_loads_lazily(self):
+        """catalog_fn should defer import to avoid import cycles."""
+        from hermes_cli.tools_config import IMAGEGEN_BACKENDS
+        catalog, default = IMAGEGEN_BACKENDS["fal"]["catalog_fn"]()
+        assert default == "fal-ai/flux-2/klein/9b"
+        assert "fal-ai/flux-2/klein/9b" in catalog
+        assert "fal-ai/flux-2-pro" in catalog
+
+    def test_image_gen_providers_tagged_with_fal_backend(self):
+        """Both Nous Subscription and FAL.ai providers must carry the
+        imagegen_backend tag so _configure_provider fires the picker."""
+        from hermes_cli.tools_config import TOOL_CATEGORIES
+        providers = TOOL_CATEGORIES["image_gen"]["providers"]
+        for p in providers:
+            assert p.get("imagegen_backend") == "fal", (
+                f"{p['name']} missing imagegen_backend tag"
+            )
+
+
+class TestImagegenModelPicker:
+    """_configure_imagegen_model writes selection to config and respects
+    curses fallback semantics (returns default when stdin isn't a TTY)."""
+
+    def test_picker_writes_chosen_model_to_config(self):
+        from hermes_cli.tools_config import _configure_imagegen_model
+        config = {}
+        # Force _prompt_choice to pick index 1 (second-in-ordered-list).
+        with patch("hermes_cli.tools_config._prompt_choice", return_value=1):
+            _configure_imagegen_model("fal", config)
+        # ordered[0] == current (default klein), ordered[1] == first non-default
+        assert config["image_gen"]["model"] != "fal-ai/flux-2/klein/9b"
+        assert config["image_gen"]["model"].startswith("fal-ai/")
+
+    def test_picker_with_gpt_image_does_not_prompt_quality(self):
+        """GPT-Image quality is pinned to medium in the tool's defaults —
+        no follow-up prompt, no config write for quality_setting."""
+        from hermes_cli.tools_config import (
+            _configure_imagegen_model,
+            IMAGEGEN_BACKENDS,
+        )
+        catalog, default_model = IMAGEGEN_BACKENDS["fal"]["catalog_fn"]()
+        model_ids = list(catalog.keys())
+        ordered = [default_model] + [m for m in model_ids if m != default_model]
+        gpt_idx = ordered.index("fal-ai/gpt-image-1.5")
+
+        # Only ONE picker call is expected (for model) — not two (model + quality).
+        call_count = {"n": 0}
+        def fake_prompt(*a, **kw):
+            call_count["n"] += 1
+            return gpt_idx
+
+        config = {}
+        with patch("hermes_cli.tools_config._prompt_choice", side_effect=fake_prompt):
+            _configure_imagegen_model("fal", config)
+
+        assert call_count["n"] == 1, (
+            f"Expected 1 picker call (model only), got {call_count['n']}"
+        )
+        assert config["image_gen"]["model"] == "fal-ai/gpt-image-1.5"
+        assert "quality_setting" not in config["image_gen"]
+
+    def test_picker_no_op_for_unknown_backend(self):
+        from hermes_cli.tools_config import _configure_imagegen_model
+        config = {}
+        _configure_imagegen_model("nonexistent-backend", config)
+        assert config == {}  # untouched
+
+    def test_picker_repairs_corrupt_config_section(self):
+        """When image_gen is a non-dict (user-edit YAML), the picker should
+        replace it with a fresh dict rather than crash."""
+        from hermes_cli.tools_config import _configure_imagegen_model
+        config = {"image_gen": "some-garbage-string"}
+        with patch("hermes_cli.tools_config._prompt_choice", return_value=0):
+            _configure_imagegen_model("fal", config)
+        assert isinstance(config["image_gen"], dict)
+        assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
@@ -1,17 +1,9 @@
 """Tests for Xiaomi MiMo provider support."""

 import os
-import sys
-import types

 import pytest

-# Ensure dotenv doesn't interfere
-if "dotenv" not in sys.modules:
-    fake_dotenv = types.ModuleType("dotenv")
-    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
-    sys.modules["dotenv"] = fake_dotenv
-
 from hermes_cli.auth import (
    PROVIDER_REGISTRY,
    resolve_provider,
--- a/Show More
+++ b/Show More