feat: add WorldSim — OSINT-powered personality simulation skill

Rehoboam-class worldsim. Immersive CLI personality simulator that researches real people via 25+ verified platform access methods, builds 6-layer psychometric profiles, finds star threads (personality compression keys), and generates platform-authentic simulated conversations with mechanical verification and adversarial refinement.

26 files | 38K words | 2,283 lines Python

- Immersive CLI interface (worldsim> prompt, no assistant framing)
- OSINT pipeline: X API, Instagram private API, Bluesky, TikTok, Facebook, Threads, Mastodon, Reddit, GitHub, HN, Medium, Quora, Goodreads, Google Scholar, Crunchbase, podcasts, news/blogs
- Star thread: one-sentence personality compression key per person
- Deep psychometrics: Big Five + Moral Foundations + Schwartz Values + Cognitive Style + Narrative Framing + Behavioral Metadata
- Anti-slop: mechanical detection of LLM writing patterns
- GAN-style adversarial refinement loop with mechanical verification
- Recursive self-improvement: learned rules grow with each simulation
- Rehoboam persistence: SQLite + filesystem for profiles, predictions, social graph, knowledge archives
- GEPA/MIPROv2 self-evolution integration tested and working
- Knowledge archive: per-person source library with citations and semantic retrieval for context-aware grounding

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
feat(tools): add "no_mcp" sentinel to exclude MCP servers per platform
2026-04-08 13:46:20 -04:00 · 2026-04-07 18:00:01 -07:00 · 2026-04-07 18:00:01 -07:00 · 2026-04-07 18:00:01 -07:00 · 2026-04-07 18:00:01 -07:00 · 2026-04-07 17:59:47 -07:00
419 changed files with 32977 additions and 2439 deletions
@@ -14,6 +14,16 @@
 # LLM_MODEL is no longer read from .env — this line is kept for reference only.
 # LLM_MODEL=anthropic/claude-opus-4.6

+# =============================================================================
+# LLM PROVIDER (Google AI Studio / Gemini)
+# =============================================================================
+# Native Gemini API via Google's OpenAI-compatible endpoint.
+# Get your key at: https://aistudio.google.com/app/apikey
+# GOOGLE_API_KEY=your_google_ai_studio_key_here
+# GEMINI_API_KEY=your_gemini_key_here  # alias for GOOGLE_API_KEY
+# Optional base URL override (default: Google's OpenAI-compatible endpoint)
+# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
+
 # =============================================================================
 # LLM PROVIDER (z.ai / GLM)
 # =============================================================================
@@ -19,6 +19,9 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

+      - name: Install system dependencies
+        run: sudo apt-get update && sudo apt-get install -y ripgrep
+
      - name: Install uv
        uses: astral-sh/setup-uv@v5

@@ -15,7 +15,6 @@ Usage::

 import asyncio
 import logging
-import os
 import sys
 from pathlib import Path
 from hermes_constants import get_hermes_home
@@ -262,8 +262,6 @@ class SessionManager:
        if self._db_instance is not None:
            return self._db_instance
        try:
-            import os
-            from pathlib import Path
            from hermes_state import SessionDB
            hermes_home = get_hermes_home()
            self._db_instance = SessionDB(db_path=hermes_home / "state.db")
@@ -39,7 +39,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
    "browser_scroll": "execute",
    "browser_press": "execute",
    "browser_back": "execute",
-    "browser_close": "execute",
    "browser_get_images": "read",
    # Agent internals
    "delegate_task": "execute",
@@ -188,9 +188,7 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
    if not base_url:
        return False
    normalized = base_url.rstrip("/").lower()
-    return normalized.startswith("https://api.minimax.io/anthropic") or normalized.startswith(
-        "https://api.minimaxi.com/anthropic"
-    )
+    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))


 def build_anthropic_client(api_key: str, base_url: str = None):
@@ -708,29 +706,6 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
    }


-def run_hermes_oauth_login() -> Optional[str]:
-    """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription.
-
-    Opens a browser to claude.ai for authorization, prompts for the code,
-    exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json.
-
-    Returns the access token on success, None on failure.
-    """
-    result = run_hermes_oauth_login_pure()
-    if not result:
-        return None
-
-    access_token = result["access_token"]
-    refresh_token = result["refresh_token"]
-    expires_at_ms = result["expires_at_ms"]
-
-    _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms)
-    _write_claude_code_credentials(access_token, refresh_token, expires_at_ms)
-
-    print("Authentication successful!")
-    return access_token
-
-
 def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
    """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json."""
    data = {
@@ -758,38 +733,6 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
    return None


-def refresh_hermes_oauth_token() -> Optional[str]:
-    """Refresh the Hermes-managed OAuth token using the stored refresh token.
-
-    Returns the new access token, or None if refresh fails.
-    """
-    creds = read_hermes_oauth_credentials()
-    if not creds or not creds.get("refreshToken"):
-        return None
-
-    try:
-        refreshed = refresh_anthropic_oauth_pure(
-            creds["refreshToken"],
-            use_json=True,
-        )
-        _save_hermes_oauth_credentials(
-            refreshed["access_token"],
-            refreshed["refresh_token"],
-            refreshed["expires_at_ms"],
-        )
-        _write_claude_code_credentials(
-            refreshed["access_token"],
-            refreshed["refresh_token"],
-            refreshed["expires_at_ms"],
-        )
-        logger.debug("Successfully refreshed Hermes OAuth token")
-        return refreshed["access_token"]
-    except Exception as e:
-        logger.debug("Failed to refresh Hermes OAuth token: %s", e)
-
-    return None
-
-
 # ---------------------------------------------------------------------------
 # Message / tool / response format conversion
 # ---------------------------------------------------------------------------
@@ -847,7 +790,7 @@ def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Di
                },
            }

-    if url.startswith("http://") or url.startswith("https://"):
+    if url.startswith(("http://", "https://")):
        return {
            "type": "image",
            "source": {
@@ -859,35 +802,6 @@ def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Di
    return None


-def _convert_user_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
-    if isinstance(part, dict):
-        ptype = part.get("type")
-        if ptype == "text":
-            block = {"type": "text", "text": part.get("text", "")}
-            if isinstance(part.get("cache_control"), dict):
-                block["cache_control"] = dict(part["cache_control"])
-            return block
-        if ptype == "image_url":
-            return _convert_openai_image_part_to_anthropic(part)
-        if ptype == "image" and part.get("source"):
-            return dict(part)
-        if ptype == "image" and part.get("data"):
-            media_type = part.get("mimeType") or part.get("media_type") or "image/png"
-            return {
-                "type": "image",
-                "source": {
-                    "type": "base64",
-                    "media_type": media_type,
-                    "data": part.get("data", ""),
-                },
-            }
-        if ptype == "tool_result":
-            return dict(part)
-    elif part is not None:
-        return {"type": "text", "text": str(part)}
-    return None
-
-
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -34,6 +34,12 @@ than the provider's default.
 Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
 AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
 custom OpenAI-compatible endpoint without touching the main model settings.
+
+Payment / credit exhaustion fallback:
+  When a resolved provider returns HTTP 402 or a credit-related error,
+  call_llm() automatically retries with the next available provider in the
+  auto-detection chain.  This handles the common case where a user depletes
+  their OpenRouter balance but has Codex OAuth or another provider available.
 """

 import json
@@ -55,6 +61,7 @@ logger = logging.getLogger(__name__)

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
+    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7-highspeed",
@@ -84,6 +91,7 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
+_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -201,7 +209,6 @@ class _CodexCompletionsAdapter:
    def create(self, **kwargs) -> Any:
        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
-        temperature = kwargs.get("temperature")

        # Separate system/instructions from conversation messages.
        # Convert chat.completions multimodal content blocks to Responses
@@ -253,26 +260,73 @@ class _CodexCompletionsAdapter:
        usage = None

        try:
+            # Collect output items and text deltas during streaming —
+            # the Codex backend can return empty response.output from
+            # get_final_response() even when items were streamed.
+            collected_output_items: List[Any] = []
+            collected_text_deltas: List[str] = []
+            has_function_calls = False
            with self._client.responses.stream(**resp_kwargs) as stream:
                for _event in stream:
-                    pass
+                    _etype = getattr(_event, "type", "")
+                    if _etype == "response.output_item.done":
+                        _done = getattr(_event, "item", None)
+                        if _done is not None:
+                            collected_output_items.append(_done)
+                    elif "output_text.delta" in _etype:
+                        _delta = getattr(_event, "delta", "")
+                        if _delta:
+                            collected_text_deltas.append(_delta)
+                    elif "function_call" in _etype:
+                        has_function_calls = True
                final = stream.get_final_response()

-            # Extract text and tool calls from the Responses output
+            # Backfill empty output from collected stream events
+            _output = getattr(final, "output", None)
+            if isinstance(_output, list) and not _output:
+                if collected_output_items:
+                    final.output = list(collected_output_items)
+                    logger.debug(
+                        "Codex auxiliary: backfilled %d output items from stream events",
+                        len(collected_output_items),
+                    )
+                elif collected_text_deltas and not has_function_calls:
+                    # Only synthesize text when no tool calls were streamed —
+                    # a function_call response with incidental text should not
+                    # be collapsed into a plain-text message.
+                    assembled = "".join(collected_text_deltas)
+                    final.output = [SimpleNamespace(
+                        type="message", role="assistant", status="completed",
+                        content=[SimpleNamespace(type="output_text", text=assembled)],
+                    )]
+                    logger.debug(
+                        "Codex auxiliary: synthesized from %d deltas (%d chars)",
+                        len(collected_text_deltas), len(assembled),
+                    )
+
+            # Extract text and tool calls from the Responses output.
+            # Items may be SDK objects (attrs) or dicts (raw/fallback paths),
+            # so use a helper that handles both shapes.
+            def _item_get(obj: Any, key: str, default: Any = None) -> Any:
+                val = getattr(obj, key, None)
+                if val is None and isinstance(obj, dict):
+                    val = obj.get(key, default)
+                return val if val is not None else default
+
            for item in getattr(final, "output", []):
-                item_type = getattr(item, "type", None)
+                item_type = _item_get(item, "type")
                if item_type == "message":
-                    for part in getattr(item, "content", []):
-                        ptype = getattr(part, "type", None)
+                    for part in (_item_get(item, "content") or []):
+                        ptype = _item_get(part, "type")
                        if ptype in ("output_text", "text"):
-                            text_parts.append(getattr(part, "text", ""))
+                            text_parts.append(_item_get(part, "text", ""))
                elif item_type == "function_call":
                    tool_calls_raw.append(SimpleNamespace(
-                        id=getattr(item, "call_id", ""),
+                        id=_item_get(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
-                            name=getattr(item, "name", ""),
-                            arguments=getattr(item, "arguments", "{}"),
+                            name=_item_get(item, "name", ""),
+                            arguments=_item_get(item, "arguments", "{}"),
                        ),
                    ))

@@ -666,7 +720,19 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    model = "gemini-3-flash" if nous.get("source") == "pool" else _NOUS_MODEL
+    if nous.get("source") == "pool":
+        model = "gemini-3-flash"
+    else:
+        model = _NOUS_MODEL
+    # Free-tier users can't use paid auxiliary models — use the free
+    # multimodal model instead so vision/browser-vision still works.
+    try:
+        from hermes_cli.models import check_nous_free_tier
+        if check_nous_free_tier():
+            model = _NOUS_FREE_TIER_VISION_MODEL
+            logger.debug("Free-tier Nous account — using %s for auxiliary/vision", model)
+    except Exception:
+        pass
    return (
        OpenAI(
            api_key=_nous_api_key(nous),
@@ -842,7 +908,7 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st
    if forced == "nous":
        client, model = _try_nous()
        if client is None:
-            logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)")
+            logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes auth)")
        return client, model

    if forced == "codex":
@@ -873,10 +939,90 @@ _AUTO_PROVIDER_LABELS = {
    "_resolve_api_key_provider": "api-key",
 }

-
 _AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})


+def _get_provider_chain() -> List[tuple]:
+    """Return the ordered provider detection chain as (label, try_fn) pairs.
+
+    Built at call time (not module level) so that test patches
+    on the ``_try_*`` functions are picked up correctly.
+    """
+    return [
+        ("openrouter", _try_openrouter),
+        ("nous", _try_nous),
+        ("local/custom", _try_custom_endpoint),
+        ("openai-codex", _try_codex),
+        ("api-key", _resolve_api_key_provider),
+    ]
+
+
+def _is_payment_error(exc: Exception) -> bool:
+    """Detect payment/credit/quota exhaustion errors.
+
+    Returns True for HTTP 402 (Payment Required), and for errors with status
+    429 or no ``status_code`` whose message contains a billing keyword.
+    """
+    status = getattr(exc, "status_code", None)
+    if status == 402:
+        return True
+    err_lower = str(exc).lower()
+    # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
+    # but sometimes send them as 429 or with no status_code (402 here is redundant).
+    if status in (402, 429, None):
+        if any(kw in err_lower for kw in ("credits", "insufficient funds",
+                                           "can only afford", "billing",
+                                           "payment required")):
+            return True
+    return False
+
+
+def _try_payment_fallback(
+    failed_provider: str,
+    task: str = None,
+) -> Tuple[Optional[Any], Optional[str], str]:
+    """Try alternative providers after a payment/credit error.
+
+    Walks ``_get_provider_chain()`` in order, skipping the chain label that
+    ``failed_provider`` maps to; ``task`` is only used in log messages.
+
+    Returns:
+        (client, model, provider_label) or (None, None, "") if no fallback.
+    """
+    # Normalise the failed provider label for matching.
+    skip = failed_provider.lower().strip()
+    # Also skip the main provider when its name occurs in the failed label.
+    # NOTE(review): `in skip` is a substring test on a str, not set membership — confirm.
+    main_provider = _read_main_provider()
+    skip_labels = {skip}
+    if main_provider and main_provider.lower() in skip:
+        skip_labels.add(main_provider.lower())
+    # Map common resolved_provider values back to chain labels.
+    _alias_to_label = {"openrouter": "openrouter", "nous": "nous",
+                       "openai-codex": "openai-codex", "codex": "openai-codex",
+                       "custom": "local/custom", "local/custom": "local/custom"}
+    skip_chain_labels = {_alias_to_label.get(s, s) for s in skip_labels}
+
+    tried = []
+    for label, try_fn in _get_provider_chain():
+        if label in skip_chain_labels:
+            continue
+        client, model = try_fn()
+        if client is not None:
+            logger.info(
+                "Auxiliary %s: payment error on %s — falling back to %s (%s)",
+                task or "call", failed_provider, label, model or "default",
+            )
+            return client, model, label
+        tried.append(label)
+
+    logger.warning(
+        "Auxiliary %s: payment error on %s and no fallback available (tried: %s)",
+        task or "call", failed_provider, ", ".join(tried),
+    )
+    return None, None, ""
+
+
 def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

@@ -904,10 +1050,7 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:

    # ── Step 2: aggregator / fallback chain ──────────────────────────────
    tried = []
-    for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
-                   _try_codex, _resolve_api_key_provider):
-        fn_name = getattr(try_fn, "__name__", "unknown")
-        label = _AUTO_PROVIDER_LABELS.get(fn_name, fn_name)
+    for label, try_fn in _get_provider_chain():
        client, model = try_fn()
        if client is not None:
            if tried:
@@ -999,7 +1142,13 @@ def resolve_provider_client(
    if provider == "codex":
        provider = "openai-codex"
    if provider == "main":
-        provider = "custom"
+        # Resolve to the user's actual main provider so named custom providers
+        # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
+        main_prov = _read_main_provider()
+        if main_prov and main_prov not in ("auto", "main", ""):
+            provider = main_prov
+        else:
+            provider = "custom"

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
@@ -1035,7 +1184,7 @@ def resolve_provider_client(
        client, default = _try_nous()
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
-                           "but Nous Portal not configured (run: hermes login)")
+                           "but Nous Portal not configured (run: hermes auth)")
            return None, None
        final_model = model or default
        return (_to_async_client(client, final_model) if async_mode
@@ -1095,6 +1244,28 @@ def resolve_provider_client(
                       "but no endpoint credentials found")
        return None, None

+    # ── Named custom providers (config.yaml custom_providers list) ───
+    try:
+        from hermes_cli.runtime_provider import _get_named_custom_provider
+        custom_entry = _get_named_custom_provider(provider)
+        if custom_entry:
+            custom_base = custom_entry.get("base_url", "").strip()
+            custom_key = custom_entry.get("api_key", "").strip() or "no-key-required"
+            if custom_base:
+                final_model = model or _read_main_model() or "gpt-4o-mini"
+                client = OpenAI(api_key=custom_key, base_url=custom_base)
+                logger.debug(
+                    "resolve_provider_client: named custom provider %r (%s)",
+                    provider, final_model)
+                return (_to_async_client(client, final_model) if async_mode
+                        else (client, final_model))
+            logger.warning(
+                "resolve_provider_client: named custom provider %r has no base_url",
+                provider)
+            return None, None
+    except ImportError:
+        pass
+
    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
@@ -1215,6 +1386,11 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
    if provider == "codex":
        return "openai-codex"
    if provider == "main":
+        # Resolve to actual main provider — named custom providers and
+        # non-aggregator providers need to pass through as their real name.
+        main_prov = _read_main_provider()
+        if main_prov and main_prov not in ("auto", "main", ""):
+            return main_prov
        return "custom"
    return provider

@@ -1785,12 +1961,15 @@ def call_llm(
                    f"was found. Set the {_explicit.upper()}_API_KEY environment "
                    f"variable, or switch to a different provider with `hermes model`."
                )
-            # For auto/custom, fall back to OpenRouter
+            # For auto/custom with no credentials, try the full auto chain
+            # rather than hardcoding OpenRouter (which may be depleted).
+            # Pass model=None so each provider uses its own default —
+            # resolved_model may be an OpenRouter-format slug that doesn't
+            # work on other providers.
            if not resolved_base_url:
-                logger.info("Auxiliary %s: provider %s unavailable, falling back to openrouter",
+                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client(
-                    "openrouter", resolved_model or _OPENROUTER_MODEL)
+                client, final_model = _get_cached_client("auto")
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -1811,7 +1990,7 @@ def call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

-    # Handle max_tokens vs max_completion_tokens retry
+    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
    try:
        return client.chat.completions.create(**kwargs)
    except Exception as first_err:
@@ -1819,7 +1998,30 @@ def call_llm(
        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
-            return client.chat.completions.create(**kwargs)
+            try:
+                return client.chat.completions.create(**kwargs)
+            except Exception as retry_err:
+                # If the max_tokens retry also hits a payment error,
+                # fall through to the payment fallback below.
+                if not _is_payment_error(retry_err):
+                    raise
+                first_err = retry_err
+
+        # ── Payment / credit exhaustion fallback ──────────────────────
+        # When the resolved provider returns 402 or a credit-related error,
+        # try alternative providers instead of giving up.  This handles the
+        # common case where a user runs out of OpenRouter credits but has
+        # Codex OAuth or another provider available.
+        if _is_payment_error(first_err):
+            fb_client, fb_model, fb_label = _try_payment_fallback(
+                resolved_provider, task)
+            if fb_client is not None:
+                fb_kwargs = _build_call_kwargs(
+                    fb_label, fb_model, messages,
+                    temperature=temperature, max_tokens=max_tokens,
+                    tools=tools, timeout=effective_timeout,
+                    extra_body=extra_body)
+                return fb_client.chat.completions.create(**fb_kwargs)
        raise


@@ -13,9 +13,10 @@ from __future__ import annotations

 import json
 import logging
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List

 from agent.memory_provider import MemoryProvider
+from tools.registry import tool_error

 logger = logging.getLogger(__name__)

@@ -92,7 +93,7 @@ class BuiltinMemoryProvider(MemoryProvider):

    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
        """Not used — the memory tool is intercepted in run_agent.py."""
-        return json.dumps({"error": "Built-in memory tool is handled by the agent loop"})
+        return tool_error("Built-in memory tool is handled by the agent loop")

    def shutdown(self) -> None:
        """No cleanup needed — files are saved on every write."""
@@ -14,6 +14,7 @@ Improvements over v1:
 """

 import logging
+import time
 from typing import Any, Dict, List, Optional

 from agent.auxiliary_client import call_llm
@@ -46,6 +47,7 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"

 # Chars per token rough estimate
 _CHARS_PER_TOKEN = 4
+_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


 class ContextCompressor:
@@ -118,6 +120,7 @@ class ContextCompressor:

        # Stores the previous compaction summary for iterative updates
        self._previous_summary: Optional[str] = None
+        self._summary_failure_cooldown_until: float = 0.0

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -258,6 +261,14 @@ class ContextCompressor:
        the middle turns without a summary rather than inject a useless
        placeholder.
        """
+        now = time.monotonic()
+        if now < self._summary_failure_cooldown_until:
+            logger.debug(
+                "Skipping context summary during cooldown (%.0fs remaining)",
+                self._summary_failure_cooldown_until - now,
+            )
+            return None
+
        summary_budget = self._compute_summary_budget(turns_to_summarize)
        content_to_summarize = self._serialize_for_summary(turns_to_summarize)

@@ -345,7 +356,6 @@ Write only the summary body. Do not include any preamble or prefix."""
            call_kwargs = {
                "task": "compression",
                "messages": [{"role": "user", "content": prompt}],
-                "temperature": 0.3,
                "max_tokens": summary_budget * 2,
                # timeout resolved from auxiliary.compression.timeout config by call_llm
            }
@@ -359,13 +369,23 @@ Write only the summary body. Do not include any preamble or prefix."""
            summary = content.strip()
            # Store for iterative updates on next compaction
            self._previous_summary = summary
+            self._summary_failure_cooldown_until = 0.0
            return self._with_summary_prefix(summary)
        except RuntimeError:
+            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
            logging.warning("Context compression: no provider available for "
-                            "summary. Middle turns will be dropped without summary.")
+                            "summary. Middle turns will be dropped without summary "
+                            "for %d seconds.",
+                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
            return None
        except Exception as e:
-            logging.warning("Failed to generate context summary: %s", e)
+            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            logging.warning(
+                "Failed to generate context summary: %s. "
+                "Further summary attempts paused for %d seconds.",
+                e,
+                _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+            )
            return None

    @staticmethod
@@ -648,7 +668,7 @@ Write only the summary body. Do not include any preamble or prefix."""
                compressed.append({"role": summary_role, "content": summary})
        else:
            if not self.quiet_mode:
-                logger.warning("No summary model available — middle turns dropped without summary")
+                logger.debug("No summary model available — middle turns dropped without summary")

        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
@@ -343,10 +343,9 @@ def _resolve_path(cwd: Path, target: str, *, allowed_root: Path | None = None) -


 def _ensure_reference_path_allowed(path: Path) -> None:
+    from hermes_constants import get_hermes_home
    home = Path(os.path.expanduser("~")).resolve()
-    hermes_home = Path(
-        os.getenv("HERMES_HOME", str(home / ".hermes"))
-    ).expanduser().resolve()
+    hermes_home = get_hermes_home().resolve()

    blocked_exact = {home / rel for rel in _SENSITIVE_HOME_FILES}
    blocked_exact.add(hermes_home / ".env")
@@ -10,22 +10,21 @@ import uuid
 import os
 import re
 from dataclasses import dataclass, fields, replace
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
-    ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
    PROVIDER_REGISTRY,
-    _agent_key_is_usable,
    _codex_access_token_is_expiring,
    _decode_jwt_claims,
-    _is_expiring,
+    _import_codex_cli_tokens,
    _load_auth_store,
    _load_provider_state,
+    _resolve_zai_base_url,
    read_credential_pool,
    write_credential_pool,
 )
@@ -347,6 +346,9 @@ def get_pool_strategy(provider: str) -> str:
    return STRATEGY_FILL_FIRST


+DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
+
+
 class CredentialPool:
    def __init__(self, provider: str, entries: List[PooledCredential]):
        self.provider = provider
@@ -354,6 +356,8 @@ class CredentialPool:
        self._current_id: Optional[str] = None
        self._strategy = get_pool_strategy(provider)
        self._lock = threading.Lock()
+        self._active_leases: Dict[str, int] = {}
+        self._max_concurrent = DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL

    def has_credentials(self) -> bool:
        return bool(self._entries)
@@ -440,6 +444,39 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

+    def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential:
+        """Adopt the Codex CLI's token pair when its refresh token differs from ours.
+
+        Only acts when this pool's provider is ``openai-codex``.  The tokens
+        returned by ``_import_codex_cli_tokens()`` (~/.codex/auth.json) are
+        compared with ``entry``; if the CLI holds a non-empty refresh token
+        that differs from the entry's, a copy of the entry carrying the CLI's
+        access/refresh tokens -- with ``last_status``, ``last_status_at`` and
+        ``last_error_code`` cleared -- replaces the pool entry and is persisted.
+
+        Why: OpenAI OAuth refresh tokens are single-use and rotate on every
+        refresh, so a refresh performed by the Codex CLI (or another Hermes
+        profile) leaves the pool's stored refresh token stale.
+
+        Returns:
+            The updated entry after a sync, otherwise the very same ``entry``
+            object (also when an error occurs; errors are logged at debug
+            level and never raised).
+        """
+        if self.provider != "openai-codex":
+            return entry
+        try:
+            cli_tokens = _import_codex_cli_tokens()
+            if not cli_tokens:
+                return entry
+            fresh_refresh = cli_tokens.get("refresh_token", "")
+            fresh_access = cli_tokens.get("access_token", "")
+            # Nothing to do when the CLI has no refresh token or ours matches.
+            if not fresh_refresh or fresh_refresh == entry.refresh_token:
+                return entry
+            logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id)
+            synced = replace(
+                entry,
+                access_token=fresh_access,
+                refresh_token=fresh_refresh,
+                last_status=None,
+                last_status_at=None,
+                last_error_code=None,
+            )
+            self._replace_entry(entry, synced)
+            self._persist()
+            return synced
+        except Exception as exc:
+            logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
+        return entry
+
    def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]:
        if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token:
            if force:
@@ -629,6 +666,16 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For exhausted openai-codex entries that still hold a refresh
+            # token, sync from ~/.codex/auth.json before the exhaustion check
+            # below.  This picks up tokens refreshed by the Codex CLI or
+            # another Hermes profile.
+            if (self.provider == "openai-codex"
+                    and entry.last_status == STATUS_EXHAUSTED
+                    and entry.refresh_token):
+                synced = self._sync_codex_entry_from_cli(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -716,6 +763,51 @@ class CredentialPool:
                logger.info("credential pool: rotated to %s", _next_label)
            return next_entry

+    def acquire_lease(self, credential_id: Optional[str] = None) -> Optional[str]:
+        """Take a soft (non-blocking) lease on a credential and return its id.
+
+        With an explicit ``credential_id`` the lease counter for that id is
+        incremented directly.  Without one, the available entries are
+        considered: entries whose lease count is below the per-credential
+        soft cap are preferred, and among the candidates the one with the
+        fewest leases wins, with ``priority`` breaking ties.  If every entry
+        is already at the cap, the least-leased entry is still returned
+        rather than blocking.
+
+        The chosen id also becomes the pool's current credential.
+
+        Returns:
+            The leased credential id, or ``None`` when no entry is available.
+        """
+        with self._lock:
+            if credential_id:
+                held = self._active_leases.get(credential_id, 0)
+                self._active_leases[credential_id] = held + 1
+                self._current_id = credential_id
+                return credential_id
+
+            pool = self._available_entries(clear_expired=True, refresh=True)
+            if not pool:
+                return None
+
+            def _lease_count(item) -> int:
+                return self._active_leases.get(item.id, 0)
+
+            under_cap = [item for item in pool if _lease_count(item) < self._max_concurrent]
+            # Fall back to the whole pool when everything is at the soft cap.
+            picked = min(
+                under_cap if under_cap else pool,
+                key=lambda item: (_lease_count(item), item.priority),
+            )
+            self._active_leases[picked.id] = _lease_count(picked) + 1
+            self._current_id = picked.id
+            return picked.id
+
+    def release_lease(self, credential_id: str) -> None:
+        """Give back one lease on ``credential_id``.
+
+        The counter is decremented; once it would reach zero (or the id was
+        never leased) the id is removed from the lease table entirely.
+        """
+        with self._lock:
+            remaining = self._active_leases.get(credential_id, 0) - 1
+            if remaining > 0:
+                self._active_leases[credential_id] = remaining
+            else:
+                self._active_leases.pop(credential_id, None)
+
+    def active_lease_count(self, credential_id: str) -> int:
+        """Report how many leases are currently held on ``credential_id`` (0 if none)."""
+        with self._lock:
+            held = self._active_leases.get(credential_id, 0)
+        return held
+
    def try_refresh_current(self) -> Optional[PooledCredential]:
        with self._lock:
            return self._try_refresh_current_unlocked()
@@ -992,6 +1084,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
+        if provider == "zai":
+            base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
        changed |= _upsert_entry(
            entries,
            provider,
@@ -890,8 +890,6 @@ def get_cute_tool_message(
        return _wrap(f"┊ ◀️  back      {dur}")
    if tool_name == "browser_press":
        return _wrap(f"┊ ⌨️  press     {args.get('key', '?')}  {dur}")
-    if tool_name == "browser_close":
-        return _wrap(f"┊ 🚪 close     browser  {dur}")
    if tool_name == "browser_get_images":
        return _wrap(f"┊ 🖼️  images    extracting  {dur}")
    if tool_name == "browser_vision":
@@ -988,24 +986,6 @@ def _osc8_link(url: str, text: str) -> str:
    return f"\033]8;;{url}\033\\{text}\033]8;;\033\\"


-def honcho_session_line(workspace: str, session_name: str) -> str:
-    """One-line session indicator: `Honcho session: <clickable name>`."""
-    url = honcho_session_url(workspace, session_name)
-    linked_name = _osc8_link(url, f"{_SKY_BLUE}{session_name}{_ANSI_RESET}")
-    return f"{_DIM}Honcho session:{_ANSI_RESET} {linked_name}"
-
-
-def write_tty(text: str) -> None:
-    """Write directly to /dev/tty, bypassing stdout capture."""
-    try:
-        fd = os.open("/dev/tty", os.O_WRONLY)
-        os.write(fd, text.encode("utf-8"))
-        os.close(fd)
-    except OSError:
-        sys.stdout.write(text)
-        sys.stdout.flush()
-
-
 # =========================================================================
 # Context pressure display (CLI user-facing warnings)
 # =========================================================================
@@ -34,6 +34,7 @@ import re
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
+from tools.registry import tool_error

 logger = logging.getLogger(__name__)

@@ -249,7 +250,7 @@ class MemoryManager:
        """
        provider = self._tool_to_provider.get(tool_name)
        if provider is None:
-            return json.dumps({"error": f"No memory provider handles tool '{tool_name}'"})
+            return tool_error(f"No memory provider handles tool '{tool_name}'")
        try:
            return provider.handle_tool_call(tool_name, args, **kwargs)
        except Exception as e:
@@ -257,7 +258,7 @@ class MemoryManager:
                "Memory provider '%s' handle_tool_call(%s) failed: %s",
                provider.name, tool_name, e,
            )
-            return json.dumps({"error": f"Memory tool '{tool_name}' failed: {e}"})
+            return tool_error(f"Memory tool '{tool_name}' failed: {e}")

    # -- Lifecycle hooks -----------------------------------------------------

@@ -34,7 +34,7 @@ from __future__ import annotations

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List

 logger = logging.getLogger(__name__)

@@ -24,10 +24,11 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "custom", "local",
    # Common aliases
+    "google", "google-gemini", "google-ai-studio",
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "claude", "deep-seek",
    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
@@ -101,6 +102,11 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gpt-4": 128000,
    # Google
    "gemini": 1048576,
+    # Gemma (open models served via AI Studio)
+    "gemma-4-31b": 256000,
+    "gemma-4-26b": 256000,
+    "gemma-3": 131072,
+    "gemma": 8192,  # fallback for older gemma models
    # DeepSeek
    "deepseek": 128000,
    # Meta
@@ -175,7 +181,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "dashscope.aliyuncs.com": "alibaba",
    "dashscope-intl.aliyuncs.com": "alibaba",
    "openrouter.ai": "openrouter",
-    "generativelanguage.googleapis.com": "google",
+    "generativelanguage.googleapis.com": "gemini",
    "inference-api.nousresearch.com": "nous",
    "api.deepseek.com": "deepseek",
    "api.githubcopilot.com": "copilot",
@@ -504,8 +510,8 @@ def fetch_endpoint_model_metadata(

 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
-    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
-    return hermes_home / "context_length_cache.yaml"
+    from hermes_constants import get_hermes_home
+    return get_hermes_home() / "context_length_cache.yaml"


 def _load_context_cache() -> Dict[str, int]:
@@ -23,9 +23,9 @@ import json
 import logging
 import os
 import time
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple

 from utils import atomic_json_write

@@ -160,6 +160,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "kilocode": "kilo",
    "fireworks": "fireworks-ai",
    "huggingface": "huggingface",
+    "gemini": "google",
    "google": "google",
    "xai": "xai",
    "nvidia": "nvidia",
@@ -184,9 +185,8 @@ def _get_reverse_mapping() -> Dict[str, str]:

 def _get_cache_path() -> Path:
    """Return path to disk cache file."""
-    env_val = os.environ.get("HERMES_HOME", "")
-    hermes_home = Path(env_val) if env_val else Path.home() / ".hermes"
-    return hermes_home / "models_dev_cache.json"
+    from hermes_constants import get_hermes_home
+    return get_hermes_home() / "models_dev_cache.json"


 def _load_disk_cache() -> Dict[str, Any]:
@@ -230,7 +230,7 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
        response = requests.get(MODELS_DEV_URL, timeout=15)
        response.raise_for_status()
        data = response.json()
-        if isinstance(data, dict) and len(data) > 0:
+        if isinstance(data, dict) and data:
            _models_dev_cache = data
            _models_dev_cache_time = time.time()
            _save_disk_cache(data)
@@ -422,6 +422,39 @@ def list_provider_models(provider: str) -> List[str]:
    return list(models.keys())


+# Patterns that indicate non-agentic or noise models (TTS, embedding,
+# dated preview snapshots, live/streaming-only, image-only).
+#
+# The alternatives are unanchored and case-insensitive:
+#   -tts\b                        ids ending a token with "-tts"
+#   embedding                     any id containing "embedding"
+#   live-                         any id containing "live-"
+#   -(preview|exp)-\d{2,4}[-_]    "-preview-"/"-exp-" followed by a 2-4 digit
+#                                 group and a "-" or "_" separator
+#   -image\b, -image-preview\b    ids with an "-image" / "-image-preview" token
+#   -customtools\b                ids with a "-customtools" token
+import re
+_NOISE_PATTERNS: re.Pattern = re.compile(
+    r"-tts\b|embedding|live-|-(preview|exp)-\d{2,4}[-_]|"
+    r"-image\b|-image-preview\b|-customtools\b",
+    re.IGNORECASE,
+)
+
+
+def list_agentic_models(provider: str) -> List[str]:
+    """List the provider's models.dev model IDs that look usable for agentic work.
+
+    A model is kept when its metadata entry is a dict with a truthy
+    ``tool_call`` flag and its ID does not match ``_NOISE_PATTERNS`` (TTS,
+    embedding, dated preview snapshots, live/streaming, image-only models).
+
+    Returns an empty list when no model data is available for ``provider``.
+    """
+    catalog = _get_provider_models(provider)
+    if catalog is None:
+        return []
+
+    return [
+        model_id
+        for model_id, info in catalog.items()
+        if isinstance(info, dict)
+        and info.get("tool_call", False)
+        and not _NOISE_PATTERNS.search(model_id)
+    ]
+
+
 def search_models_dev(
    query: str, provider: str = None, limit: int = 5
 ) -> List[Dict[str, Any]]:
@@ -187,7 +187,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (

 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")

 # OpenAI GPT/Codex-specific execution guidance.  Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,
@@ -744,7 +744,6 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
        "browser_type",
        "browser_scroll",
        "browser_console",
-        "browser_close",
        "browser_press",
        "browser_get_images",
        "browser_vision",
@@ -774,13 +773,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -

    lines = [
        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines.extend(_status_line(feature) for feature in features.items())
    lines.extend(
        [
-            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
+            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
            "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
            "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
            "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
@@ -16,6 +16,9 @@ logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
 _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
+# Patterns for sanitizing skill names into clean hyphen-separated slugs.
+_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
+_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")


 def build_plan_path(
@@ -76,6 +79,45 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
    return loaded_skill, skill_dir, skill_name


+def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None:
+    """Resolve and inject skill-declared config values into the message parts.
+
+    If the loaded skill's frontmatter declares ``metadata.hermes.config``
+    entries, their current values (from config.yaml or defaults) are appended
+    to ``parts`` as a ``[Skill config: ...]`` block so the agent knows the
+    configured values without needing to read config.yaml itself.
+
+    Args:
+        loaded_skill: Skill payload; the frontmatter is parsed from its
+            ``raw_content`` (preferred) or ``content`` field.
+        parts: Message lines being assembled; extended in place.
+
+    Best-effort: any failure is logged at debug level and leaves ``parts``
+    unchanged, so the skill still loads without config injection.
+    """
+    try:
+        from agent.skill_utils import (
+            extract_skill_config_vars,
+            parse_frontmatter,
+            resolve_skill_config_values,
+        )
+
+        # The loaded_skill dict contains the raw content which includes frontmatter
+        raw_content = str(loaded_skill.get("raw_content") or loaded_skill.get("content") or "")
+        if not raw_content:
+            return
+
+        frontmatter, _ = parse_frontmatter(raw_content)
+        config_vars = extract_skill_config_vars(frontmatter)
+        if not config_vars:
+            return
+
+        resolved = resolve_skill_config_values(config_vars)
+        if not resolved:
+            return
+
+        lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
+        for key, value in resolved.items():
+            # Numbers and booleans are real settings even when falsy (0,
+            # False); only None and empty strings/containers count as unset.
+            is_unset = value is None or (
+                not value and not isinstance(value, (bool, int, float))
+            )
+            display_val = "(not set)" if is_unset else str(value)
+            lines.append(f"  {key} = {display_val}")
+        lines.append("]")
+        parts.extend(lines)
+    except Exception as exc:
+        # Non-critical — skill still loads without config injection
+        logger.debug("Skipping skill config injection: %s", exc)
+
+
 def _build_skill_message(
    loaded_skill: dict[str, Any],
    skill_dir: Path | None,
@@ -90,6 +132,9 @@ def _build_skill_message(

    parts = [activation_note, "", content.strip()]

+    # ── Inject resolved skill config values ──
+    _inject_skill_config(loaded_skill, parts)
+
    if loaded_skill.get("setup_skipped"):
        parts.extend(
            [
@@ -196,7 +241,14 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
                                description = line[:80]
                                break
                    seen_names.add(name)
+                    # Normalize to hyphen-separated slug, stripping
+                    # non-alnum chars (e.g. +, /) to avoid invalid
+                    # Telegram command names downstream.
                    cmd_name = name.lower().replace(' ', '-').replace('_', '-')
+                    cmd_name = _SKILL_INVALID_CHARS.sub('', cmd_name)
+                    cmd_name = _SKILL_MULTI_HYPHEN.sub('-', cmd_name).strip('-')
+                    if not cmd_name:
+                        continue
                    _skill_commands[f"/{cmd_name}"] = {
                        "name": name,
                        "description": description or f"Invoke the {name} skill",
@@ -10,7 +10,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Set, Tuple

 from hermes_constants import get_hermes_home

@@ -254,6 +254,163 @@ def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
    }


+# ── Skill config extraction ───────────────────────────────────────────────
+
+
+def extract_skill_config_vars(frontmatter: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Extract config variable declarations from parsed frontmatter.
+
+    Skills declare config.yaml settings they need via::
+
+        metadata:
+          hermes:
+            config:
+              - key: wiki.path
+                description: Path to the LLM Wiki knowledge base directory
+                default: "~/wiki"
+                prompt: Wiki directory path
+
+    ``config`` may be a list of mappings or a single mapping.
+
+    Returns a list of dicts with keys: ``key``, ``description``, ``prompt``
+    (falls back to the description) and, when declared, ``default``.
+    Invalid or incomplete entries -- non-dict items, duplicate keys, and
+    missing, empty or null ``key``/``description`` -- are silently skipped.
+    """
+    metadata = frontmatter.get("metadata")
+    if not isinstance(metadata, dict):
+        return []
+    hermes = metadata.get("hermes")
+    if not isinstance(hermes, dict):
+        return []
+    raw = hermes.get("config")
+    if not raw:
+        return []
+    if isinstance(raw, dict):
+        raw = [raw]
+    if not isinstance(raw, list):
+        return []
+
+    def _clean(value: Any) -> str:
+        # A YAML field with no value parses to None; str(None) would yield
+        # the bogus literal "None", so treat None as empty instead.
+        return "" if value is None else str(value).strip()
+
+    result: List[Dict[str, Any]] = []
+    seen: set = set()
+    for item in raw:
+        if not isinstance(item, dict):
+            continue
+        key = _clean(item.get("key"))
+        if not key or key in seen:
+            continue
+        # Must have at least key and description
+        desc = _clean(item.get("description"))
+        if not desc:
+            continue
+        entry: Dict[str, Any] = {
+            "key": key,
+            "description": desc,
+        }
+        default = item.get("default")
+        if default is not None:
+            entry["default"] = default
+        prompt_text = item.get("prompt")
+        if isinstance(prompt_text, str) and prompt_text.strip():
+            entry["prompt"] = prompt_text.strip()
+        else:
+            entry["prompt"] = desc
+        seen.add(key)
+        result.append(entry)
+    return result
+
+
+def discover_all_skill_config_vars() -> List[Dict[str, Any]]:
+    """Collect the config variable declarations of every enabled skill.
+
+    Each skills directory is walked, every SKILL.md frontmatter is parsed,
+    and the declared config vars are gathered.  The first declaration of a
+    key wins; later duplicates are dropped.  Every returned dict carries an
+    extra ``skill`` key naming the declaring skill.
+
+    Skills that are disabled, do not match the current platform, or whose
+    SKILL.md cannot be read or parsed are left out.
+    """
+    collected: List[Dict[str, Any]] = []
+    claimed_keys: set = set()
+    disabled_names = get_disabled_skill_names()
+
+    for root in get_all_skills_dirs():
+        if not root.is_dir():
+            continue
+        for skill_md in iter_skill_index_files(root, "SKILL.md"):
+            try:
+                text = skill_md.read_text(encoding="utf-8")
+                frontmatter, _ = parse_frontmatter(text)
+            except Exception:
+                continue
+
+            # Fall back to the directory name when the frontmatter has no name.
+            skill_name = str(frontmatter.get("name") or skill_md.parent.name)
+            if skill_name in disabled_names or not skill_matches_platform(frontmatter):
+                continue
+
+            for declared in extract_skill_config_vars(frontmatter):
+                key = declared["key"]
+                if key in claimed_keys:
+                    continue
+                declared["skill"] = skill_name
+                collected.append(declared)
+                claimed_keys.add(key)
+
+    return collected
+
+
+# Storage prefix: all skill config vars are stored under skills.config.*
+# in config.yaml.  Skill authors declare logical keys (e.g. "wiki.path");
+# the system adds this prefix for storage and strips it for display.
+SKILL_CONFIG_PREFIX = "skills.config"
+
+
+def _resolve_dotpath(config: Dict[str, Any], dotted_key: str):
+    """Look up ``dotted_key`` (e.g. ``"a.b.c"``) in a nested dict.
+
+    Each dot-separated segment descends one level.  Returns the value found
+    at the end of the path, or None as soon as a segment is missing or the
+    current node is not a dict.
+    """
+    node = config
+    for segment in dotted_key.split("."):
+        if not isinstance(node, dict) or segment not in node:
+            return None
+        node = node[segment]
+    return node
+
+
+def resolve_skill_config_values(
+    config_vars: List[Dict[str, Any]],
+) -> Dict[str, Any]:
+    """Look up the current value of each skill config var in config.yaml.
+
+    Values live under ``skills.config.<key>`` in ``config.yaml`` inside the
+    Hermes home directory.  The result maps each **logical** key (as the
+    skill declared it) to its stored value; a missing, None or blank-string
+    value falls back to the var's declared ``default`` (or ``""``).
+
+    String values containing ``~`` or ``${`` are passed through
+    ``os.path.expandvars`` and then ``os.path.expanduser``.
+
+    A missing, unreadable or non-mapping config file is treated as empty.
+    """
+    config: Dict[str, Any] = {}
+    config_file = get_hermes_home() / "config.yaml"
+    if config_file.exists():
+        try:
+            loaded = yaml_load(config_file.read_text(encoding="utf-8"))
+        except Exception:
+            loaded = None
+        if isinstance(loaded, dict):
+            config = loaded
+
+    resolved: Dict[str, Any] = {}
+    for var in config_vars:
+        name = var["key"]
+        current = _resolve_dotpath(config, f"{SKILL_CONFIG_PREFIX}.{name}")
+
+        is_blank_text = isinstance(current, str) and not current.strip()
+        if current is None or is_blank_text:
+            current = var.get("default", "")
+
+        # Expand ~ and ${VAR} in path-like values
+        if isinstance(current, str) and ("~" in current or "${" in current):
+            current = os.path.expanduser(os.path.expandvars(current))
+
+        resolved[name] = current
+
+    return resolved
+
+
 # ── Description extraction ────────────────────────────────────────────────


@@ -15,7 +15,6 @@ Inspired by Block/goose's SubdirectoryHintTracker.

 import logging
 import os
-import re
 import shlex
 from pathlib import Path
 from typing import Dict, Any, Optional, Set
@@ -31,6 +31,8 @@ from multiprocessing import Pool, Lock
 import traceback
 from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn, MofNCompleteColumn
 from rich.console import Console
+
+logger = logging.getLogger(__name__)
 import fire

 from run_agent import AIAgent
@@ -1016,7 +1018,7 @@ class BatchRunner:
                            tool_stats = data.get('tool_stats', {})
                            
                            # Check for invalid tool names (model hallucinations)
-                            invalid_tools = [k for k in tool_stats.keys() if k not in VALID_TOOLS]
+                            invalid_tools = [k for k in tool_stats if k not in VALID_TOOLS]
                            
                            if invalid_tools:
                                filtered_entries += 1
@@ -18,7 +18,8 @@ model:
  #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
  #   "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
  #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
-  #   "zai"          - z.ai / ZhipuAI GLM (requires: GLM_API_KEY)
+  #   "gemini"       - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
+  #   "zai"          - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
  #   "kimi-coding"  - Kimi / Moonshot AI (requires: KIMI_API_KEY)
  #   "minimax"      - MiniMax global (requires: MINIMAX_API_KEY)
  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
@@ -315,7 +316,8 @@ compression:
 #   "auto"       - Best available: OpenRouter → Nous Portal → main endpoint (default)
 #   "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
 #   "nous"       - Force Nous Portal (requires: hermes login)
-#   "codex"      - Force Codex OAuth (requires: hermes model → Codex).
+#   "gemini"     - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
+#   "codex"      - Force Codex OAuth (requires: hermes model → Codex).
 #                  Uses gpt-5.3-codex which supports vision.
 #   "main"       - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
 #                  Works with OpenAI API, local models, or any OpenAI-compatible
@@ -537,7 +539,7 @@ platform_toolsets:
 #   terminal     - terminal, process
 #   file         - read_file, write_file, patch, search
 #   browser      - browser_navigate, browser_snapshot, browser_click, browser_type,
-#                  browser_scroll, browser_back, browser_press, browser_close,
+#                  browser_scroll, browser_back, browser_press,
 #                  browser_get_images, browser_vision  (requires BROWSERBASE_API_KEY)
 #   vision       - vision_analyze  (requires OPENROUTER_API_KEY)
 #   image_gen    - image_generate  (requires FAL_KEY)
@@ -63,14 +63,14 @@ from agent.usage_pricing import (
    format_duration_compact,
    format_token_count_compact,
 )
-from hermes_cli.banner import _format_context_length
+from hermes_cli.banner import _format_context_length, format_banner_version_label

 _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")


 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
-from hermes_constants import get_hermes_home, display_hermes_home, OPENROUTER_BASE_URL
+from hermes_constants import get_hermes_home, display_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv

 _hermes_home = get_hermes_home()
@@ -120,6 +120,63 @@ def _parse_reasoning_config(effort: str) -> dict | None:
    return result


+def _get_chrome_debug_candidates(system: str) -> list[str]:
+    """List Chromium-family browser executables that exist on this machine.
+
+    Used to pick a browser for local CDP auto-launch.  ``system`` selects
+    the search strategy: ``"Darwin"`` checks fixed /Applications bundle
+    paths, ``"Windows"`` checks PATH and then the usual install layouts under
+    ProgramFiles / ProgramFiles(x86) / LOCALAPPDATA, and anything else
+    checks common Linux binary names on PATH.
+
+    Only paths that are existing files are returned, in discovery order,
+    de-duplicated by their normalized (normpath + normcase) form.
+    """
+    found: list[str] = []
+    seen_keys: set[str] = set()
+
+    def _consider(path: str | None) -> None:
+        if not path:
+            return
+        key = os.path.normcase(os.path.normpath(path))
+        if key not in seen_keys and os.path.isfile(path):
+            found.append(path)
+            seen_keys.add(key)
+
+    def _consider_on_path(names: tuple[str, ...]) -> None:
+        for exe in names:
+            _consider(shutil.which(exe))
+
+    if system == "Darwin":
+        mac_binaries = (
+            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+            "/Applications/Chromium.app/Contents/MacOS/Chromium",
+            "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
+            "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+        )
+        for binary in mac_binaries:
+            _consider(binary)
+    elif system == "Windows":
+        _consider_on_path((
+            "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
+            "chrome", "msedge", "brave", "chromium",
+        ))
+
+        install_layouts = (
+            ("Google", "Chrome", "Application", "chrome.exe"),
+            ("Chromium", "Application", "chrome.exe"),
+            ("Chromium", "Application", "chromium.exe"),
+            ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
+            ("Microsoft", "Edge", "Application", "msedge.exe"),
+        )
+        roots = [
+            os.environ.get(var)
+            for var in ("ProgramFiles", "ProgramFiles(x86)", "LOCALAPPDATA")
+        ]
+        for root in roots:
+            if not root:
+                continue
+            for layout in install_layouts:
+                _consider(os.path.join(root, *layout))
+    else:
+        _consider_on_path((
+            "google-chrome", "google-chrome-stable", "chromium-browser",
+            "chromium", "brave-browser", "microsoft-edge",
+        ))
+
+    return found
+
+
 def load_cli_config() -> Dict[str, Any]:
    """
    Load CLI configuration from config files.
@@ -979,21 +1036,44 @@ COMPACT_BANNER = """

 def _build_compact_banner() -> str:
    """Build a compact banner that fits the current terminal width."""
-    w = min(shutil.get_terminal_size().columns - 2, 64)
+    try:
+        from hermes_cli.skin_engine import get_active_skin
+        _skin = get_active_skin()
+    except Exception:
+        _skin = None
+
+    skin_name = getattr(_skin, "name", "default") if _skin else "default"
+    border_color = _skin.get_color("banner_border", "#FFD700") if _skin else "#FFD700"
+    title_color = _skin.get_color("banner_title", "#FFBF00") if _skin else "#FFBF00"
+    dim_color = _skin.get_color("banner_dim", "#B8860B") if _skin else "#B8860B"
+
+    if skin_name == "default":
+        line1 = "⚕ NOUS HERMES - AI Agent Framework"
+        tiny_line = "⚕ NOUS HERMES"
+    else:
+        agent_name = _skin.get_branding("agent_name", "Hermes Agent") if _skin else "Hermes Agent"
+        line1 = f"{agent_name} - AI Agent Framework"
+        tiny_line = agent_name
+
+    version_line = format_banner_version_label()
+
+    w = min(shutil.get_terminal_size().columns - 2, 88)
    if w < 30:
-        return "\n[#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- Nous Research[/]\n"
+        return f"\n[{title_color}]{tiny_line}[/] [dim {dim_color}]- Nous Research[/]\n"
+
    inner = w - 2  # inside the box border
    bar = "═" * w
-    line1 = "⚕ NOUS HERMES - AI Agent Framework"
-    line2 = "Messenger of the Digital Gods  ·  Nous Research"
+    content_width = inner - 2
+
    # Truncate and pad to fit
-    line1 = line1[:inner - 2].ljust(inner - 2)
-    line2 = line2[:inner - 2].ljust(inner - 2)
+    line1 = line1[:content_width].ljust(content_width)
+    line2 = version_line[:content_width].ljust(content_width)
+
    return (
-        f"\n[bold #FFD700]╔{bar}╗[/]\n"
-        f"[bold #FFD700]║[/] [#FFBF00]{line1}[/] [bold #FFD700]║[/]\n"
-        f"[bold #FFD700]║[/] [dim #B8860B]{line2}[/] [bold #FFD700]║[/]\n"
-        f"[bold #FFD700]╚{bar}╝[/]\n"
+        f"\n[bold {border_color}]╔{bar}╗[/]\n"
+        f"[bold {border_color}]║[/] [{title_color}]{line1}[/] [bold {border_color}]║[/]\n"
+        f"[bold {border_color}]║[/] [dim {dim_color}]{line2}[/] [bold {border_color}]║[/]\n"
+        f"[bold {border_color}]╚{bar}╝[/]\n"
    )


@@ -1863,6 +1943,12 @@ class HermesCLI:
            _cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}")
            self._reasoning_box_opened = False

+            # Flush any content that was deferred while reasoning was rendering.
+            deferred = getattr(self, "_deferred_content", "")
+            if deferred:
+                self._deferred_content = ""
+                self._emit_stream_text(deferred)
+
    def _stream_delta(self, text) -> None:
        """Line-buffered streaming callback for real-time token rendering.

@@ -1965,6 +2051,13 @@ class HermesCLI:
        if not text:
            return

+        # When show_reasoning is on and reasoning is still rendering,
+        # defer content until the reasoning box closes.  This ensures the
+        # reasoning block always appears BEFORE the response in the terminal.
+        if self.show_reasoning and getattr(self, "_reasoning_box_opened", False):
+            self._deferred_content = getattr(self, "_deferred_content", "") + text
+            return
+
        # Close the live reasoning box before opening the response box
        self._close_reasoning_box()

@@ -2031,6 +2124,7 @@ class HermesCLI:
        self._reasoning_box_opened = False
        self._reasoning_buf = ""
        self._reasoning_preview_buf = ""
+        self._deferred_content = ""

    def _slow_command_status(self, command: str) -> str:
        """Return a user-facing status message for slower slash commands."""
@@ -2092,7 +2186,7 @@ class HermesCLI:
            )
        except Exception as exc:
            message = format_runtime_provider_error(exc)
-            self.console.print(f"[bold red]{message}[/]")
+            ChatConsole().print(f"[bold red]{message}[/]")
            return False

        api_key = runtime.get("api_key")
@@ -2307,7 +2401,7 @@ class HermesCLI:
                    self._pending_title = None
            return True
        except Exception as e:
-            self.console.print(f"[bold red]Failed to initialize agent: {e}[/]")
+            ChatConsole().print(f"[bold red]Failed to initialize agent: {e}[/]")
            return False
    
    def show_banner(self):
@@ -3465,13 +3559,6 @@ class HermesCLI:
        _cprint(f"  Original session: {parent_session_id}")
        _cprint(f"  Branch session:   {new_session_id}")

-    def reset_conversation(self):
-        """Reset the conversation by starting a new session."""
-        # Shut down memory provider before resetting — actual session boundary
-        if hasattr(self, 'agent') and self.agent:
-            self.agent.shutdown_memory_provider(self.conversation_history)
-        self.new_session()
-    
    def save_conversation(self):
        """Save the current conversation to a file."""
        if not self.conversation_history:
@@ -3721,7 +3808,7 @@ class HermesCLI:

        # Persistence
        if persist_global:
-            save_config_value("model.name", result.new_model)
+            save_config_value("model.default", result.new_model)
            if result.provider_changed:
                save_config_value("model.provider", result.target_provider)
            _cprint("    Saved to config.yaml (--global)")
@@ -4175,7 +4262,6 @@ class HermesCLI:
        
        try:
            config = load_gateway_config()
-            connected = config.get_connected_platforms()
            
            print("  Messaging Platform Configuration:")
            print("  " + "-" * 55)
@@ -4467,13 +4553,13 @@ class HermesCLI:
                            if output:
                                self.console.print(_rich_text_from_ansi(output))
                            else:
-                                self.console.print("[dim]Command returned no output[/]")
+                                ChatConsole().print("[dim]Command returned no output[/]")
                        except subprocess.TimeoutExpired:
-                            self.console.print("[bold red]Quick command timed out (30s)[/]")
+                            ChatConsole().print("[bold red]Quick command timed out (30s)[/]")
                        except Exception as e:
-                            self.console.print(f"[bold red]Quick command error: {e}[/]")
+                            ChatConsole().print(f"[bold red]Quick command error: {e}[/]")
                    else:
-                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
+                        ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
                elif qcmd.get("type") == "alias":
                    target = qcmd.get("target", "").strip()
                    if target:
@@ -4482,9 +4568,9 @@ class HermesCLI:
                        aliased_command = f"{target} {user_args}".strip()
                        return self.process_command(aliased_command)
                    else:
-                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
+                        ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
                else:
-                    self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
+                    ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
            # Check for plugin-registered slash commands
            elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names():
                from hermes_cli.plugins import get_plugin_command_handler
@@ -4509,7 +4595,7 @@ class HermesCLI:
                    if hasattr(self, '_pending_input'):
                        self._pending_input.put(msg)
                else:
-                    self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]")
+                    ChatConsole().print(f"[bold red]Failed to load skill for {base_cmd}[/]")
            else:
                # Prefix matching: if input uniquely identifies one command, execute it.
                # Matches against both built-in COMMANDS and installed skill commands so
@@ -4570,14 +4656,14 @@ class HermesCLI:
        )

        if not msg:
-            self.console.print("[bold red]Failed to load the bundled /plan skill[/]")
+            ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]")
            return

        _cprint(f"  📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
        if hasattr(self, '_pending_input'):
            self._pending_input.put(msg)
        else:
-            self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]")
+            ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]")
    
    def _handle_background_command(self, cmd: str):
        """Handle /background <prompt> — run a prompt in a separate background session.
@@ -4838,27 +4924,9 @@ class HermesCLI:

        Returns True if a launch command was executed (doesn't guarantee success).
        """
-        import shutil
        import subprocess as _sp

-        candidates = []
-        if system == "Darwin":
-            # macOS: try common app bundle locations
-            for app in (
-                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
-                "/Applications/Chromium.app/Contents/MacOS/Chromium",
-                "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
-                "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
-            ):
-                if os.path.isfile(app):
-                    candidates.append(app)
-        else:
-            # Linux: try common binary names
-            for name in ("google-chrome", "google-chrome-stable", "chromium-browser",
-                         "chromium", "brave-browser", "microsoft-edge"):
-                path = shutil.which(name)
-                if path:
-                    candidates.append(path)
+        candidates = _get_chrome_debug_candidates(system)

        if not candidates:
            return False
@@ -4984,13 +5052,13 @@ class HermesCLI:
                    pass
                print()
                print("🌐 Browser disconnected from live Chrome")
-                print("   Browser tools reverted to default mode (local headless or Browserbase)")
+                print("   Browser tools reverted to default mode (local headless or cloud provider)")
                print()

                if hasattr(self, '_pending_input'):
                    self._pending_input.put(
                        "[System note: The user has disconnected the browser tools from their live Chrome. "
-                        "Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
+                        "Browser tools are back to default mode (headless local browser or cloud provider).]"
                    )
            else:
                print()
@@ -5017,10 +5085,17 @@ class HermesCLI:
                    print("   Status: ✓ reachable")
                except (OSError, Exception):
                    print("   Status: ⚠ not reachable (Chrome may not be running)")
-            elif os.environ.get("BROWSERBASE_API_KEY"):
-                print("🌐 Browser: Browserbase (cloud)")
            else:
-                print("🌐 Browser: local headless Chromium (agent-browser)")
+                try:
+                    from tools.browser_tool import _get_cloud_provider
+                    provider = _get_cloud_provider()
+                except Exception:
+                    provider = None
+
+                if provider is not None:
+                    print(f"🌐 Browser: {provider.provider_name()} (cloud)")
+                else:
+                    print("🌐 Browser: local headless Chromium (agent-browser)")
            print()
            print("   /browser connect      — connect to your live Chrome")
            print("   /browser disconnect   — revert to default")
@@ -5948,7 +6023,7 @@ class HermesCLI:

        timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120)
        response_queue = queue.Queue()
-        is_open_ended = not choices or len(choices) == 0
+        is_open_ended = not choices

        self._clarify_state = {
            "question": question,
@@ -6231,14 +6306,6 @@ class HermesCLI:
            except Exception:
                pass

-    def _clear_current_input(self) -> None:
-        if getattr(self, "_app", None):
-            try:
-                self._app.current_buffer.text = ""
-            except Exception:
-                pass
-
-
    def chat(self, message, images: list = None) -> Optional[str]:
        """
        Send a message to the agent and get a response.
@@ -7469,18 +7536,26 @@ class HermesCLI:
        # wrapping of long lines so the input area always fits its content.
        def _input_height():
            try:
+                from prompt_toolkit.application import get_app
+                from prompt_toolkit.utils import get_cwidth
+
                doc = input_area.buffer.document
-                prompt_width = max(2, len(self._get_tui_prompt_text()))
-                available_width = shutil.get_terminal_size().columns - prompt_width
+                prompt_width = max(2, get_cwidth(self._get_tui_prompt_text()))
+                try:
+                    available_width = get_app().output.get_size().columns - prompt_width
+                except Exception:
+                    available_width = shutil.get_terminal_size((80, 24)).columns - prompt_width
                if available_width < 10:
                    available_width = 40
                visual_lines = 0
                for line in doc.lines:
-                    # Each logical line takes at least 1 visual row; long lines wrap
-                    if len(line) == 0:
+                    # Each logical line takes at least 1 visual row; long lines wrap.
+                    # Use prompt_toolkit's cell width so CJK wide characters count as 2.
+                    line_width = get_cwidth(line)
+                    if line_width <= 0:
                        visual_lines += 1
                    else:
-                        visual_lines += max(1, -(-len(line) // available_width))  # ceil division
+                        visual_lines += max(1, -(-line_width // available_width))  # ceil division
                return min(max(visual_lines, 1), 8)
            except Exception:
                return 1
@@ -7771,7 +7846,6 @@ class HermesCLI:
            title = '🔐 Sudo Password Required'
            body = 'Enter password below (hidden), or press Enter to skip'
            box_width = _panel_box_width(title, [body])
-            inner = max(0, box_width - 2)
            lines = []
            lines.append(('class:sudo-border', '╭─ '))
            lines.append(('class:sudo-title', title))
@@ -8073,6 +8147,25 @@ class HermesCLI:
                        # Periodic config watcher — auto-reload MCP on mcp_servers change
                        if not self._agent_running:
                            self._check_config_mcp_changes()
+                            # Check for background process completion notifications
+                            # while the agent is idle (user hasn't typed anything yet).
+                            try:
+                                from tools.process_registry import process_registry
+                                if not process_registry.completion_queue.empty():
+                                    completion = process_registry.completion_queue.get_nowait()
+                                    _exit = completion.get("exit_code", "?")
+                                    _cmd = completion.get("command", "unknown")
+                                    _sid = completion.get("session_id", "unknown")
+                                    _out = completion.get("output", "")
+                                    _synth = (
+                                        f"[SYSTEM: Background process {_sid} completed "
+                                        f"(exit code {_exit}).\n"
+                                        f"Command: {_cmd}\n"
+                                        f"Output:\n{_out}]"
+                                    )
+                                    self._pending_input.put(_synth)
+                            except Exception:
+                                pass
                        continue
                    
                    if not user_input:
@@ -8186,7 +8279,29 @@ class HermesCLI:
                                except Exception as e:
                                    _cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}")
                            threading.Thread(target=_restart_recording, daemon=True).start()
-                    
+
+                        # Drain process completion notifications — any background
+                        # process that finished with notify_on_complete while the
+                        # agent was running (or before) gets auto-injected as a
+                        # new user message so the agent can react to it.
+                        try:
+                            from tools.process_registry import process_registry
+                            while not process_registry.completion_queue.empty():
+                                completion = process_registry.completion_queue.get_nowait()
+                                _exit = completion.get("exit_code", "?")
+                                _cmd = completion.get("command", "unknown")
+                                _sid = completion.get("session_id", "unknown")
+                                _out = completion.get("output", "")
+                                _synth = (
+                                    f"[SYSTEM: Background process {_sid} completed "
+                                    f"(exit code {_exit}).\n"
+                                    f"Command: {_cmd}\n"
+                                    f"Output:\n{_out}]"
+                                )
+                                self._pending_input.put(_synth)
+                        except Exception:
+                            pass  # Non-fatal — don't break the main loop
+
                except Exception as e:
                    print(f"Error: {e}")
        
@@ -25,7 +25,6 @@ except ImportError:
        import msvcrt
    except ImportError:
        msvcrt = None
-import time
 from pathlib import Path
 from typing import Optional

@@ -159,6 +158,44 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
    }


+# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
+# _send_media_via_adapter matches the lowercased file suffix against these
+# sets in the order audio, video, image; a suffix found in none of them is
+# sent with send_document.
+_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
+_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
+_IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})
+
+
+def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
+    """Deliver extracted MEDIA files through a live adapter as native attachments.
+
+    The lowercased file extension picks the adapter coroutine: audio goes to
+    ``send_voice``, video to ``send_video``, images to ``send_image_file`` and
+    everything else to ``send_document`` — the same routing as
+    ``BasePlatformAdapter._process_message_background``.  Each coroutine is
+    scheduled on *loop* with ``asyncio.run_coroutine_threadsafe`` and waited on
+    for at most 30 seconds.  A failure on one file is logged as a warning and
+    does not prevent the remaining files from being attempted.
+
+    Args:
+        adapter: Live platform adapter providing the four ``send_*`` coroutines.
+        chat_id: Destination chat identifier.
+        media_files: Sequence of ``(path, is_voice)`` pairs.  The flag is not
+            consulted here; routing is by extension only.
+        metadata: Optional send metadata, passed through unchanged.
+        loop: Event loop the adapter coroutines are scheduled on.
+        job: Cron job dict; only its ``"id"`` is read, for log messages.
+    """
+    from pathlib import Path
+
+    # (extension set, adapter method, keyword carrying the path).  The first
+    # matching set wins; an unmatched suffix falls back to a plain document.
+    routes = (
+        (_AUDIO_EXTS, "send_voice", "audio_path"),
+        (_VIDEO_EXTS, "send_video", "video_path"),
+        (_IMAGE_EXTS, "send_image_file", "image_path"),
+    )
+
+    for media_path, _is_voice in media_files:
+        try:
+            suffix = Path(media_path).suffix.lower()
+            method_name, path_kwarg = next(
+                ((name, kwarg) for exts, name, kwarg in routes if suffix in exts),
+                ("send_document", "file_path"),
+            )
+            send = getattr(adapter, method_name)
+            pending = asyncio.run_coroutine_threadsafe(
+                send(**{"chat_id": chat_id, path_kwarg: media_path, "metadata": metadata}),
+                loop,
+            )
+            outcome = pending.result(timeout=30)
+            # A falsy result, or one without an explicit success=False, counts as delivered.
+            if not outcome or getattr(outcome, "success", True):
+                continue
+            logger.warning(
+                "Job '%s': media send failed for %s: %s",
+                job.get("id", "?"), media_path, getattr(outcome, "error", "unknown"),
+            )
+        except Exception as e:
+            logger.warning("Job '%s': failed to send media %s: %s", job.get("id", "?"), media_path, e)
+
+
 def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
    """
    Deliver job output to the configured target (origin chat, specific platform, etc.).
@@ -237,24 +274,38 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
    else:
        delivery_content = content

+    # Extract MEDIA: tags so attachments are forwarded as files, not raw text
+    from gateway.platforms.base import BasePlatformAdapter
+    media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
+
    # Prefer the live adapter when the gateway is running — this supports E2EE
    # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
    runtime_adapter = (adapters or {}).get(platform)
    if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
        send_metadata = {"thread_id": thread_id} if thread_id else None
        try:
-            future = asyncio.run_coroutine_threadsafe(
-                runtime_adapter.send(chat_id, delivery_content, metadata=send_metadata),
-                loop,
-            )
-            send_result = future.result(timeout=60)
-            if send_result and not getattr(send_result, "success", True):
-                err = getattr(send_result, "error", "unknown")
-                logger.warning(
-                    "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
-                    job["id"], platform_name, chat_id, err,
+            # Send cleaned text (MEDIA tags stripped) — not the raw content
+            text_to_send = cleaned_delivery_content.strip()
+            adapter_ok = True
+            if text_to_send:
+                future = asyncio.run_coroutine_threadsafe(
+                    runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
+                    loop,
                )
-            else:
+                send_result = future.result(timeout=60)
+                if send_result and not getattr(send_result, "success", True):
+                    err = getattr(send_result, "error", "unknown")
+                    logger.warning(
+                        "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
+                        job["id"], platform_name, chat_id, err,
+                    )
+                    adapter_ok = False  # fall through to standalone path
+
+            # Send extracted media files as native attachments via the live adapter
+            if adapter_ok and media_files:
+                _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
+
+            if adapter_ok:
                logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
                return
        except Exception as e:
@@ -264,7 +315,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
            )

    # Standalone path: run the async send in a fresh event loop (safe from any thread)
-    coro = _send_to_platform(platform, pconfig, chat_id, delivery_content, thread_id=thread_id)
+    coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
    try:
        result = asyncio.run(coro)
    except RuntimeError:
@@ -275,7 +326,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None:
        coro.close()
        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, delivery_content, thread_id=thread_id))
+            future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
            result = future.result(timeout=30)
    except Exception as e:
        logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
@@ -293,8 +344,15 @@ _SCRIPT_TIMEOUT = 120  # seconds
 def _run_job_script(script_path: str) -> tuple[bool, str]:
    """Execute a cron job's data-collection script and capture its output.

+    Scripts must reside within HERMES_HOME/scripts/.  Both relative and
+    absolute paths are resolved and validated against this directory to
+    prevent arbitrary script execution via path traversal or absolute
+    path injection.
+
    Args:
-        script_path: Path to a Python script (resolved via HERMES_HOME/scripts/ or absolute).
+        script_path: Path to a Python script.  Relative paths are resolved
+            against HERMES_HOME/scripts/.  Absolute and ~-prefixed paths
+            are also validated to ensure they stay within the scripts dir.

    Returns:
        (success, output) — on failure *output* contains the error message so the
@@ -302,16 +360,25 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
    """
    from hermes_constants import get_hermes_home

-    path = Path(script_path).expanduser()
-    if not path.is_absolute():
-        # Resolve relative paths against HERMES_HOME/scripts/
-        scripts_dir = get_hermes_home() / "scripts"
-        path = (scripts_dir / path).resolve()
-        # Guard against path traversal (e.g. "../../etc/passwd")
-        try:
-            path.relative_to(scripts_dir.resolve())
-        except ValueError:
-            return False, f"Script path escapes the scripts directory: {script_path!r}"
+    scripts_dir = get_hermes_home() / "scripts"
+    scripts_dir.mkdir(parents=True, exist_ok=True)
+    scripts_dir_resolved = scripts_dir.resolve()
+
+    raw = Path(script_path).expanduser()
+    if raw.is_absolute():
+        path = raw.resolve()
+    else:
+        path = (scripts_dir / raw).resolve()
+
+    # Guard against path traversal, absolute path injection, and symlink
+    # escape — scripts MUST reside within HERMES_HOME/scripts/.
+    try:
+        path.relative_to(scripts_dir_resolved)
+    except ValueError:
+        return False, (
+            f"Blocked: script path resolves outside the scripts directory "
+            f"({scripts_dir_resolved}): {script_path!r}"
+        )

    if not path.exists():
        return False, f"Script not found: {path}"
@@ -469,14 +536,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

-    # Inject origin context so the agent's send_message tool knows the chat
-    if origin:
-        os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
-        os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
-        if origin.get("chat_name"):
-            os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
-
    try:
+        # Inject origin context so the agent's send_message tool knows the chat.
+        # Must be INSIDE the try block so the finally cleanup always runs.
+        if origin:
+            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
+            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
+            if origin.get("chat_name"):
+                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -797,7 +864,7 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                # output is already saved above).  Failed jobs always deliver.
                deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
                should_deliver = bool(deliver_content)
-                if should_deliver and success and deliver_content.strip().upper().startswith(SILENT_MARKER):
+                if should_deliver and success and SILENT_MARKER in deliver_content.strip().upper():
                    logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
                    should_deliver = False

@@ -44,7 +44,7 @@ import tempfile
 import time
 import uuid
 from collections import defaultdict
-from pathlib import Path
+from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import Any, Dict, List, Optional, Tuple, Union

 # Ensure repo root is on sys.path for imports
@@ -148,6 +148,62 @@ MODAL_INCOMPATIBLE_TASKS = {
 # Tar extraction helper
 # =============================================================================

+def _normalize_tar_member_parts(member_name: str) -> list:
+    """Split a tar member name into validated, relative path components.
+
+    Backslashes are treated as separators.  ``""`` and ``"."`` components are
+    dropped from the result.
+
+    Raises:
+        ValueError: If the name is empty, is absolute under POSIX or Windows
+            rules, carries a Windows drive or UNC prefix, contains a ``..``
+            component, or has no components left after normalization.
+    """
+    problem = f"Unsafe archive member path: {member_name}"
+
+    slashed = member_name.replace("\\", "/")
+    if not slashed:
+        raise ValueError(problem)
+
+    as_posix = PurePosixPath(slashed)
+    # The Windows view is built from the raw name so drive letters and UNC
+    # prefixes are recognised exactly as written in the archive.
+    as_windows = PureWindowsPath(member_name)
+    if as_posix.is_absolute() or as_windows.is_absolute() or as_windows.drive:
+        raise ValueError(problem)
+
+    components = []
+    for piece in as_posix.parts:
+        if piece == "..":
+            raise ValueError(problem)
+        if piece not in ("", "."):
+            components.append(piece)
+
+    if not components:
+        raise ValueError(problem)
+    return components
+
+
+def _safe_extract_tar(tar: tarfile.TarFile, target_dir: Path) -> None:
+    """Extract a tar archive without allowing traversal or link entries.
+
+    Only directories and regular files are accepted.  Every member is
+    validated before anything is written, so an archive containing an unsafe
+    or unsupported entry is rejected as a whole instead of leaving earlier
+    members behind on disk.
+
+    Args:
+        tar: Open archive to read members from.
+        target_dir: Destination directory; created if it does not exist.
+
+    Raises:
+        ValueError: If a member name is rejected by
+            ``_normalize_tar_member_parts``, resolves outside *target_dir*,
+            is neither a directory nor a regular file, or cannot be read.
+    """
+    target_dir.mkdir(parents=True, exist_ok=True)
+    target_root = target_dir.resolve()
+
+    # Pass 1: validate every member up front.  Nothing is written here, so a
+    # bad entry late in the archive cannot leave a half-extracted tree.
+    planned = []
+    for member in tar.getmembers():
+        parts = _normalize_tar_member_parts(member.name)
+        # resolve() follows symlinks that already exist under target_dir, so a
+        # pre-existing link pointing outside the root is caught as well.
+        target_real = target_dir.joinpath(*parts).resolve(strict=False)
+
+        try:
+            target_real.relative_to(target_root)
+        except ValueError as exc:
+            raise ValueError(f"Unsafe archive member path: {member.name}") from exc
+
+        # Symlinks, hard links, devices and FIFOs are refused outright.
+        if not (member.isdir() or member.isfile()):
+            raise ValueError(f"Unsupported archive member type: {member.name}")
+
+        planned.append((member, target_real))
+
+    # Pass 2: write the validated members in archive order.
+    for member, target_real in planned:
+        if member.isdir():
+            target_real.mkdir(parents=True, exist_ok=True)
+            continue
+
+        target_real.parent.mkdir(parents=True, exist_ok=True)
+        extracted = tar.extractfile(member)
+        if extracted is None:
+            raise ValueError(f"Cannot read archive member: {member.name}")
+
+        with extracted, open(target_real, "wb") as dst:
+            shutil.copyfileobj(extracted, dst)
+
+        try:
+            # Keep only the permission bits; setuid/setgid/sticky are dropped.
+            os.chmod(target_real, member.mode & 0o777)
+        except OSError:
+            # Best effort: a filesystem that refuses chmod should not fail extraction.
+            pass
+
+
 def _extract_base64_tar(b64_data: str, target_dir: Path):
    """Extract a base64-encoded tar.gz archive into target_dir."""
    if not b64_data:
@@ -155,7 +211,7 @@ def _extract_base64_tar(b64_data: str, target_dir: Path):
    raw = base64.b64decode(b64_data)
    buf = io.BytesIO(raw)
    with tarfile.open(fileobj=buf, mode="r:gz") as tar:
-        tar.extractall(path=str(target_dir))
+        _safe_extract_tar(tar, target_dir)


 # =============================================================================
@@ -24,7 +24,8 @@ from pathlib import Path

 logger = logging.getLogger("hooks.boot-md")

-HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+from hermes_constants import get_hermes_home
+HERMES_HOME = get_hermes_home()
 BOOT_FILE = HERMES_HOME / "BOOT.md"


@@ -12,6 +12,7 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional

 from hermes_cli.config import get_hermes_home
+from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -86,9 +87,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
    }

    try:
-        DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True)
-        with open(DIRECTORY_PATH, "w", encoding="utf-8") as f:
-            json.dump(directory, f, indent=2, ensure_ascii=False)
+        atomic_json_write(DIRECTORY_PATH, directory)
    except Exception as e:
        logger.warning("Channel directory: failed to write: %s", e)

@@ -125,7 +124,6 @@ def _build_discord(adapter) -> List[Dict[str, str]]:

 def _build_slack(adapter) -> List[Dict[str, str]]:
    """List Slack channels the bot has joined."""
-    channels = []
    # Slack adapter may expose a web client
    client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
    if not client:
@@ -556,6 +556,18 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
                if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
                    os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()
+                # ignored_channels: channels where bot never responds (even when mentioned)
+                ic = discord_cfg.get("ignored_channels")
+                if ic is not None and not os.getenv("DISCORD_IGNORED_CHANNELS"):
+                    if isinstance(ic, list):
+                        ic = ",".join(str(v) for v in ic)
+                    os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic)
+                # no_thread_channels: channels where bot responds directly without creating thread
+                ntc = discord_cfg.get("no_thread_channels")
+                if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"):
+                    if isinstance(ntc, list):
+                        ntc = ",".join(str(v) for v in ntc)
+                    os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)

            # Telegram settings → env vars (env vars take precedence)
            telegram_cfg = yaml_cfg.get("telegram", {})
@@ -570,6 +582,8 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["TELEGRAM_FREE_RESPONSE_CHATS"] = str(frc)
+                if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
+                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()

            whatsapp_cfg = yaml_cfg.get("whatsapp", {})
            if isinstance(whatsapp_cfg, dict):
@@ -779,6 +793,9 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            config.platforms[Platform.MATRIX].extra["password"] = matrix_password
        matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes")
        config.platforms[Platform.MATRIX].extra["encryption"] = matrix_e2ee
+        matrix_device_id = os.getenv("MATRIX_DEVICE_ID", "")
+        if matrix_device_id:
+            config.platforms[Platform.MATRIX].extra["device_id"] = matrix_device_id
    matrix_home = os.getenv("MATRIX_HOME_ROOM")
    if matrix_home and Platform.MATRIX in config.platforms:
        config.platforms[Platform.MATRIX].home_channel = HomeChannel(
@@ -314,38 +314,4 @@ def parse_deliver_spec(
    return deliver


-def build_delivery_context_for_tool(
-    config: GatewayConfig,
-    origin: Optional[SessionSource] = None
-) -> Dict[str, Any]:
-    """
-    Build context for the unified cronjob tool to understand delivery options.
-    
-    This is passed to the tool so it can validate and explain delivery targets.
-    """
-    connected = config.get_connected_platforms()
-    
-    options = {
-        "origin": {
-            "description": "Back to where this job was created",
-            "available": origin is not None,
-        },
-        "local": {
-            "description": "Save to local files only",
-            "available": True,
-        }
-    }
-    
-    for platform in connected:
-        home = config.get_home_channel(platform)
-        options[platform.value] = {
-            "description": f"{platform.value.title()} home channel",
-            "available": True,
-            "home_channel": home.to_dict() if home else None,
-        }
-    
-    return {
-        "origin": origin.to_dict() if origin else None,
-        "options": options,
-        "always_log_local": config.always_log_local,
-    }
+
@@ -21,6 +21,8 @@ Storage: ~/.hermes/pairing/
 import json
 import os
 import secrets
+import tempfile
+import threading
 import time
 from pathlib import Path
 from typing import Optional
@@ -45,13 +47,29 @@ PAIRING_DIR = get_hermes_dir("platforms/pairing", "pairing")


 def _secure_write(path: Path, data: str) -> None:
-    """Write data to file with restrictive permissions (owner read/write only)."""
+    """Write data to file with restrictive permissions (owner read/write only).
+
+    Uses a temp-file + atomic rename so readers always see either the old
+    complete file or the new one — never a partial write.
+    """
    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(data, encoding="utf-8")
+    fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
    try:
-        os.chmod(path, 0o600)
-    except OSError:
-        pass  # Windows doesn't support chmod the same way
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            f.write(data)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, str(path))
+        try:
+            os.chmod(path, 0o600)
+        except OSError:
+            pass  # Windows doesn't support chmod the same way
+    except BaseException:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise


 class PairingStore:
@@ -66,6 +84,9 @@ class PairingStore:

    def __init__(self):
        PAIRING_DIR.mkdir(parents=True, exist_ok=True)
+        # Protects all read-modify-write cycles. The gateway runs multiple
+        # platform adapters concurrently in threads sharing one PairingStore.
+        self._lock = threading.RLock()

    def _pending_path(self, platform: str) -> Path:
        return PAIRING_DIR / f"{platform}-pending.json"
@@ -105,7 +126,7 @@ class PairingStore:
        return results

    def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None:
-        """Add a user to the approved list."""
+        """Add a user to the approved list. Must be called under self._lock."""
        approved = self._load_json(self._approved_path(platform))
        approved[user_id] = {
            "user_name": user_name,
@@ -116,11 +137,12 @@ class PairingStore:
    def revoke(self, platform: str, user_id: str) -> bool:
        """Remove a user from the approved list. Returns True if found."""
        path = self._approved_path(platform)
-        approved = self._load_json(path)
-        if user_id in approved:
-            del approved[user_id]
-            self._save_json(path, approved)
-            return True
+        with self._lock:
+            approved = self._load_json(path)
+            if user_id in approved:
+                del approved[user_id]
+                self._save_json(path, approved)
+                return True
        return False

    # ----- Pending codes -----
@@ -136,36 +158,37 @@ class PairingStore:
          - Max pending codes reached for this platform
          - User/platform is in lockout due to failed attempts
        """
-        self._cleanup_expired(platform)
+        with self._lock:
+            self._cleanup_expired(platform)

-        # Check lockout
-        if self._is_locked_out(platform):
-            return None
+            # Check lockout
+            if self._is_locked_out(platform):
+                return None

-        # Check rate limit for this specific user
-        if self._is_rate_limited(platform, user_id):
-            return None
+            # Check rate limit for this specific user
+            if self._is_rate_limited(platform, user_id):
+                return None

-        # Check max pending
-        pending = self._load_json(self._pending_path(platform))
-        if len(pending) >= MAX_PENDING_PER_PLATFORM:
-            return None
+            # Check max pending
+            pending = self._load_json(self._pending_path(platform))
+            if len(pending) >= MAX_PENDING_PER_PLATFORM:
+                return None

-        # Generate cryptographically random code
-        code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
+            # Generate cryptographically random code
+            code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))

-        # Store pending request
-        pending[code] = {
-            "user_id": user_id,
-            "user_name": user_name,
-            "created_at": time.time(),
-        }
-        self._save_json(self._pending_path(platform), pending)
+            # Store pending request
+            pending[code] = {
+                "user_id": user_id,
+                "user_name": user_name,
+                "created_at": time.time(),
+            }
+            self._save_json(self._pending_path(platform), pending)

-        # Record rate limit
-        self._record_rate_limit(platform, user_id)
+            # Record rate limit
+            self._record_rate_limit(platform, user_id)

-        return code
+            return code

    def approve_code(self, platform: str, code: str) -> Optional[dict]:
        """
@@ -173,24 +196,25 @@ class PairingStore:

        Returns {user_id, user_name} on success, None if code is invalid/expired.
        """
-        self._cleanup_expired(platform)
-        code = code.upper().strip()
+        with self._lock:
+            self._cleanup_expired(platform)
+            code = code.upper().strip()

-        pending = self._load_json(self._pending_path(platform))
-        if code not in pending:
-            self._record_failed_attempt(platform)
-            return None
+            pending = self._load_json(self._pending_path(platform))
+            if code not in pending:
+                self._record_failed_attempt(platform)
+                return None

-        entry = pending.pop(code)
-        self._save_json(self._pending_path(platform), pending)
+            entry = pending.pop(code)
+            self._save_json(self._pending_path(platform), pending)

-        # Add to approved list
-        self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
+            # Add to approved list
+            self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))

-        return {
-            "user_id": entry["user_id"],
-            "user_name": entry.get("user_name", ""),
-        }
+            return {
+                "user_id": entry["user_id"],
+                "user_name": entry.get("user_name", ""),
+            }

    def list_pending(self, platform: str = None) -> list:
        """List pending pairing requests, optionally filtered by platform."""
@@ -212,12 +236,13 @@ class PairingStore:

    def clear_pending(self, platform: str = None) -> int:
        """Clear all pending requests. Returns count removed."""
-        count = 0
-        platforms = [platform] if platform else self._all_platforms("pending")
-        for p in platforms:
-            pending = self._load_json(self._pending_path(p))
-            count += len(pending)
-            self._save_json(self._pending_path(p), {})
+        with self._lock:
+            count = 0
+            platforms = [platform] if platform else self._all_platforms("pending")
+            for p in platforms:
+                pending = self._load_json(self._pending_path(p))
+                count += len(pending)
+                self._save_json(self._pending_path(p), {})
        return count

    # ----- Rate limiting and lockout -----
@@ -20,6 +20,7 @@ Requires:
 """

 import asyncio
+import hmac
 import json
 import logging
 import os
@@ -370,7 +371,7 @@ class APIServerAdapter(BasePlatformAdapter):
        auth_header = request.headers.get("Authorization", "")
        if auth_header.startswith("Bearer "):
            token = auth_header[7:].strip()
-            if token == self._api_key:
+            if hmac.compare_digest(token, self._api_key):
                return None  # Auth OK

        return web.json_response(
@@ -563,8 +564,10 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            def _on_tool_progress(name, preview, args):
+            def _on_tool_progress(event_type, name, preview, args, **kwargs):
                """Inject tool progress into the SSE stream for Open WebUI."""
+                if event_type != "tool.started":
+                    return  # Only show tool start events in chat stream
                if name.startswith("_"):
                    return  # Skip internal events (_thinking)
                from agent.display import get_tool_emoji
@@ -815,9 +818,29 @@ class APIServerAdapter(BasePlatformAdapter):
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)

-        # Reconstruct conversation history from previous_response_id
+        # Accept explicit conversation_history from the request body.
+        # This lets stateless clients supply their own history instead of
+        # relying on server-side response chaining via previous_response_id.
+        # Precedence: explicit conversation_history > previous_response_id.
        conversation_history: List[Dict[str, str]] = []
-        if previous_response_id:
+        raw_history = body.get("conversation_history")
+        if raw_history:
+            if not isinstance(raw_history, list):
+                return web.json_response(
+                    _openai_error("'conversation_history' must be an array of message objects"),
+                    status=400,
+                )
+            for i, entry in enumerate(raw_history):
+                if not isinstance(entry, dict) or "role" not in entry or "content" not in entry:
+                    return web.json_response(
+                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
+                        status=400,
+                    )
+                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
+            if previous_response_id:
+                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
+
+        if not conversation_history and previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored is None:
                return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
@@ -1403,14 +1426,49 @@ class APIServerAdapter(BasePlatformAdapter):

        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")
+
+        # Accept explicit conversation_history from the request body.
+        # Precedence: explicit conversation_history > previous_response_id.
        conversation_history: List[Dict[str, str]] = []
-        if previous_response_id:
+        raw_history = body.get("conversation_history")
+        if raw_history:
+            if not isinstance(raw_history, list):
+                return web.json_response(
+                    _openai_error("'conversation_history' must be an array of message objects"),
+                    status=400,
+                )
+            for i, entry in enumerate(raw_history):
+                if not isinstance(entry, dict) or "role" not in entry or "content" not in entry:
+                    return web.json_response(
+                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
+                        status=400,
+                    )
+                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
+            if previous_response_id:
+                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
+
+        if not conversation_history and previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored:
                conversation_history = list(stored.get("conversation_history", []))
                if instructions is None:
                    instructions = stored.get("instructions")

+        # When input is a multi-message array, extract all but the last
+        # message as conversation history (the last becomes user_message).
+        # Only fires when conversation_history is still empty after the steps above.
+        if not conversation_history and isinstance(raw_input, list) and len(raw_input) > 1:
+            for msg in raw_input[:-1]:
+                if isinstance(msg, dict) and msg.get("role") and msg.get("content"):
+                    content = msg["content"]
+                    if isinstance(content, list):
+                        # Flatten multi-part content blocks to text
+                        content = " ".join(
+                            part.get("text", "") for part in content
+                            if isinstance(part, dict) and part.get("type") == "text"
+                        )
+                    conversation_history.append({"role": msg["role"], "content": str(content)})
+
        session_id = body.get("session_id") or run_id
        ephemeral_system_prompt = instructions

@@ -12,6 +12,7 @@ import random
 import re
 import uuid
 from abc import ABC, abstractmethod
+from urllib.parse import urlsplit

 logger = logging.getLogger(__name__)
 from dataclasses import dataclass, field
@@ -26,7 +27,6 @@ sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))

 from gateway.config import Platform, PlatformConfig
 from gateway.session import SessionSource, build_session_key
-from hermes_cli.config import get_hermes_home
 from hermes_constants import get_hermes_dir


@@ -36,6 +36,43 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
 )


+def _safe_url_for_log(url: str, max_len: int = 80) -> str:
+    """Return a URL string for logs (query/fragment/userinfo stripped from absolute URLs only)."""
+    if max_len <= 0:
+        return ""
+
+    if url is None:
+        return ""
+
+    raw = str(url)
+    if not raw:
+        return ""
+
+    try:
+        parsed = urlsplit(raw)
+    except Exception:
+        return raw[:max_len]
+
+    if parsed.scheme and parsed.netloc:
+        # Strip potential embedded credentials (user:pass@host).
+        netloc = parsed.netloc.rsplit("@", 1)[-1]
+        base = f"{parsed.scheme}://{netloc}"
+        path = parsed.path or ""
+        if path and path != "/":
+            basename = path.rsplit("/", 1)[-1]
+            safe = f"{base}/.../{basename}" if basename else f"{base}/..."
+        else:
+            safe = base
+    else:
+        safe = raw
+
+    if len(safe) <= max_len:
+        return safe
+    if max_len <= 3:
+        return "." * max_len
+    return f"{safe[:max_len - 3]}..."
+
+
 # ---------------------------------------------------------------------------
 # Image cache utilities
 #
@@ -87,7 +124,14 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->

    Returns:
        Absolute path to the cached image file as a string.
+
+    Raises:
+        ValueError: If the URL targets a private/internal network (SSRF protection).
    """
+    from tools.url_safety import is_safe_url
+    if not is_safe_url(url):
+        raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}")
+
    import asyncio
    import httpx
    import logging as _logging
@@ -112,8 +156,14 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    raise
                if attempt < retries:
                    wait = 1.5 * (attempt + 1)
-                    _log.debug("Media cache retry %d/%d for %s (%.1fs): %s",
-                               attempt + 1, retries, url[:80], wait, exc)
+                    _log.debug(
+                        "Media cache retry %d/%d for %s (%.1fs): %s",
+                        attempt + 1,
+                        retries,
+                        _safe_url_for_log(url),
+                        wait,
+                        exc,
+                    )
                    await asyncio.sleep(wait)
                    continue
                raise
@@ -189,7 +239,14 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->

    Returns:
        Absolute path to the cached audio file as a string.
+
+    Raises:
+        ValueError: If the URL targets a private/internal network (SSRF protection).
    """
+    from tools.url_safety import is_safe_url
+    if not is_safe_url(url):
+        raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}")
+
    import asyncio
    import httpx
    import logging as _logging
@@ -214,8 +271,14 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    raise
                if attempt < retries:
                    wait = 1.5 * (attempt + 1)
-                    _log.debug("Audio cache retry %d/%d for %s (%.1fs): %s",
-                               attempt + 1, retries, url[:80], wait, exc)
+                    _log.debug(
+                        "Audio cache retry %d/%d for %s (%.1fs): %s",
+                        attempt + 1,
+                        retries,
+                        _safe_url_for_log(url),
+                        wait,
+                        exc,
+                    )
                    await asyncio.sleep(wait)
                    continue
                raise
@@ -435,6 +498,9 @@ class BasePlatformAdapter(ABC):
        self._background_tasks: set[asyncio.Task] = set()
        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()
+        # Chats where typing indicator is paused (e.g. during approval waits).
+        # _keep_typing skips send_typing when the chat_id is in this set.
+        self._typing_paused: set = set()

    @property
    def has_fatal_error(self) -> bool:
@@ -519,6 +585,16 @@ class BasePlatformAdapter(ABC):
        """
        self._message_handler = handler
    
+    def set_session_store(self, session_store: Any) -> None:
+        """
+        Set the session store for checking active sessions.
+        
+        Used by adapters that need to check if a thread/conversation
+        has an active session before processing messages (e.g., Slack
+        thread replies without explicit mentions).
+        """
+        self._session_store = session_store
+    
    @abstractmethod
    async def connect(self) -> bool:
        """
@@ -884,10 +960,16 @@ class BasePlatformAdapter(ABC):
        
        Telegram/Discord typing status expires after ~5 seconds, so we refresh every 2
        to recover quickly after progress messages interrupt it.
+        
+        Skips send_typing when the chat is in ``_typing_paused`` (e.g. while
+        the agent is waiting for dangerous-command approval).  This is critical
+        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
+        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
        """
        try:
            while True:
-                await self.send_typing(chat_id, metadata=metadata)
+                if chat_id not in self._typing_paused:
+                    await self.send_typing(chat_id, metadata=metadata)
                await asyncio.sleep(interval)
        except asyncio.CancelledError:
            pass  # Normal cancellation when handler completes
@@ -901,7 +983,20 @@ class BasePlatformAdapter(ABC):
                    await self.stop_typing(chat_id)
                except Exception:
                    pass
-    
+            self._typing_paused.discard(chat_id)
+
+    def pause_typing_for_chat(self, chat_id: str) -> None:
+        """Pause typing indicator for a chat (e.g. during approval waits).
+
+        Thread-safe (CPython GIL) — can be called from the sync agent thread
+        while ``_keep_typing`` runs on the async event loop.
+        """
+        self._typing_paused.add(chat_id)
+
+    def resume_typing_for_chat(self, chat_id: str) -> None:
+        """Resume typing indicator for a chat after approval resolves."""
+        self._typing_paused.discard(chat_id)
+
    # ── Processing lifecycle hooks ──────────────────────────────────────────
    # Subclasses override these to react to message processing events
    # (e.g. Discord adds 👀/✅/❌ reactions).
@@ -1024,6 +1119,22 @@ class BasePlatformAdapter(ABC):
            logger.error("[%s] Fallback send also failed: %s", self.name, fallback_result.error)
        return fallback_result

+    @staticmethod
+    def _merge_caption(existing_text: Optional[str], new_text: str) -> str:
+        """Merge a new caption into existing text, avoiding duplicates.
+
+        Compares against each blank-line-separated block of the existing text
+        by exact match (not substring), so a shorter caption is not silently
+        dropped for being a substring of a longer one (e.g. "Meeting" inside
+        "Meeting agenda").  Surrounding whitespace is stripped before comparing.
+        """
+        if not existing_text:
+            return new_text
+        existing_captions = [c.strip() for c in existing_text.split("\n\n")]
+        if new_text.strip() not in existing_captions:
+            return f"{existing_text}\n\n{new_text}".strip()
+        return existing_text
+
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -1043,16 +1154,20 @@ class BasePlatformAdapter(ABC):
        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
-            # /approve and /deny must bypass the active-session guard.
-            # The agent thread is blocked on threading.Event.wait() inside
-            # tools/approval.py — queuing these commands creates a deadlock:
-            # the agent waits for approval, approval waits for agent to finish.
-            # Dispatch directly to the message handler without touching session
-            # lifecycle (no competing background task, no session guard removal).
+            # Certain commands must bypass the active-session guard and be
+            # dispatched directly to the gateway runner.  Without this, they
+            # are queued as pending messages and either:
+            #   - leak into the conversation as user text (/stop, /new), or
+            #   - deadlock (/approve, /deny — agent is blocked on Event.wait)
+            #
+            # Dispatch inline: call the message handler directly and send the
+            # response.  Do NOT use _process_message_background — it manages
+            # session lifecycle and its cleanup races with the running task
+            # (see PR #4926).
            cmd = event.get_command()
-            if cmd in ("approve", "deny"):
+            if cmd in ("approve", "deny", "status", "stop", "new", "reset"):
                logger.debug(
-                    "[%s] Approval command '/%s' bypassing active-session guard for %s",
+                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
                )
                try:
@@ -1066,29 +1181,7 @@ class BasePlatformAdapter(ABC):
                            metadata=_thread_meta,
                        )
                except Exception as e:
-                    logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True)
-                return
-
-            # /status must also bypass the active-session guard so it always
-            # returns a system-generated response instead of being queued as
-            # user text and passed to the agent (#5046).
-            if cmd == "status":
-                logger.debug(
-                    "[%s] Status command bypassing active-session guard for %s",
-                    self.name, session_key,
-                )
-                try:
-                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-                    response = await self._message_handler(event)
-                    if response:
-                        await self._send_with_retry(
-                            chat_id=event.source.chat_id,
-                            content=response,
-                            reply_to=event.message_id,
-                            metadata=_thread_meta,
-                        )
-                except Exception as e:
-                    logger.error("[%s] Status dispatch failed: %s", self.name, e, exc_info=True)
+                    logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                return

            # Special case: photo bursts/albums frequently arrive as multiple near-
@@ -1101,10 +1194,7 @@ class BasePlatformAdapter(ABC):
                    existing.media_urls.extend(event.media_urls)
                    existing.media_types.extend(event.media_types)
                    if event.text:
-                        if not existing.text:
-                            existing.text = event.text
-                        elif event.text not in existing.text:
-                            existing.text = f"{existing.text}\n\n{event.text}".strip()
+                        existing.text = self._merge_caption(existing.text, event.text)
                else:
                    self._pending_messages[session_key] = event
                return  # Don't interrupt now - will run after current task completes
@@ -1266,7 +1356,12 @@ class BasePlatformAdapter(ABC):
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
-                        logger.info("[%s] Sending image: %s (alt=%s)", self.name, image_url[:80], alt_text[:30] if alt_text else "")
+                        logger.info(
+                            "[%s] Sending image: %s (alt=%s)",
+                            self.name,
+                            _safe_url_for_log(image_url),
+                            alt_text[:30] if alt_text else "",
+                        )
                        # Route animated GIFs through send_animation for proper playback
                        if self._is_animation_url(image_url):
                            img_result = await self.send_animation(
@@ -55,6 +55,7 @@ from gateway.platforms.base import (
    cache_document_from_bytes,
    SUPPORTED_DOCUMENT_TYPES,
 )
+from tools.url_safety import is_safe_url


 def _clean_discord_id(entry: str) -> str:
@@ -1285,6 +1286,10 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

+        if not is_safe_url(image_url):
+            logger.warning("[%s] Blocked unsafe image URL during Discord send_image", self.name)
+            return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
+
        try:
            import aiohttp

@@ -1695,6 +1700,47 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_btw(interaction: discord.Interaction, question: str):
            await self._run_simple_slash(interaction, f"/btw {question}")

+        # Register installed skills as native slash commands (parity with
+        # Telegram, which uses telegram_menu_commands() in commands.py).
+        # Discord allows up to 100 application commands globally.
+        _DISCORD_CMD_LIMIT = 100
+        try:
+            from hermes_cli.commands import discord_skill_commands
+
+            existing_names = {cmd.name for cmd in tree.get_commands()}
+            remaining_slots = max(0, _DISCORD_CMD_LIMIT - len(existing_names))
+
+            skill_entries, skipped = discord_skill_commands(
+                max_slots=remaining_slots,
+                reserved_names=existing_names,
+            )
+
+            for discord_name, description, cmd_key in skill_entries:
+                # Closure factory to capture cmd_key per iteration
+                def _make_skill_handler(_key: str):
+                    async def _skill_slash(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(interaction, f"{_key} {args}".strip())
+                    return _skill_slash
+
+                handler = _make_skill_handler(cmd_key)
+                handler.__name__ = f"skill_{discord_name.replace('-', '_')}"
+
+                cmd = discord.app_commands.Command(
+                    name=discord_name,
+                    description=description,
+                    callback=handler,
+                )
+                discord.app_commands.describe(args="Optional arguments for the skill")(cmd)
+                tree.add_command(cmd)
+
+            if skipped:
+                logger.warning(
+                    "[%s] Discord slash command limit reached (%d): %d skill(s) not registered",
+                    self.name, _DISCORD_CMD_LIMIT, skipped,
+                )
+        except Exception as exc:
+            logger.warning("[%s] Failed to register skill slash commands: %s", self.name, exc)
+
    def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
        """Build a MessageEvent from a Discord slash command interaction."""
        is_dm = isinstance(interaction.channel, discord.DMChannel)
@@ -1998,6 +2044,66 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            return SendResult(success=False, error=str(e))

+    async def send_model_picker(
+        self,
+        chat_id: str,
+        providers: list,
+        current_model: str,
+        current_provider: str,
+        session_key: str,
+        on_model_selected,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an interactive select-menu model picker.
+
+        Two-step drill-down: provider dropdown → model dropdown.
+        Uses Discord embeds + Select menus via ``ModelPickerView``.
+
+        Args:
+            chat_id: Channel ID to post in; overridden by ``metadata["thread_id"]``
+                when that key is present and truthy.
+            providers: Provider entries handed to ``ModelPickerView``, which reads
+                the ``slug``, ``name``, ``models``, ``total_models`` and
+                ``is_current`` keys of each entry.
+            current_model: Model shown in the embed (``unknown`` when empty).
+            current_provider: Provider identifier; rendered through
+                ``hermes_cli.providers.get_label`` when that lookup succeeds.
+            session_key: Stored on the view as ``session_key``.
+            on_model_selected: Awaitable callback invoked by the view with
+                ``(channel_id, model_id, provider_slug)``; its result is used as
+                the confirmation embed's description.
+            metadata: Optional routing metadata (only ``thread_id`` is read here).
+
+        Returns:
+            ``SendResult`` carrying the sent message's ID on success, or
+            ``success=False`` with the error text when not connected or when
+            any step raises.
+        """
+        if not self._client or not DISCORD_AVAILABLE:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Resolve target channel (use thread_id if present)
+            target_id = chat_id
+            if metadata and metadata.get("thread_id"):
+                target_id = metadata["thread_id"]
+
+            # Try the client's cached channel first; fall back to an API fetch.
+            channel = self._client.get_channel(int(target_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(target_id))
+
+            # Label lookup is best-effort: fall back to the raw provider value.
+            try:
+                from hermes_cli.providers import get_label
+                provider_label = get_label(current_provider)
+            except Exception:
+                provider_label = current_provider
+
+            embed = discord.Embed(
+                title="⚙ Model Configuration",
+                description=(
+                    f"Current model: `{current_model or 'unknown'}`\n"
+                    f"Provider: {provider_label}\n\n"
+                    f"Select a provider:"
+                ),
+                color=discord.Color.blue(),
+            )
+
+            view = ModelPickerView(
+                providers=providers,
+                current_model=current_model,
+                current_provider=current_provider,
+                session_key=session_key,
+                on_model_selected=on_model_selected,
+                allowed_user_ids=self._allowed_user_ids,
+            )
+
+            msg = await channel.send(embed=embed, view=view)
+            return SendResult(success=True, message_id=str(msg.id))
+
+        except Exception as e:
+            logger.warning("[%s] send_model_picker failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
    def _get_parent_channel_id(self, channel: Any) -> Optional[str]:
        """Return the parent channel ID for a Discord thread-like channel, if present."""
        parent = getattr(channel, "parent", None)
@@ -2087,9 +2193,11 @@ class DiscordAdapter(BasePlatformAdapter):
        # UNLESS the channel is in the free-response list or the message is
        # in a thread where the bot has already participated.
        #
-        # Config (all settable via discord.* in config.yaml):
+        # Config (all settable via discord.* in config.yaml or DISCORD_* env vars):
        #   discord.require_mention: Require @mention in server channels (default: true)
        #   discord.free_response_channels: Channel IDs where bot responds without mention
+        #   discord.ignored_channels: Channel IDs where bot NEVER responds (even when mentioned)
+        #   discord.no_thread_channels: Channel IDs where bot responds directly without creating thread
        #   discord.auto_thread: Auto-create thread on @mention in channels (default: true)

        thread_id = None
@@ -2100,9 +2208,18 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        if not isinstance(message.channel, discord.DMChannel):
+            # Check ignored channels first - never respond even when mentioned
+            ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
+            ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
+            channel_ids = {str(message.channel.id)}
+            if parent_channel_id:
+                channel_ids.add(parent_channel_id)
+            if channel_ids & ignored_channels:
+                logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
+                return
+
            free_channels_raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
            free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()}
-            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
                channel_ids.add(parent_channel_id)

@@ -2124,10 +2241,14 @@ class DiscordAdapter(BasePlatformAdapter):
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
+        # no_thread_channels: channels where bot responds directly without thread.
        auto_threaded_channel = None
        if not is_thread and not isinstance(message.channel, discord.DMChannel):
+            no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "")
+            no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
+            skip_thread = bool(channel_ids & no_thread_channels)
            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
-            if auto_thread:
+            if auto_thread and not skip_thread:
                thread = await self._auto_create_thread(message)
                if thread:
                    is_thread = True
@@ -2489,3 +2610,218 @@ if DISCORD_AVAILABLE:
            self.resolved = True
            for child in self.children:
                child.disabled = True
+
+    class ModelPickerView(discord.ui.View):
+        """Interactive select-menu view for model switching.
+
+        Two-step drill-down: provider dropdown → model dropdown.
+        Edits the original message in-place as the user navigates.
+        Times out after 2 minutes.
+        """
+
+        def __init__(
+            self,
+            providers: list,
+            current_model: str,
+            current_provider: str,
+            session_key: str,
+            on_model_selected,
+            allowed_user_ids: set,
+        ):
+            super().__init__(timeout=120)
+            self.providers = providers
+            self.current_model = current_model
+            self.current_provider = current_provider
+            self.session_key = session_key
+            self.on_model_selected = on_model_selected
+            self.allowed_user_ids = allowed_user_ids
+            self.resolved = False
+            self._selected_provider: str = ""
+
+            self._build_provider_select()
+
+        def _check_auth(self, interaction: discord.Interaction) -> bool:
+            if not self.allowed_user_ids:
+                return True
+            return str(interaction.user.id) in self.allowed_user_ids
+
+        def _build_provider_select(self):
+            """Build the provider dropdown menu."""
+            self.clear_items()
+            options = []
+            for p in self.providers:
+                count = p.get("total_models", len(p.get("models", [])))
+                label = f"{p['name']} ({count} models)"
+                desc = "current" if p.get("is_current") else None
+                options.append(
+                    discord.SelectOption(
+                        label=label[:100],
+                        value=p["slug"],
+                        description=desc,
+                    )
+                )
+            if not options:
+                return
+
+            select = discord.ui.Select(
+                placeholder="Choose a provider...",
+                options=options[:25],
+                custom_id="model_provider_select",
+            )
+            select.callback = self._on_provider_selected
+            self.add_item(select)
+
+            cancel_btn = discord.ui.Button(
+                label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel"
+            )
+            cancel_btn.callback = self._on_cancel
+            self.add_item(cancel_btn)
+
+        def _build_model_select(self, provider_slug: str):
+            """Build the model dropdown for a specific provider."""
+            self.clear_items()
+            provider = next(
+                (p for p in self.providers if p["slug"] == provider_slug), None
+            )
+            if not provider:
+                return
+
+            models = provider.get("models", [])
+            options = []
+            for model_id in models[:25]:
+                short = model_id.split("/")[-1] if "/" in model_id else model_id
+                options.append(
+                    discord.SelectOption(
+                        label=short[:100],
+                        value=model_id[:100],
+                    )
+                )
+            if not options:
+                return
+
+            select = discord.ui.Select(
+                placeholder=f"Choose a model from {provider.get('name', provider_slug)}...",
+                options=options,
+                custom_id="model_model_select",
+            )
+            select.callback = self._on_model_selected
+            self.add_item(select)
+
+            back_btn = discord.ui.Button(
+                label="◀ Back", style=discord.ButtonStyle.grey, custom_id="model_back"
+            )
+            back_btn.callback = self._on_back
+            self.add_item(back_btn)
+
+            cancel_btn = discord.ui.Button(
+                label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel2"
+            )
+            cancel_btn.callback = self._on_cancel
+            self.add_item(cancel_btn)
+
+        async def _on_provider_selected(self, interaction: discord.Interaction):
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            provider_slug = interaction.data["values"][0]
+            self._selected_provider = provider_slug
+            provider = next(
+                (p for p in self.providers if p["slug"] == provider_slug), None
+            )
+            pname = provider.get("name", provider_slug) if provider else provider_slug
+
+            self._build_model_select(provider_slug)
+
+            total = provider.get("total_models", 0) if provider else 0
+            shown = min(len(provider.get("models", [])), 25) if provider else 0
+            extra = f"\n*{total - shown} more available — type `/model <name>` directly*" if total > shown else ""
+
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Configuration",
+                    description=f"Provider: **{pname}**\nSelect a model:{extra}",
+                    color=discord.Color.blue(),
+                ),
+                view=self,
+            )
+
+        async def _on_model_selected(self, interaction: discord.Interaction):
+            if self.resolved:
+                await interaction.response.send_message(
+                    "Already resolved~", ephemeral=True
+                )
+                return
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            self.resolved = True
+            model_id = interaction.data["values"][0]
+
+            try:
+                result_text = await self.on_model_selected(
+                    str(interaction.channel_id),
+                    model_id,
+                    self._selected_provider,
+                )
+            except Exception as exc:
+                result_text = f"Error switching model: {exc}"
+
+            self.clear_items()
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Switched",
+                    description=result_text,
+                    color=discord.Color.green(),
+                ),
+                view=self,
+            )
+
+        async def _on_back(self, interaction: discord.Interaction):
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            self._build_provider_select()
+
+            try:
+                from hermes_cli.providers import get_label
+                provider_label = get_label(self.current_provider)
+            except Exception:
+                provider_label = self.current_provider
+
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Configuration",
+                    description=(
+                        f"Current model: `{self.current_model or 'unknown'}`\n"
+                        f"Provider: {provider_label}\n\n"
+                        f"Select a provider:"
+                    ),
+                    color=discord.Color.blue(),
+                ),
+                view=self,
+            )
+
+        async def _on_cancel(self, interaction: discord.Interaction):
+            """Cancel the picker: mark it resolved and remove all components.
+
+            Applies the same ``_check_auth`` gate as the provider/model/back
+            callbacks so a user outside ``allowed_user_ids`` cannot dismiss a
+            picker that belongs to someone else.
+            """
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            self.resolved = True
+            self.clear_items()
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Configuration",
+                    description="Model selection cancelled.",
+                    color=discord.Color.greyple(),
+                ),
+                view=self,
+            )
+
+        async def on_timeout(self):
+            self.resolved = True
+            self.clear_items()
@@ -60,7 +60,6 @@ try:
        CreateMessageRequestBody,
        GetChatRequest,
        GetMessageRequest,
-        GetImageRequest,
        GetMessageResourceRequest,
        P2ImMessageMessageReadV1,
        ReplyMessageRequest,
@@ -270,6 +269,22 @@ class FeishuAdapterSettings:
    webhook_host: str
    webhook_port: int
    webhook_path: str
+    ws_reconnect_nonce: int = 30
+    ws_reconnect_interval: int = 120
+    ws_ping_interval: Optional[int] = None
+    ws_ping_timeout: Optional[int] = None
+    admins: frozenset[str] = frozenset()
+    default_group_policy: str = ""
+    group_rules: Dict[str, FeishuGroupRule] = field(default_factory=dict)
+
+
+@dataclass
+class FeishuGroupRule:
+    """Per-group policy rule for controlling which users may interact with the bot."""
+
+    policy: str  # "open" | "allowlist" | "blacklist" | "admin_only" | "disabled"
+    # Sender IDs (open_id or user_id) accepted when policy == "allowlist".
+    allowlist: set[str] = field(default_factory=set)
+    # Sender IDs (open_id or user_id) rejected when policy == "blacklist".
+    blacklist: set[str] = field(default_factory=set)


@dataclass
@@ -358,6 +373,20 @@ def _strip_markdown_to_plain_text(text: str) -> str:
    return plain.strip()


+def _coerce_int(value: Any, default: Optional[int] = None, min_value: int = 0) -> Optional[int]:
+    """Coerce *value* to an int, falling back to *default*.
+
+    Returns ``default`` when ``int(value)`` fails or when the parsed value is
+    smaller than ``min_value``; otherwise returns the parsed int.
+    """
+    try:
+        parsed = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers non-finite floats such as ``float("inf")``,
+        # which ``int()`` rejects with a different exception than bad strings.
+        return default
+    return parsed if parsed >= min_value else default
+
+
+def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int:
+    """Like ``_coerce_int`` but always return an int, using *default* as the fallback."""
+    parsed = _coerce_int(value, default=default, min_value=min_value)
+    return default if parsed is None else parsed
+
+
 # ---------------------------------------------------------------------------
 # Post payload builders and parsers
 # ---------------------------------------------------------------------------
@@ -913,14 +942,66 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


-def _run_official_feishu_ws_client(ws_client: Any) -> None:
+def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
+    # Creates a fresh event loop for this thread, publishes it on
+    # ``adapter._ws_thread_loop``, temporarily patches the SDK's
+    # ``websockets.connect`` and the client's ``_configure`` so the adapter's
+    # reconnect/ping settings are applied, then blocks in ``ws_client.start()``.
+    # All patches are reverted and the loop is closed before returning.
    import lark_oapi.ws.client as ws_client_module

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    ws_client_module.loop = loop
-    ws_client.start()
+    adapter._ws_thread_loop = loop
+
+    original_connect = ws_client_module.websockets.connect
+    original_configure = getattr(ws_client, "_configure", None)
+
+    def _apply_runtime_ws_overrides() -> None:
+        # Push adapter-level reconnect/ping settings onto the client's private attributes.
+        try:
+            setattr(ws_client, "_reconnect_nonce", adapter._ws_reconnect_nonce)
+            setattr(ws_client, "_reconnect_interval", adapter._ws_reconnect_interval)
+            if adapter._ws_ping_interval is not None:
+                setattr(ws_client, "_ping_interval", adapter._ws_ping_interval)
+        except Exception:
+            logger.debug("[Feishu] Failed to apply websocket runtime overrides", exc_info=True)
+
+    async def _connect_with_overrides(*args: Any, **kwargs: Any) -> Any:
+        # Inject ping settings only when the caller did not pass them explicitly.
+        if adapter._ws_ping_interval is not None and "ping_interval" not in kwargs:
+            kwargs["ping_interval"] = adapter._ws_ping_interval
+        if adapter._ws_ping_timeout is not None and "ping_timeout" not in kwargs:
+            kwargs["ping_timeout"] = adapter._ws_ping_timeout
+        return await original_connect(*args, **kwargs)
+
+    def _configure_with_overrides(conf: Any) -> Any:
+        # Re-apply the overrides after the client's own ``_configure`` runs,
+        # since that call may reset the attributes set above.
+        assert original_configure is not None
+        result = original_configure(conf)
+        _apply_runtime_ws_overrides()
+        return result
+
+    ws_client_module.websockets.connect = _connect_with_overrides
+    if original_configure is not None:
+        setattr(ws_client, "_configure", _configure_with_overrides)
+    _apply_runtime_ws_overrides()
+    try:
+        ws_client.start()
+    except Exception:
+        # Best-effort: do not re-raise (presumably the client raises when the
+        # adapter stops this loop on disconnect), but keep the reason visible.
+        logger.debug("[Feishu] Websocket client exited with error", exc_info=True)
+    finally:
+        ws_client_module.websockets.connect = original_connect
+        if original_configure is not None:
+            setattr(ws_client, "_configure", original_configure)
+        try:
+            pending = [t for t in asyncio.all_tasks(loop) if not t.done()]
+            for task in pending:
+                task.cancel()
+            if pending:
+                loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
+        except Exception:
+            # A failed drain must not skip closing the loop or clearing the
+            # adapter's reference to it.
+            logger.debug("[Feishu] Failed to drain websocket thread tasks", exc_info=True)
+        finally:
+            try:
+                loop.stop()
+            except Exception:
+                pass
+            try:
+                loop.close()
+            except Exception:
+                pass
+            adapter._ws_thread_loop = None


 def check_feishu_requirements() -> bool:
@@ -945,10 +1026,11 @@ class FeishuAdapter(BasePlatformAdapter):
        self._client: Optional[Any] = None
        self._ws_client: Optional[Any] = None
        self._ws_future: Optional[asyncio.Future] = None
+        self._ws_thread_loop: Optional[asyncio.AbstractEventLoop] = None
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._webhook_runner: Optional[Any] = None
        self._webhook_site: Optional[Any] = None
-        self._event_handler = self._build_event_handler()
+        self._event_handler: Optional[Any] = None
        self._seen_message_ids: Dict[str, float] = {}  # message_id → seen_at (time.time())
        self._seen_message_order: List[str] = []
        self._dedup_state_path = get_hermes_home() / "feishu_seen_message_ids.json"
@@ -974,6 +1056,26 @@ class FeishuAdapter(BasePlatformAdapter):

    @staticmethod
    def _load_settings(extra: Dict[str, Any]) -> FeishuAdapterSettings:
+        # Parse per-group rules from config
+        raw_group_rules = extra.get("group_rules", {})
+        group_rules: Dict[str, FeishuGroupRule] = {}
+        if isinstance(raw_group_rules, dict):
+            for chat_id, rule_cfg in raw_group_rules.items():
+                if not isinstance(rule_cfg, dict):
+                    continue
+                group_rules[str(chat_id)] = FeishuGroupRule(
+                    policy=str(rule_cfg.get("policy", "open")).strip().lower(),
+                    allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()),
+                    blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()),
+                )
+
+        # Bot-level admins
+        raw_admins = extra.get("admins", [])
+        admins = frozenset(str(u).strip() for u in raw_admins if str(u).strip())
+
+        # Default group policy (for groups not in group_rules)
+        default_group_policy = str(extra.get("default_group_policy", "")).strip().lower()
+
        return FeishuAdapterSettings(
            app_id=str(extra.get("app_id") or os.getenv("FEISHU_APP_ID", "")).strip(),
            app_secret=str(extra.get("app_secret") or os.getenv("FEISHU_APP_SECRET", "")).strip(),
@@ -1020,6 +1122,13 @@ class FeishuAdapter(BasePlatformAdapter):
                str(extra.get("webhook_path") or os.getenv("FEISHU_WEBHOOK_PATH", _DEFAULT_WEBHOOK_PATH)).strip()
                or _DEFAULT_WEBHOOK_PATH
            ),
+            ws_reconnect_nonce=_coerce_required_int(extra.get("ws_reconnect_nonce"), default=30, min_value=0),
+            ws_reconnect_interval=_coerce_required_int(extra.get("ws_reconnect_interval"), default=120, min_value=1),
+            ws_ping_interval=_coerce_int(extra.get("ws_ping_interval"), default=None, min_value=1),
+            ws_ping_timeout=_coerce_int(extra.get("ws_ping_timeout"), default=None, min_value=1),
+            admins=admins,
+            default_group_policy=default_group_policy,
+            group_rules=group_rules,
        )

    def _apply_settings(self, settings: FeishuAdapterSettings) -> None:
@@ -1031,6 +1140,9 @@ class FeishuAdapter(BasePlatformAdapter):
        self._verification_token = settings.verification_token
        self._group_policy = settings.group_policy
        self._allowed_group_users = set(settings.allowed_group_users)
+        self._admins = set(settings.admins)
+        self._default_group_policy = settings.default_group_policy or settings.group_policy
+        self._group_rules = settings.group_rules
        self._bot_open_id = settings.bot_open_id
        self._bot_user_id = settings.bot_user_id
        self._bot_name = settings.bot_name
@@ -1042,6 +1154,10 @@ class FeishuAdapter(BasePlatformAdapter):
        self._webhook_host = settings.webhook_host
        self._webhook_port = settings.webhook_port
        self._webhook_path = settings.webhook_path
+        self._ws_reconnect_nonce = settings.ws_reconnect_nonce
+        self._ws_reconnect_interval = settings.ws_reconnect_interval
+        self._ws_ping_interval = settings.ws_ping_interval
+        self._ws_ping_timeout = settings.ws_ping_timeout

    def _build_event_handler(self) -> Any:
        if EventDispatcherHandler is None:
@@ -1116,8 +1232,37 @@ class FeishuAdapter(BasePlatformAdapter):
        self._reset_batch_buffers()
        self._disable_websocket_auto_reconnect()
        await self._stop_webhook_server()
+
+        ws_thread_loop = self._ws_thread_loop
+        if ws_thread_loop is not None and not ws_thread_loop.is_closed():
+            logger.debug("[Feishu] Cancelling websocket thread tasks and stopping loop")
+
+            def cancel_all_tasks() -> None:
+                tasks = [t for t in asyncio.all_tasks(ws_thread_loop) if not t.done()]
+                logger.debug("[Feishu] Found %d pending tasks in websocket thread", len(tasks))
+                for task in tasks:
+                    task.cancel()
+                ws_thread_loop.call_later(0.1, ws_thread_loop.stop)
+
+            ws_thread_loop.call_soon_threadsafe(cancel_all_tasks)
+
+        ws_future = self._ws_future
+        if ws_future is not None:
+            try:
+                logger.debug("[Feishu] Waiting for websocket thread to exit (timeout=10s)")
+                await asyncio.wait_for(asyncio.shield(ws_future), timeout=10.0)
+                logger.debug("[Feishu] Websocket thread exited cleanly")
+            except asyncio.TimeoutError:
+                logger.warning("[Feishu] Websocket thread did not exit within 10s - may be stuck")
+            except asyncio.CancelledError:
+                logger.debug("[Feishu] Websocket thread cancelled during disconnect")
+            except Exception as exc:
+                logger.debug("[Feishu] Websocket thread exited with error: %s", exc, exc_info=True)
+
        self._ws_future = None
+        self._ws_thread_loop = None
        self._loop = None
+        self._event_handler = None
        self._persist_seen_message_ids()
        await self._release_app_lock()

@@ -1476,12 +1621,13 @@ class FeishuAdapter(BasePlatformAdapter):

    def _on_message_event(self, data: Any) -> None:
        """Normalize Feishu inbound events into MessageEvent."""
-        if self._loop is None:
+        loop = self._loop
+        if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
            logger.warning("[Feishu] Dropping inbound message before adapter loop is ready")
            return
        future = asyncio.run_coroutine_threadsafe(
            self._handle_message_event_data(data),
-            self._loop,
+            loop,
        )
        future.add_done_callback(self._log_background_failure)

@@ -1504,7 +1650,8 @@ class FeishuAdapter(BasePlatformAdapter):
            return

        chat_type = getattr(message, "chat_type", "p2p")
-        if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id):
+        chat_id = getattr(message, "chat_id", "") or ""
+        if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id, chat_id):
            logger.debug("[Feishu] Dropping group message that failed mention/policy gate: %s", message_id)
            return
        await self._process_inbound_message(
@@ -1553,27 +1700,30 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        # Only process reactions from real users. Ignore app/bot-generated reactions
        # and Hermes' own ACK emoji to avoid feedback loops.
+        loop = self._loop
        if (
            operator_type in {"bot", "app"}
            or emoji_type == _FEISHU_ACK_EMOJI
            or not message_id
-            or self._loop is None
+            or loop is None
+            or bool(getattr(loop, "is_closed", lambda: False)())
        ):
            return
        future = asyncio.run_coroutine_threadsafe(
            self._handle_reaction_event(event_type, data),
-            self._loop,
+            loop,
        )
        future.add_done_callback(self._log_background_failure)

    def _on_card_action_trigger(self, data: Any) -> Any:
        """Schedule Feishu card actions on the adapter loop and acknowledge immediately."""
-        if self._loop is None:
+        loop = self._loop
+        if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
            logger.warning("[Feishu] Dropping card action before adapter loop is ready")
        else:
            future = asyncio.run_coroutine_threadsafe(
                self._handle_card_action_event(data),
-                self._loop,
+                loop,
            )
            future.add_done_callback(self._log_background_failure)
        if P2CardActionTriggerResponse is None:
@@ -1915,10 +2065,7 @@ class FeishuAdapter(BasePlatformAdapter):
        existing.media_urls.extend(event.media_urls)
        existing.media_types.extend(event.media_types)
        if event.text:
-            if not existing.text:
-                existing.text = event.text
-            elif event.text not in existing.text.split("\n\n"):
-                existing.text = f"{existing.text}\n\n{event.text}"
+            existing.text = self._merge_caption(existing.text, event.text)
        existing.timestamp = event.timestamp
        if event.message_id:
            existing.message_id = event.message_id
@@ -1962,6 +2109,10 @@ class FeishuAdapter(BasePlatformAdapter):
        default_ext: str,
        preferred_name: str,
    ) -> tuple[str, str]:
+        from tools.url_safety import is_safe_url
+        if not is_safe_url(file_url):
+            raise ValueError(f"Blocked unsafe URL (SSRF protection): {file_url[:80]}")
+
        import httpx

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
@@ -2083,7 +2234,7 @@ class FeishuAdapter(BasePlatformAdapter):
        event_type = str((payload.get("header") or {}).get("event_type") or "")
        data = self._namespace_from_mapping(payload)
        if event_type == "im.message.receive_v1":
-            await self._handle_message_event_data(data)
+            self._on_message_event(data)
        elif event_type == "im.message.message_read_v1":
            self._on_message_read_event(data)
        elif event_type == "im.chat.member.bot.added_v1":
@@ -2093,7 +2244,7 @@ class FeishuAdapter(BasePlatformAdapter):
        elif event_type in ("im.message.reaction.created_v1", "im.message.reaction.deleted_v1"):
            self._on_reaction_event(event_type, data)
        elif event_type == "card.action.trigger":
-            asyncio.ensure_future(self._handle_card_action_event(data))
+            self._on_card_action_trigger(data)
        else:
            logger.debug("[Feishu] Ignoring webhook event type: %s", event_type or "unknown")
        return web.json_response({"code": 0, "msg": "ok"})
@@ -2657,18 +2808,41 @@ class FeishuAdapter(BasePlatformAdapter):
    # Group policy and mention gating
    # =========================================================================

-    def _allow_group_message(self, sender_id: Any) -> bool:
-        """Current group policy gate for non-DM traffic."""
-        if self._group_policy == "disabled":
-            return False
-        sender_open_id = getattr(sender_id, "open_id", None) or getattr(sender_id, "user_id", None)
-        if self._group_policy == "open":
-            return True
-        return bool(sender_open_id and sender_open_id in self._allowed_group_users)
+    def _allow_group_message(self, sender_id: Any, chat_id: str = "") -> bool:
+        """Per-group policy gate for non-DM traffic."""
+        sender_open_id = getattr(sender_id, "open_id", None)
+        sender_user_id = getattr(sender_id, "user_id", None)
+        sender_ids = {sender_open_id, sender_user_id} - {None}

-    def _should_accept_group_message(self, message: Any, sender_id: Any) -> bool:
+        if sender_ids and self._admins and (sender_ids & self._admins):
+            return True
+
+        rule = self._group_rules.get(chat_id) if chat_id else None
+        if rule:
+            policy = rule.policy
+            allowlist = rule.allowlist
+            blacklist = rule.blacklist
+        else:
+            policy = self._default_group_policy or self._group_policy
+            allowlist = self._allowed_group_users
+            blacklist = set()
+
+        if policy == "disabled":
+            return False
+        if policy == "open":
+            return True
+        if policy == "admin_only":
+            return False
+        if policy == "allowlist":
+            return bool(sender_ids and (sender_ids & allowlist))
+        if policy == "blacklist":
+            return bool(sender_ids and not (sender_ids & blacklist))
+
+        return bool(sender_ids and (sender_ids & self._allowed_group_users))
+
+    def _should_accept_group_message(self, message: Any, sender_id: Any, chat_id: str = "") -> bool:
        """Require an explicit @mention before group messages enter the agent."""
-        if not self._allow_group_message(sender_id):
+        if not self._allow_group_message(sender_id, chat_id):
            return False
        # @_all is Feishu's @everyone placeholder — always route to the bot.
        raw_content = getattr(message, "content", "") or ""
@@ -2965,6 +3139,12 @@ class FeishuAdapter(BasePlatformAdapter):
            raise RuntimeError("websockets not installed; websocket mode unavailable")
        domain = FEISHU_DOMAIN if self._domain_name != "lark" else LARK_DOMAIN
        self._client = self._build_lark_client(domain)
+        self._event_handler = self._build_event_handler()
+        if self._event_handler is None:
+            raise RuntimeError("failed to build Feishu event handler")
+        loop = self._loop
+        if loop is None or loop.is_closed():
+            raise RuntimeError("adapter loop is not ready")
        await self._hydrate_bot_identity()
        self._ws_client = FeishuWSClient(
            app_id=self._app_id,
@@ -2973,10 +3153,11 @@ class FeishuAdapter(BasePlatformAdapter):
            event_handler=self._event_handler,
            domain=domain,
        )
-        self._ws_future = self._loop.run_in_executor(
+        self._ws_future = loop.run_in_executor(
            None,
            _run_official_feishu_ws_client,
            self._ws_client,
+            self,
        )

    async def _connect_webhook(self) -> None:
@@ -2984,6 +3165,9 @@ class FeishuAdapter(BasePlatformAdapter):
            raise RuntimeError("aiohttp not installed; webhook mode unavailable")
        domain = FEISHU_DOMAIN if self._domain_name != "lark" else LARK_DOMAIN
        self._client = self._build_lark_client(domain)
+        self._event_handler = self._build_event_handler()
+        if self._event_handler is None:
+            raise RuntimeError("failed to build Feishu event handler")
        await self._hydrate_bot_identity()
        app = web.Application()
        app.router.add_post(self._webhook_path, self._handle_webhook_request)
@@ -10,6 +10,7 @@ Environment variables:
    MATRIX_USER_ID              Full user ID (@bot:server) — required for password login
    MATRIX_PASSWORD             Password (alternative to access token)
    MATRIX_ENCRYPTION           Set "true" to enable E2EE
+    MATRIX_DEVICE_ID            Stable device ID for E2EE persistence across restarts
    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
    MATRIX_REACTIONS        Set "false" to disable processing lifecycle reactions
@@ -65,6 +66,21 @@ _MAX_PENDING_EVENTS = 100
 _PENDING_EVENT_TTL = 300  # seconds — stop retrying after 5 min


+# Install instructions appended to log messages emitted when E2EE is
+# requested but its dependencies or crypto store are unavailable.
+_E2EE_INSTALL_HINT = (
+    "Install with: pip install 'matrix-nio[e2e]'  "
+    "(requires libolm C library)"
+)
+
+
+def _check_e2ee_deps() -> bool:
+    """Return True if matrix-nio E2EE dependencies (python-olm) are available.
+
+    The check reads ``nio.crypto.ENCRYPTION_ENABLED``; a missing package,
+    module, or flag is reported as False rather than raised.
+    """
+    try:
+        # Imported lazily so a missing matrix-nio install yields False here
+        # instead of failing when this module is imported.
+        from nio.crypto import ENCRYPTION_ENABLED
+        return bool(ENCRYPTION_ENABLED)
+    except (ImportError, AttributeError):
+        return False
+
+
 def check_matrix_requirements() -> bool:
    """Return True if the Matrix adapter can be used."""
    token = os.getenv("MATRIX_ACCESS_TOKEN", "")
@@ -79,7 +95,6 @@ def check_matrix_requirements() -> bool:
        return False
    try:
        import nio  # noqa: F401
-        return True
    except ImportError:
        logger.warning(
            "Matrix: matrix-nio not installed. "
@@ -87,6 +102,20 @@ def check_matrix_requirements() -> bool:
        )
        return False

+    # If encryption is requested, verify E2EE deps are available at startup
+    # rather than silently degrading to plaintext-only at connect time.
+    encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes")
+    if encryption_requested and not _check_e2ee_deps():
+        logger.error(
+            "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. "
+            "Without this, encrypted rooms will not work. "
+            "Set MATRIX_ENCRYPTION=false to disable E2EE.",
+            _E2EE_INSTALL_HINT,
+        )
+        return False
+
+    return True
+

 class MatrixAdapter(BasePlatformAdapter):
    """Gateway adapter for Matrix (any homeserver)."""
@@ -111,6 +140,10 @@ class MatrixAdapter(BasePlatformAdapter):
            "encryption",
            os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes"),
        )
+        self._device_id: str = (
+            config.extra.get("device_id", "")
+            or os.getenv("MATRIX_DEVICE_ID", "")
+        )

        self._client: Any = None  # nio.AsyncClient
        self._sync_task: Optional[asyncio.Task] = None
@@ -169,24 +202,42 @@ class MatrixAdapter(BasePlatformAdapter):
        _STORE_DIR.mkdir(parents=True, exist_ok=True)

        # Create the client.
+        # When a stable device_id is configured, pass it to the constructor
+        # so matrix-nio binds to it from the start (important for E2EE
+        # crypto-store persistence across restarts).
+        ctor_device_id = self._device_id or None
        if self._encryption:
+            if not _check_e2ee_deps():
+                logger.error(
+                    "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. "
+                    "Refusing to connect — encrypted rooms would silently fail.",
+                    _E2EE_INSTALL_HINT,
+                )
+                return False
            try:
                client = nio.AsyncClient(
                    self._homeserver,
                    self._user_id or "",
+                    device_id=ctor_device_id,
                    store_path=store_path,
                )
-                logger.info("Matrix: E2EE enabled (store: %s)", store_path)
-            except Exception as exc:
-                logger.warning(
-                    "Matrix: failed to create E2EE client (%s), "
-                    "falling back to plain client. Install: "
-                    "pip install 'matrix-nio[e2e]'",
-                    exc,
+                logger.info(
+                    "Matrix: E2EE enabled (store: %s%s)",
+                    store_path,
+                    f", device_id={self._device_id}" if self._device_id else "",
                )
-                client = nio.AsyncClient(self._homeserver, self._user_id or "")
+            except Exception as exc:
+                logger.error(
+                    "Matrix: failed to create E2EE client: %s. %s",
+                    exc, _E2EE_INSTALL_HINT,
+                )
+                return False
        else:
-            client = nio.AsyncClient(self._homeserver, self._user_id or "")
+            client = nio.AsyncClient(
+                self._homeserver,
+                self._user_id or "",
+                device_id=ctor_device_id,
+            )

        self._client = client

@@ -205,30 +256,36 @@ class MatrixAdapter(BasePlatformAdapter):
                if resolved_user_id:
                    self._user_id = resolved_user_id

+                # Prefer the user-configured device_id (MATRIX_DEVICE_ID) so
+                # the bot reuses a stable identity across restarts.  Fall back
+                # to whatever whoami returned.
+                effective_device_id = self._device_id or resolved_device_id
+
                # restore_login() is the matrix-nio path that binds the access
                # token to a specific device and loads the crypto store.
-                if resolved_device_id and hasattr(client, "restore_login"):
+                if effective_device_id and hasattr(client, "restore_login"):
                    client.restore_login(
                        self._user_id or resolved_user_id,
-                        resolved_device_id,
+                        effective_device_id,
                        self._access_token,
                    )
                else:
                    if self._user_id:
                        client.user_id = self._user_id
-                    if resolved_device_id:
-                        client.device_id = resolved_device_id
+                    if effective_device_id:
+                        client.device_id = effective_device_id
                    client.access_token = self._access_token
                    if self._encryption:
                        logger.warning(
                            "Matrix: access-token login did not restore E2EE state; "
-                            "encrypted rooms may fail until a device_id is available"
+                            "encrypted rooms may fail until a device_id is available. "
+                            "Set MATRIX_DEVICE_ID to a stable value."
                        )

                logger.info(
                    "Matrix: using access token for %s%s",
                    self._user_id or "(unknown user)",
-                    f" (device {resolved_device_id})" if resolved_device_id else "",
+                    f" (device {effective_device_id})" if effective_device_id else "",
                )
            else:
                logger.error(
@@ -271,10 +328,15 @@ class MatrixAdapter(BasePlatformAdapter):
                except Exception as exc:
                    logger.debug("Matrix: could not import keys: %s", exc)
        elif self._encryption:
-            logger.warning(
-                "Matrix: E2EE requested but crypto store is not loaded; "
-                "encrypted rooms may fail"
+            # E2EE was requested but the crypto store failed to load —
+            # this means encrypted rooms will silently not work.  Hard-fail.
+            logger.error(
+                "Matrix: E2EE requested but crypto store is not loaded — "
+                "cannot decrypt or encrypt messages. %s",
+                _E2EE_INSTALL_HINT,
            )
+            await client.close()
+            return False

        # Register event callbacks.
        client.add_event_callback(self._on_room_message, nio.RoomMessageText)
@@ -524,6 +586,11 @@ class MatrixAdapter(BasePlatformAdapter):
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Download an image URL and upload it to Matrix."""
+        from tools.url_safety import is_safe_url
+        if not is_safe_url(image_url):
+            logger.warning("Matrix: blocked unsafe image URL (SSRF protection)")
+            return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
+
        try:
            # Try aiohttp first (always available), fall back to httpx
            try:
@@ -995,7 +1062,7 @@ class MatrixAdapter(BasePlatformAdapter):

        # Message type.
        msg_type = MessageType.TEXT
-        if body.startswith("!") or body.startswith("/"):
+        if body.startswith(("!", "/")):
            msg_type = MessageType.COMMAND

        source = self.build_source(
@@ -407,6 +407,11 @@ class MattermostAdapter(BasePlatformAdapter):
        kind: str = "file",
    ) -> SendResult:
        """Download a URL and upload it as a file attachment."""
+        from tools.url_safety import is_safe_url
+        if not is_safe_url(url):
+            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
+            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
+
        import asyncio
        import aiohttp

@@ -430,7 +435,6 @@ class MattermostAdapter(BasePlatformAdapter):
                    ct = resp.content_type or "application/octet-stream"
                    break
            except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
-                last_exc = exc
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
                    continue
@@ -701,6 +705,15 @@ class MattermostAdapter(BasePlatformAdapter):
            except Exception as exc:
                logger.warning("Mattermost: error downloading file %s: %s", fid, exc)

+        # Set message type based on downloaded media types.
+        if media_types and msg_type == MessageType.TEXT:
+            if any(m.startswith("image/") for m in media_types):
+                msg_type = MessageType.PHOTO
+            elif any(m.startswith("audio/") for m in media_types):
+                msg_type = MessageType.VOICE
+            elif media_types:
+                msg_type = MessageType.DOCUMENT
+
        source = self.build_source(
            chat_id=channel_id,
            chat_type=chat_type,
@@ -717,19 +717,27 @@ class SignalAdapter(BasePlatformAdapter):
            return SendResult(success=True)
        return SendResult(success=False, error="RPC send with attachment failed")

-    async def send_document(
+    async def _send_attachment(
        self,
        chat_id: str,
        file_path: str,
+        media_label: str,
        caption: Optional[str] = None,
-        filename: Optional[str] = None,
-        **kwargs,
    ) -> SendResult:
-        """Send a document/file attachment."""
+        """Send any file as a Signal attachment via RPC.
+
+        Shared implementation for send_document, send_image_file, send_voice,
+        and send_video — avoids duplicating the validation/routing/RPC logic.
+        """
        await self._stop_typing_indicator(chat_id)

-        if not Path(file_path).exists():
-            return SendResult(success=False, error="File not found")
+        try:
+            file_size = Path(file_path).stat().st_size
+        except FileNotFoundError:
+            return SendResult(success=False, error=f"{media_label} file not found: {file_path}")
+
+        if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+            return SendResult(success=False, error=f"{media_label} too large ({file_size} bytes)")

        params: Dict[str, Any] = {
            "account": self.account,
@@ -746,7 +754,59 @@ class SignalAdapter(BasePlatformAdapter):
        if result is not None:
            self._track_sent_timestamp(result)
            return SendResult(success=True)
-        return SendResult(success=False, error="RPC send document failed")
+        return SendResult(success=False, error=f"RPC send {media_label.lower()} failed")
+
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        filename: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send a document/file attachment.
+
+        Delegates to ``_send_attachment`` with the media label "File".
+        ``filename`` and any extra keyword arguments are accepted for
+        interface compatibility but are not forwarded.
+        """
+        return await self._send_attachment(chat_id, file_path, "File", caption)
+
+    async def send_image_file(
+        self,
+        chat_id: str,
+        image_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send a local image file as a native Signal attachment.
+
+        Called by the gateway media delivery flow when MEDIA: tags containing
+        image paths are extracted from agent responses.
+
+        Delegates to ``_send_attachment`` with the media label "Image".
+        ``reply_to`` and any extra keyword arguments are accepted but not
+        forwarded.
+        """
+        return await self._send_attachment(chat_id, image_path, "Image", caption)
+
+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send an audio file as a Signal attachment.
+
+        Signal does not distinguish voice messages from file attachments at
+        the API level, so this routes through the same RPC send path.
+
+        Delegates to ``_send_attachment`` with the media label "Audio".
+        ``reply_to`` and any extra keyword arguments are accepted but not
+        forwarded.
+        """
+        return await self._send_attachment(chat_id, audio_path, "Audio", caption)
+
+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send a video file as a Signal attachment.
+
+        Delegates to ``_send_attachment`` with the media label "Video".
+        ``reply_to`` and any extra keyword arguments are accepted but not
+        forwarded.
+        """
+        return await self._send_attachment(chat_id, video_path, "Video", caption)

    # ------------------------------------------------------------------
    # Typing Indicators
@@ -84,6 +84,17 @@ class SlackAdapter(BasePlatformAdapter):
        self._seen_messages: Dict[str, float] = {}
        self._SEEN_TTL = 300   # 5 minutes
        self._SEEN_MAX = 2000  # prune threshold
+        # Track pending approval message_ts → resolved flag to prevent
+        # double-clicks on approval buttons.
+        self._approval_resolved: Dict[str, bool] = {}
+        # Track timestamps of messages sent by the bot so we can respond
+        # to thread replies even without an explicit @mention.
+        self._bot_message_ts: set = set()
+        self._BOT_TS_MAX = 5000  # cap to avoid unbounded growth
+        # Track threads where the bot has been @mentioned — once mentioned,
+        # respond to ALL subsequent messages in that thread automatically.
+        self._mentioned_threads: set = set()
+        self._MENTIONED_THREADS_MAX = 5000

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -176,6 +187,15 @@ class SlackAdapter(BasePlatformAdapter):
                await ack()
                await self._handle_slash_command(command)

+            # Register Block Kit action handlers for approval buttons
+            for _action_id in (
+                "hermes_approve_once",
+                "hermes_approve_session",
+                "hermes_approve_always",
+                "hermes_deny",
+            ):
+                self._app.action(_action_id)(self._handle_approval_action)
+
            # Start Socket Mode handler in background
            self._handler = AsyncSocketModeHandler(self._app, app_token)
            self._socket_mode_task = asyncio.create_task(self._handler.start_async())
@@ -256,9 +276,22 @@ class SlackAdapter(BasePlatformAdapter):

                last_result = await self._get_client(chat_id).chat_postMessage(**kwargs)

+            # Track the sent message ts so we can auto-respond to thread
+            # replies without requiring @mention.
+            sent_ts = last_result.get("ts") if last_result else None
+            if sent_ts:
+                self._bot_message_ts.add(sent_ts)
+                # Also register the thread root so replies-to-my-replies work
+                if thread_ts:
+                    self._bot_message_ts.add(thread_ts)
+                if len(self._bot_message_ts) > self._BOT_TS_MAX:
+                    excess = len(self._bot_message_ts) - self._BOT_TS_MAX // 2
+                    for old_ts in list(self._bot_message_ts)[:excess]:
+                        self._bot_message_ts.discard(old_ts)
+
            return SendResult(
                success=True,
-                message_id=last_result.get("ts") if last_result else None,
+                message_id=sent_ts,
                raw_response=last_result,
            )

@@ -276,10 +309,13 @@ class SlackAdapter(BasePlatformAdapter):
        if not self._app:
            return SendResult(success=False, error="Not connected")
        try:
+            # Convert standard markdown → Slack mrkdwn
+            formatted = self.format_message(content)
+
            await self._get_client(chat_id).chat_update(
                channel=chat_id,
                ts=message_id,
-                text=content,
+                text=formatted,
            )
            return SendResult(success=True, message_id=message_id)
        except Exception as e:  # pragma: no cover - defensive logging
@@ -559,6 +595,11 @@ class SlackAdapter(BasePlatformAdapter):
        if not self._app:
            return SendResult(success=False, error="Not connected")

+        from tools.url_safety import is_safe_url
+        if not is_safe_url(image_url):
+            logger.warning("[Slack] Blocked unsafe image URL (SSRF protection)")
+            return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
+
        try:
            import httpx

@@ -763,13 +804,61 @@ class SlackAdapter(BasePlatformAdapter):
        else:
            thread_ts = event.get("thread_ts") or ts  # ts fallback for channels

-        # In channels, only respond if bot is mentioned
+        # In channels, respond if:
+        #   1. The bot is @mentioned in this message, OR
+        #   2. The message is a reply in a thread the bot started/participated in, OR
+        #   3. The message is in a thread where the bot was previously @mentioned, OR
+        #   4. There's an existing session for this thread (survives restarts)
        bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
-        if not is_dm and bot_uid:
-            if f"<@{bot_uid}>" not in text:
+        is_mentioned = bot_uid and f"<@{bot_uid}>" in text
+        event_thread_ts = event.get("thread_ts")
+        is_thread_reply = bool(event_thread_ts and event_thread_ts != ts)
+
+        if not is_dm and bot_uid and not is_mentioned:
+            reply_to_bot_thread = (
+                is_thread_reply and event_thread_ts in self._bot_message_ts
+            )
+            in_mentioned_thread = (
+                event_thread_ts is not None
+                and event_thread_ts in self._mentioned_threads
+            )
+            has_session = (
+                is_thread_reply
+                and self._has_active_session_for_thread(
+                    channel_id=channel_id,
+                    thread_ts=event_thread_ts,
+                    user_id=user_id,
+                )
+            )
+            if not reply_to_bot_thread and not in_mentioned_thread and not has_session:
                return
+
+        if is_mentioned:
            # Strip the bot mention from the text
            text = text.replace(f"<@{bot_uid}>", "").strip()
+            # Register this thread so all future messages auto-trigger the bot
+            if event_thread_ts:
+                self._mentioned_threads.add(event_thread_ts)
+                if len(self._mentioned_threads) > self._MENTIONED_THREADS_MAX:
+                    to_remove = list(self._mentioned_threads)[:self._MENTIONED_THREADS_MAX // 2]
+                    for t in to_remove:
+                        self._mentioned_threads.discard(t)
+
+        # When entering a thread for the first time (no existing session),
+        # fetch thread context so the agent understands the conversation.
+        if is_thread_reply and not self._has_active_session_for_thread(
+            channel_id=channel_id,
+            thread_ts=event_thread_ts,
+            user_id=user_id,
+        ):
+            thread_context = await self._fetch_thread_context(
+                channel_id=channel_id,
+                thread_ts=event_thread_ts,
+                current_ts=ts,
+                team_id=team_id,
+            )
+            if thread_context:
+                text = thread_context + text

        # Determine message type
        msg_type = MessageType.TEXT
@@ -892,6 +981,233 @@ class SlackAdapter(BasePlatformAdapter):
        await self._remove_reaction(channel_id, ts, "eyes")
        await self._add_reaction(channel_id, ts, "white_check_mark")

+    # ----- Approval button support (Block Kit) -----
+
+    async def send_exec_approval(
+        self, chat_id: str, command: str, session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a Block Kit approval prompt with interactive buttons.
+
+        The buttons call ``resolve_gateway_approval()`` to unblock the waiting
+        agent thread — same mechanism as the text ``/approve`` flow.
+
+        Args:
+            chat_id: Channel to post the prompt in.
+            command: Command awaiting approval; shown truncated in the prompt.
+            session_key: Stored as every button's ``value`` so the click
+                handler knows which session to resolve.
+            description: Reason text shown below the command.
+            metadata: Optional send metadata used to resolve the thread to
+                post into.
+
+        Returns:
+            A ``SendResult`` carrying the posted message ``ts`` on success, or
+            an error string when the adapter is not connected or posting
+            fails.
+        """
+        if not self._app:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Cap the preview at 2900 characters plus an ellipsis.
+            # NOTE(review): presumably sized to stay under Slack's section
+            # text limit; the header and ``description`` are added on top, so
+            # confirm there is enough headroom for long descriptions.
+            cmd_preview = command[:2900] + "..." if len(command) > 2900 else command
+            thread_ts = self._resolve_thread_ts(None, metadata)
+
+            blocks = [
+                {
+                    "type": "section",
+                    "text": {
+                        "type": "mrkdwn",
+                        "text": (
+                            f":warning: *Command Approval Required*\n"
+                            f"```{cmd_preview}```\n"
+                            f"Reason: {description}"
+                        ),
+                    },
+                },
+                {
+                    "type": "actions",
+                    "elements": [
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Allow Once"},
+                            "style": "primary",
+                            "action_id": "hermes_approve_once",
+                            "value": session_key,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Allow Session"},
+                            "action_id": "hermes_approve_session",
+                            "value": session_key,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Always Allow"},
+                            "action_id": "hermes_approve_always",
+                            "value": session_key,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Deny"},
+                            "style": "danger",
+                            "action_id": "hermes_deny",
+                            "value": session_key,
+                        },
+                    ],
+                },
+            ]
+
+            # ``text`` is sent alongside ``blocks`` with a short preview of
+            # the command (first 100 characters).
+            kwargs: Dict[str, Any] = {
+                "channel": chat_id,
+                "text": f"⚠️ Command approval required: {cmd_preview[:100]}",
+                "blocks": blocks,
+            }
+            if thread_ts:
+                kwargs["thread_ts"] = thread_ts
+
+            result = await self._get_client(chat_id).chat_postMessage(**kwargs)
+            msg_ts = result.get("ts", "")
+            # Register the prompt as pending (False = not yet resolved) so the
+            # click handler can detect repeated clicks.
+            if msg_ts:
+                self._approval_resolved[msg_ts] = False
+
+            return SendResult(success=True, message_id=msg_ts, raw_response=result)
+        except Exception as e:
+            logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
+            return SendResult(success=False, error=str(e))
+
+    async def _handle_approval_action(self, ack, body, action) -> None:
+        """Handle an approval button click from Block Kit.
+
+        Acknowledges the interaction, rewrites the prompt message to show the
+        decision without buttons, then resolves the pending approval for the
+        session key carried in the button's ``value``.
+
+        Args:
+            ack: Awaitable acknowledgement callback; awaited before any other
+                work.
+            body: Interaction payload; ``message``, ``channel`` and ``user``
+                are read from it.
+            action: The clicked element; ``action_id`` and ``value`` are read
+                from it.
+        """
+        await ack()
+
+        action_id = action.get("action_id", "")
+        session_key = action.get("value", "")
+        message = body.get("message", {})
+        msg_ts = message.get("ts", "")
+        channel_id = body.get("channel", {}).get("id", "")
+        user_name = body.get("user", {}).get("name", "unknown")
+        # NOTE(review): the clicking user is only used for display and
+        # logging; no authorisation check is visible here — confirm that is
+        # enforced elsewhere.
+
+        # Map action_id to approval choice
+        choice_map = {
+            "hermes_approve_once": "once",
+            "hermes_approve_session": "session",
+            "hermes_approve_always": "always",
+            "hermes_deny": "deny",
+        }
+        # An unrecognised action_id is treated as a denial.
+        choice = choice_map.get(action_id, "deny")
+
+        # Prevent double-clicks
+        # A True entry means another click on this message is already being
+        # handled; messages with no entry are treated as not yet resolved.
+        if self._approval_resolved.get(msg_ts, False):
+            return
+        self._approval_resolved[msg_ts] = True
+
+        # Update the message to show the decision and remove buttons
+        label_map = {
+            "once": f"✅ Approved once by {user_name}",
+            "session": f"✅ Approved for session by {user_name}",
+            "always": f"✅ Approved permanently by {user_name}",
+            "deny": f"❌ Denied by {user_name}",
+        }
+        decision_text = label_map.get(choice, f"Resolved by {user_name}")
+
+        # Get original text from the section block
+        original_text = ""
+        for block in message.get("blocks", []):
+            if block.get("type") == "section":
+                original_text = block.get("text", {}).get("text", "")
+                break
+
+        updated_blocks = [
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": original_text or "Command approval request",
+                },
+            },
+            {
+                "type": "context",
+                "elements": [
+                    {"type": "mrkdwn", "text": decision_text},
+                ],
+            },
+        ]
+
+        # A failed message update is logged but does not stop the approval
+        # from being resolved below.
+        try:
+            await self._get_client(channel_id).chat_update(
+                channel=channel_id,
+                ts=msg_ts,
+                text=decision_text,
+                blocks=updated_blocks,
+            )
+        except Exception as e:
+            logger.warning("[Slack] Failed to update approval message: %s", e)
+
+        # Resolve the approval — this unblocks the agent thread
+        try:
+            from tools.approval import resolve_gateway_approval
+            count = resolve_gateway_approval(session_key, choice)
+            logger.info(
+                "Slack button resolved %d approval(s) for session %s (choice=%s, user=%s)",
+                count, session_key, choice, user_name,
+            )
+        except Exception as exc:
+            logger.error("Failed to resolve gateway approval from Slack button: %s", exc)
+
+        # Clean up stale approval state
+        # NOTE(review): once the entry is removed, a later click on the same
+        # message is no longer recognised as a duplicate by the guard above —
+        # confirm resolve_gateway_approval() is harmless when called again.
+        self._approval_resolved.pop(msg_ts, None)
+
+    # ----- Thread context fetching -----
+
+    async def _fetch_thread_context(
+        self, channel_id: str, thread_ts: str, current_ts: str,
+        team_id: str = "", limit: int = 30,
+    ) -> str:
+        """Fetch recent thread messages to provide context when the bot
+        enters a thread for the first time.
+
+        Args:
+            channel_id: Channel containing the thread.
+            thread_ts: Timestamp of the thread's parent message.
+            current_ts: Timestamp of the triggering message; it is left out
+                of the returned context.
+            team_id: Workspace ID used to pick the bot user ID whose mentions
+                are stripped from the context.
+            limit: Maximum number of prior messages to request.
+
+        Returns:
+            A formatted string with thread history, or an empty string on
+            failure or when no usable message precedes the triggering one.
+        """
+        try:
+            client = self._get_client(channel_id)
+            result = await client.conversations_replies(
+                channel=channel_id,
+                ts=thread_ts,
+                limit=limit + 1,  # +1 because it includes the current message
+                inclusive=True,
+            )
+            messages = result.get("messages") or []
+            if not messages:
+                return ""
+
+            # The bot mention token does not change per message; build it once.
+            bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
+            bot_mention = f"<@{bot_uid}>" if bot_uid else ""
+
+            context_parts = []
+            for msg in messages:
+                msg_ts = msg.get("ts", "")
+                # Skip the current message (the one that triggered this fetch)
+                if msg_ts == current_ts:
+                    continue
+                # Skip messages posted by any bot, including ourselves
+                if msg.get("bot_id") or msg.get("subtype") == "bot_message":
+                    continue
+
+                # ``or`` guards against explicit nulls: one malformed message
+                # must not raise and discard the whole thread context.
+                msg_user = msg.get("user") or "unknown"
+                msg_text = (msg.get("text") or "").strip()
+                if not msg_text:
+                    continue
+
+                # Strip bot mentions from context messages
+                if bot_mention:
+                    msg_text = msg_text.replace(bot_mention, "").strip()
+
+                # Mark the thread parent
+                prefix = "[thread parent] " if msg_ts == thread_ts else ""
+
+                # Resolve user name (cached)
+                name = await self._resolve_user_name(msg_user, chat_id=channel_id)
+                context_parts.append(f"{prefix}{name}: {msg_text}")
+
+            if not context_parts:
+                return ""
+
+            return (
+                "[Thread context — previous messages in this thread:]\n"
+                + "\n".join(context_parts)
+                + "\n[End of thread context]\n\n"
+            )
+        except Exception as e:
+            # Context is best-effort: the caller proceeds without it.
+            logger.warning("[Slack] Failed to fetch thread context: %s", e)
+            return ""
+
    async def _handle_slash_command(self, command: dict) -> None:
        """Handle /hermes slash command."""
        text = command.get("text", "").strip()
@@ -933,6 +1249,53 @@ class SlackAdapter(BasePlatformAdapter):

        await self.handle_message(event)

+    def _has_active_session_for_thread(
+        self,
+        channel_id: str,
+        thread_ts: str,
+        user_id: str,
+    ) -> bool:
+        """Return True when the session store already holds this thread's session.
+
+        Used to determine if thread replies without @mentions should be
+        processed (they should if there's an active session).
+
+        Uses ``build_session_key()`` as the single source of truth for key
+        construction — avoids the bug where manual key building didn't
+        respect ``thread_sessions_per_user`` and ``group_sessions_per_user``
+        settings correctly.
+
+        Args:
+            channel_id: Channel ID, passed as the session ``chat_id``.
+            thread_ts: Thread timestamp, passed as the session ``thread_id``.
+            user_id: ID of the user whose session is being looked up.
+
+        Returns:
+            True if the derived key is present in the store's entries.
+            False when no store is attached, the key is absent, or the
+            lookup raises.
+        """
+        session_store = getattr(self, "_session_store", None)
+        if not session_store:
+            return False
+
+        try:
+            from gateway.session import SessionSource, build_session_key
+
+            source = SessionSource(
+                platform=Platform.SLACK,
+                chat_id=channel_id,
+                chat_type="group",
+                user_id=user_id,
+                thread_id=thread_ts,
+            )
+
+            # Read session isolation settings from the store's config; when
+            # no config is available fall back to per-user group sessions
+            # and shared thread sessions.
+            store_cfg = getattr(session_store, "config", None)
+            gspu = getattr(store_cfg, "group_sessions_per_user", True) if store_cfg else True
+            tspu = getattr(store_cfg, "thread_sessions_per_user", False) if store_cfg else False
+
+            session_key = build_session_key(
+                source,
+                group_sessions_per_user=gspu,
+                thread_sessions_per_user=tspu,
+            )
+
+            session_store._ensure_loaded()
+            return session_key in session_store._entries
+        except Exception as exc:
+            # Best-effort check: a failed lookup is treated as "no active
+            # session", but leave a trace so the cause can be diagnosed.
+            logger.debug("[Slack] Active-session lookup failed: %s", exc)
+            return False
+
    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
        import asyncio
@@ -151,6 +151,10 @@ class TelegramAdapter(BasePlatformAdapter):
        self._dm_topics: Dict[str, int] = {}
        # DM Topics config from extra.dm_topics
        self._dm_topics_config: List[Dict[str, Any]] = self.config.extra.get("dm_topics", [])
+        # Interactive model picker state per chat
+        self._model_picker_state: Dict[str, dict] = {}
+        # Approval button state: approval_id (per-adapter counter) → session_key
+        self._approval_state: Dict[int, str] = {}

    def _fallback_ips(self) -> list[str]:
        """Return validated fallback IPs from config (populated by _apply_env_overrides)."""
@@ -518,7 +522,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )
            if fallback_ips:
-                logger.warning(
+                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
                    self.name,
                    ", ".join(fallback_ips),
@@ -1008,14 +1012,432 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.warning("[%s] send_update_prompt failed: %s", self.name, e)
            return SendResult(success=False, error=str(e))

+    async def send_exec_approval(
+        self, chat_id: str, command: str, session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an inline-keyboard approval prompt with interactive buttons.
+
+        The buttons call ``resolve_gateway_approval()`` to unblock the waiting
+        agent thread — same mechanism as the text ``/approve`` flow.
+
+        The prompt is sent in HTML parse mode with the command and reason
+        escaped, so shell text containing backticks, ``_``, ``*`` or ``<``
+        cannot break entity parsing and prevent the prompt from being sent.
+
+        Args:
+            chat_id: Target chat; converted with ``int()`` before sending.
+            command: Command awaiting approval (preview capped at 3800 chars).
+            session_key: Stored under the generated approval id so the
+                button callback can resolve the right approval.
+            description: Human-readable reason shown below the command.
+            metadata: Optional dict; ``thread_id`` / ``message_thread_id``
+                selects the thread to post into.
+
+        Returns:
+            ``SendResult`` carrying the sent message id on success, or the
+            error string when not connected or when sending fails.
+        """
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            import html
+            import itertools
+
+            cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
+            # quote=False: only &, < and > need escaping in Telegram HTML text.
+            text = (
+                f"⚠️ <b>Command Approval Required</b>\n\n"
+                f"<code>{html.escape(cmd_preview, quote=False)}</code>\n\n"
+                f"Reason: {html.escape(description, quote=False)}"
+            )
+
+            # Resolve thread context for thread replies
+            thread_id = None
+            if metadata:
+                thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
+
+            # callback_data must stay short, so buttons carry a small integer
+            # id from a per-adapter monotonic counter instead of session_key.
+            if not hasattr(self, "_approval_counter"):
+                self._approval_counter = itertools.count(1)
+            approval_id = next(self._approval_counter)
+
+            keyboard = InlineKeyboardMarkup([
+                [
+                    InlineKeyboardButton("✅ Allow Once", callback_data=f"ea:once:{approval_id}"),
+                    InlineKeyboardButton("✅ Session", callback_data=f"ea:session:{approval_id}"),
+                ],
+                [
+                    InlineKeyboardButton("✅ Always", callback_data=f"ea:always:{approval_id}"),
+                    InlineKeyboardButton("❌ Deny", callback_data=f"ea:deny:{approval_id}"),
+                ],
+            ])
+
+            kwargs: Dict[str, Any] = {
+                "chat_id": int(chat_id),
+                "text": text,
+                "parse_mode": ParseMode.HTML,
+                "reply_markup": keyboard,
+            }
+            if thread_id:
+                kwargs["message_thread_id"] = int(thread_id)
+
+            msg = await self._bot.send_message(**kwargs)
+
+            # Store session_key keyed by approval_id for the callback handler
+            self._approval_state[approval_id] = session_key
+
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
+    async def send_model_picker(
+        self,
+        chat_id: str,
+        providers: list,
+        current_model: str,
+        current_provider: str,
+        session_key: str,
+        on_model_selected,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Post the first step of the interactive model picker.
+
+        The picker is a two-step drill-down (provider → model) rendered as
+        an inline keyboard; later steps edit this same message in place.
+        The picker state is remembered per chat in
+        ``self._model_picker_state`` for the callback handler.
+
+        Returns a ``SendResult`` with the message id on success, or the
+        error string when not connected or when sending fails.
+        """
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            from hermes_cli.providers import get_label
+        except ImportError:
+            # Without the CLI helper, show the raw provider slug.
+            def get_label(provider_slug):
+                return provider_slug
+
+        try:
+            # One button per provider, laid out two per row.
+            provider_buttons: list = []
+            for entry in providers:
+                model_count = entry.get("total_models", len(entry.get("models", [])))
+                caption = f"{entry['name']} ({model_count})"
+                if entry.get("is_current"):
+                    caption = f"✓ {caption}"
+                # Compact callback data: mp:<slug>  (max 64 bytes)
+                provider_buttons.append(
+                    InlineKeyboardButton(caption, callback_data=f"mp:{entry['slug']}")
+                )
+
+            grid = [
+                provider_buttons[pos : pos + 2]
+                for pos in range(0, len(provider_buttons), 2)
+            ]
+            grid.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+            markup = InlineKeyboardMarkup(grid)
+
+            provider_label = get_label(current_provider)
+            text = (
+                f"⚙ *Model Configuration*\n\n"
+                f"Current model: `{current_model or 'unknown'}`\n"
+                f"Provider: {provider_label}\n\n"
+                f"Select a provider:"
+            )
+
+            thread_id = (metadata or {}).get("thread_id")
+            topic = int(thread_id) if thread_id else None
+            sent = await self._bot.send_message(
+                chat_id=int(chat_id),
+                text=text,
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=markup,
+                message_thread_id=topic,
+            )
+
+            # Everything the callback handler needs, keyed by chat id.
+            picker_state = {
+                "msg_id": sent.message_id,
+                "providers": providers,
+                "session_key": session_key,
+                "on_model_selected": on_model_selected,
+                "current_model": current_model,
+                "current_provider": current_provider,
+            }
+            self._model_picker_state[str(chat_id)] = picker_state
+
+            return SendResult(success=True, message_id=str(sent.message_id))
+        except Exception as exc:
+            logger.warning("[%s] send_model_picker failed: %s", self.name, exc)
+            return SendResult(success=False, error=str(exc))
+
+    _MODEL_PAGE_SIZE = 8
+
+    def _build_model_keyboard(self, models: list, page: int) -> tuple:
+        """Build one page of model buttons.
+
+        Returns ``(keyboard, page_info_text)``. ``page`` is clamped to the
+        valid range. Buttons carry ``mm:<absolute index into models>``; a
+        Prev/counter/Next row is added only when there is more than one
+        page, and a Back/Cancel row is always appended. ``page_info_text``
+        is empty for a single page.
+        """
+        per_page = self._MODEL_PAGE_SIZE
+        count = len(models)
+        full_pages, remainder = divmod(count, per_page)
+        n_pages = max(1, full_pages + (1 if remainder else 0))
+        current = min(max(page, 0), n_pages - 1)
+
+        first = current * per_page
+        last = min(first + per_page, count)
+
+        model_buttons: list = []
+        for position, model_id in enumerate(models[first:last], start=first):
+            # Show only the part after the last "/" and keep labels short.
+            label = model_id
+            if "/" in model_id:
+                label = model_id.split("/")[-1]
+            if len(label) > 38:
+                label = label[:35] + "..."
+            model_buttons.append(
+                InlineKeyboardButton(label, callback_data=f"mm:{position}")
+            )
+
+        layout = [
+            model_buttons[pos : pos + 2]
+            for pos in range(0, len(model_buttons), 2)
+        ]
+
+        # Pagination row (if needed)
+        if n_pages > 1:
+            pager: list = []
+            if current > 0:
+                pager.append(InlineKeyboardButton("◀ Prev", callback_data=f"mg:{current - 1}"))
+            pager.append(InlineKeyboardButton(f"{current + 1}/{n_pages}", callback_data="mx:noop"))
+            if current < n_pages - 1:
+                pager.append(InlineKeyboardButton("Next ▶", callback_data=f"mg:{current + 1}"))
+            layout.append(pager)
+
+        back_button = InlineKeyboardButton("◀ Back", callback_data="mb")
+        cancel_button = InlineKeyboardButton("✗ Cancel", callback_data="mx")
+        layout.append([back_button, cancel_button])
+
+        if n_pages > 1:
+            page_info = f" ({first + 1}–{last} of {count})"
+        else:
+            page_info = ""
+        return InlineKeyboardMarkup(layout), page_info
+
+    async def _handle_model_picker_callback(
+        self, query, data: str, chat_id: str
+    ) -> None:
+        """Handle model picker inline keyboard callbacks.
+
+        Dispatches on the ``callback_data`` emitted by the picker keyboards:
+
+        * ``mp:<slug>`` — provider chosen; show page 0 of its models.
+        * ``mg:<page>`` — pagination; show another page of the model list.
+        * ``mm:<index>`` — model chosen; await the stored
+          ``on_model_selected`` callback and replace the picker with the
+          text it returns.
+        * ``mb`` — back to the provider list.
+        * ``mx`` — cancel and discard the picker state.
+
+        Anything else (e.g. the ``mx:noop`` page-counter button) is only
+        acknowledged.
+
+        Args:
+            query: Callback query; its ``answer()`` and
+                ``edit_message_text()`` coroutines are awaited here.
+            data: Raw ``callback_data`` of the pressed button.
+            chat_id: Key into ``self._model_picker_state``.
+        """
+        state = self._model_picker_state.get(chat_id)
+        if not state:
+            await query.answer(text="Picker expired — use /model again.")
+            return
+
+        try:
+            from hermes_cli.providers import get_label
+        except ImportError:
+            def get_label(slug):
+                return slug
+
+        if data.startswith("mp:"):
+            # --- Provider selected: show model buttons (page 0) ---
+            provider_slug = data[3:]
+            provider = next(
+                (p for p in state["providers"] if p["slug"] == provider_slug),
+                None,
+            )
+            if not provider:
+                await query.answer(text="Provider not found.")
+                return
+
+            models = provider.get("models", [])
+            # Remember the selection so mg:/mm: callbacks can resolve indices.
+            state["selected_provider"] = provider_slug
+            state["selected_provider_name"] = provider.get("name", provider_slug)
+            state["model_list"] = models
+            state["model_page"] = 0
+
+            keyboard, page_info = self._build_model_keyboard(models, 0)
+
+            pname = provider.get("name", provider_slug)
+            total = provider.get("total_models", len(models))
+            shown = len(models)
+            extra = f"\n_{total - shown} more available — type `/model <name>` directly_" if total > shown else ""
+
+            await query.edit_message_text(
+                text=(
+                    f"⚙ *Model Configuration*\n\n"
+                    f"Provider: *{pname}*{page_info}\n"
+                    f"Select a model:{extra}"
+                ),
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        elif data.startswith("mg:"):
+            # --- Page navigation ---
+            try:
+                page = int(data[3:])
+            except ValueError:
+                await query.answer(text="Invalid page.")
+                return
+
+            models = state.get("model_list", [])
+            state["model_page"] = page
+
+            keyboard, page_info = self._build_model_keyboard(models, page)
+
+            pname = state.get("selected_provider_name", "")
+            provider_slug = state.get("selected_provider", "")
+            provider = next(
+                (p for p in state["providers"] if p["slug"] == provider_slug),
+                None,
+            )
+            total = provider.get("total_models", len(models)) if provider else len(models)
+            shown = len(models)
+            extra = f"\n_{total - shown} more available — type `/model <name>` directly_" if total > shown else ""
+
+            await query.edit_message_text(
+                text=(
+                    f"⚙ *Model Configuration*\n\n"
+                    f"Provider: *{pname}*{page_info}\n"
+                    f"Select a model:{extra}"
+                ),
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        elif data.startswith("mm:"):
+            # --- Model selected: perform the switch ---
+            try:
+                idx = int(data[3:])
+            except ValueError:
+                await query.answer(text="Invalid selection.")
+                return
+
+            model_list = state.get("model_list", [])
+            if idx < 0 or idx >= len(model_list):
+                await query.answer(text="Invalid model index.")
+                return
+
+            model_id = model_list[idx]
+            provider_slug = state.get("selected_provider", "")
+            callback = state.get("on_model_selected")
+
+            if not callback:
+                await query.answer(text="Picker expired.")
+                return
+
+            switch_failed = False
+            try:
+                result_text = await callback(chat_id, model_id, provider_slug)
+            except Exception as exc:
+                logger.error("Model picker switch failed: %s", exc)
+                result_text = f"Error switching model: {exc}"
+                switch_failed = True
+
+            # Edit message to show confirmation, remove buttons
+            try:
+                await query.edit_message_text(
+                    text=result_text,
+                    parse_mode=ParseMode.MARKDOWN,
+                    reply_markup=None,
+                )
+            except Exception:
+                # Markdown parse failure — retry as plain text
+                try:
+                    await query.edit_message_text(
+                        text=result_text,
+                        parse_mode=None,
+                        reply_markup=None,
+                    )
+                except Exception as edit_exc:
+                    # Non-fatal: the toast below still reports the outcome.
+                    logger.debug("Model picker result edit failed: %s", edit_exc)
+            # The toast must reflect the real outcome, not always success.
+            await query.answer(
+                text="Model switch failed." if switch_failed else "Model switched!"
+            )
+
+            # Clean up state
+            self._model_picker_state.pop(chat_id, None)
+
+        elif data == "mb":
+            # --- Back to provider list ---
+            buttons = []
+            for p in state["providers"]:
+                count = p.get("total_models", len(p.get("models", [])))
+                label = f"{p['name']} ({count})"
+                if p.get("is_current"):
+                    label = f"✓ {label}"
+                buttons.append(
+                    InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
+                )
+
+            rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
+            rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+            keyboard = InlineKeyboardMarkup(rows)
+
+            try:
+                provider_label = get_label(state["current_provider"])
+            except Exception:
+                # Fall back to the raw slug if the label lookup fails.
+                provider_label = state["current_provider"]
+
+            await query.edit_message_text(
+                text=(
+                    f"⚙ *Model Configuration*\n\n"
+                    f"Current model: `{state['current_model'] or 'unknown'}`\n"
+                    f"Provider: {provider_label}\n\n"
+                    f"Select a provider:"
+                ),
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        elif data == "mx":
+            # --- Cancel ---
+            self._model_picker_state.pop(chat_id, None)
+            await query.edit_message_text(
+                text="Model selection cancelled.",
+                reply_markup=None,
+            )
+            await query.answer()
+
+        else:
+            # Catch-all (e.g. page counter button "mx:noop")
+            await query.answer()
+
    async def _handle_callback_query(
        self, update: "Update", context: "ContextTypes.DEFAULT_TYPE"
    ) -> None:
-        """Handle inline keyboard button clicks (update prompts)."""
+        """Handle inline keyboard button clicks."""
        query = update.callback_query
        if not query or not query.data:
            return
        data = query.data
+
+        # --- Model picker callbacks ---
+        if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
+            chat_id = str(query.message.chat_id) if query.message else None
+            if chat_id:
+                await self._handle_model_picker_callback(query, data, chat_id)
+            return
+
+        # --- Exec approval callbacks (ea:choice:id) ---
+        if data.startswith("ea:"):
+            parts = data.split(":", 2)
+            if len(parts) == 3:
+                choice = parts[1]  # once, session, always, deny
+                try:
+                    approval_id = int(parts[2])
+                except (ValueError, IndexError):
+                    await query.answer(text="Invalid approval data.")
+                    return
+
+                session_key = self._approval_state.pop(approval_id, None)
+                if not session_key:
+                    await query.answer(text="This approval has already been resolved.")
+                    return
+
+                # Map choice to human-readable label
+                label_map = {
+                    "once": "✅ Approved once",
+                    "session": "✅ Approved for session",
+                    "always": "✅ Approved permanently",
+                    "deny": "❌ Denied",
+                }
+                user_display = getattr(query.from_user, "first_name", "User")
+                label = label_map.get(choice, "Resolved")
+
+                await query.answer(text=label)
+
+                # Edit message to show decision, remove buttons
+                try:
+                    await query.edit_message_text(
+                        text=f"{label} by {user_display}",
+                        parse_mode=ParseMode.MARKDOWN,
+                        reply_markup=None,
+                    )
+                except Exception:
+                    pass  # non-fatal if edit fails
+
+                # Resolve the approval — unblocks the agent thread
+                try:
+                    from tools.approval import resolve_gateway_approval
+                    count = resolve_gateway_approval(session_key, choice)
+                    logger.info(
+                        "Telegram button resolved %d approval(s) for session %s (choice=%s, user=%s)",
+                        count, session_key, choice, user_display,
+                    )
+                except Exception as exc:
+                    logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
+            return
+
+        # --- Update prompt callbacks ---
        if not data.startswith("update_prompt:"):
            return
        answer = data.split(":", 1)[1]  # "y" or "n"
@@ -1063,7 +1485,7 @@ class TelegramAdapter(BasePlatformAdapter):
            
            with open(audio_path, "rb") as audio_file:
                # .ogg files -> send as voice (round playable bubble)
-                if audio_path.endswith(".ogg") or audio_path.endswith(".opus"):
+                if audio_path.endswith((".ogg", ".opus")):
                    _voice_thread = metadata.get("thread_id") if metadata else None
                    msg = await self._bot.send_voice(
                        chat_id=int(chat_id),
@@ -1210,7 +1632,12 @@ class TelegramAdapter(BasePlatformAdapter):
        """
        if not self._bot:
            return SendResult(success=False, error="Not connected")
-        
+
+        from tools.url_safety import is_safe_url
+        if not is_safe_url(image_url):
+            logger.warning("[%s] Blocked unsafe image URL (SSRF protection)", self.name)
+            return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
+
        try:
            # Telegram can send photos directly from URLs (up to ~5MB)
            _photo_thread = metadata.get("thread_id") if metadata else None
@@ -1800,10 +2227,7 @@ class TelegramAdapter(BasePlatformAdapter):
            existing.media_urls.extend(event.media_urls)
            existing.media_types.extend(event.media_types)
            if event.text:
-                if not existing.text:
-                    existing.text = event.text
-                elif event.text not in existing.text:
-                    existing.text = f"{existing.text}\n\n{event.text}".strip()
+                existing.text = self._merge_caption(existing.text, event.text)

        prior_task = self._pending_photo_batch_tasks.get(batch_key)
        if prior_task and not prior_task.done():
@@ -1993,11 +2417,7 @@ class TelegramAdapter(BasePlatformAdapter):
            existing.media_urls.extend(event.media_urls)
            existing.media_types.extend(event.media_types)
            if event.text:
-                if existing.text:
-                    if event.text not in existing.text.split("\n\n"):
-                        existing.text = f"{existing.text}\n\n{event.text}"
-                else:
-                    existing.text = event.text
+                existing.text = self._merge_caption(existing.text, event.text)

        prior_task = self._media_group_tasks.get(media_group_id)
        if prior_task:
@@ -2253,3 +2673,46 @@ class TelegramAdapter(BasePlatformAdapter):
            auto_skill=topic_skill,
            timestamp=message.date,
        )
+
+    # ── Message reactions (processing lifecycle) ──────────────────────────
+
+    def _reactions_enabled(self) -> bool:
+        """Report whether the ``TELEGRAM_REACTIONS`` env var turns reactions on.
+
+        Unset defaults to ``"false"``; the values ``false``, ``0`` and ``no``
+        (case-insensitive) disable reactions, any other value enables them.
+        """
+        setting = os.getenv("TELEGRAM_REACTIONS", "false").lower()
+        disabled_values = ("false", "0", "no")
+        return setting not in disabled_values
+
+    async def _set_reaction(self, chat_id: str, message_id: str, emoji: str) -> bool:
+        """Put a single emoji reaction on a Telegram message.
+
+        Returns True when the API call completed, False when the bot is not
+        connected or the call raised (the failure is logged at debug level).
+        """
+        bot = self._bot
+        if not bot:
+            return False
+        try:
+            await bot.set_message_reaction(
+                chat_id=int(chat_id),
+                message_id=int(message_id),
+                reaction=emoji,
+            )
+        except Exception as exc:
+            logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, exc)
+            return False
+        return True
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """React with the "eyes" emoji when processing of a message begins.
+
+        Does nothing when reactions are disabled or when the event lacks a
+        chat id or message id.
+        """
+        if self._reactions_enabled():
+            target_chat = getattr(event.source, "chat_id", None)
+            target_message = getattr(event, "message_id", None)
+            if target_chat and target_message:
+                await self._set_reaction(target_chat, target_message, "\U0001f440")
+
+    async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
+        """Replace the in-progress reaction with a success/failure reaction.
+
+        Unlike Discord (additive reactions), Telegram's set_message_reaction
+        replaces all existing reactions in one call, so no separate remove
+        step is issued. Does nothing when reactions are disabled or when the
+        event lacks a chat id or message id.
+        """
+        if not self._reactions_enabled():
+            return
+        target_chat = getattr(event.source, "chat_id", None)
+        target_message = getattr(event, "message_id", None)
+        if not (target_chat and target_message):
+            return
+        outcome = "\u2705" if success else "\u274c"
+        await self._set_reaction(target_chat, target_message, outcome)
@@ -76,8 +76,17 @@ class WebhookAdapter(BasePlatformAdapter):
        self._routes: Dict[str, dict] = dict(self._static_routes)
        self._runner = None

-        # Delivery info keyed by session chat_id — consumed by send()
+        # Delivery info keyed by session chat_id.
+        #
+        # Read by every send() invocation for the chat_id (status messages
+        # AND the final response).  Cleaned up via TTL on each POST so the
+        # dict stays bounded — see _prune_delivery_info().  Do NOT pop on
+        # send(), or interim status messages (e.g. fallback notifications,
+        # context-pressure warnings) will consume the entry before the
+        # final response arrives, causing the response to silently fall
+        # back to the "log" deliver type.
        self._delivery_info: Dict[str, dict] = {}
+        self._delivery_info_created: Dict[str, float] = {}

        # Reference to gateway runner for cross-platform delivery (set externally)
        self.gateway_runner = None
@@ -160,10 +169,14 @@ class WebhookAdapter(BasePlatformAdapter):
    ) -> SendResult:
        """Deliver the agent's response to the configured destination.

-        chat_id is ``webhook:{route}:{delivery_id}`` — we pop the delivery
-        info stored during webhook receipt so it doesn't leak memory.
+        chat_id is ``webhook:{route}:{delivery_id}``.  The delivery info
+        stored during webhook receipt is read with ``.get()`` (not popped)
+        so that interim status messages emitted before the final response
+        — fallback-model notifications, context-pressure warnings, etc. —
+        do not consume the entry and silently downgrade the final response
+        to the ``log`` deliver type.  TTL cleanup happens on POST.
        """
-        delivery = self._delivery_info.pop(chat_id, {})
+        delivery = self._delivery_info.get(chat_id, {})
        deliver_type = delivery.get("deliver", "log")

        if deliver_type == "log":
@@ -190,6 +203,23 @@ class WebhookAdapter(BasePlatformAdapter):
            success=False, error=f"Unknown deliver type: {deliver_type}"
        )

+    def _prune_delivery_info(self, now: float) -> None:
+        """Evict delivery_info entries older than the idempotency TTL.
+
+        An entry is stale when its creation time in
+        ``_delivery_info_created`` is strictly earlier than
+        ``now - self._idempotency_ttl``; stale keys are removed from both
+        ``_delivery_info`` and ``_delivery_info_created``.  Invoked on each
+        POST so the dicts stay bounded even when webhooks never receive a
+        final response.
+        """
+        oldest_allowed = now - self._idempotency_ttl
+        # Snapshot the items so entries can be removed while scanning.
+        for chat_key, created_at in list(self._delivery_info_created.items()):
+            if created_at < oldest_allowed:
+                self._delivery_info.pop(chat_key, None)
+                self._delivery_info_created.pop(chat_key, None)
+
    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        return {"name": chat_id, "type": "webhook"}

@@ -203,10 +233,8 @@ class WebhookAdapter(BasePlatformAdapter):

    def _reload_dynamic_routes(self) -> None:
        """Reload agent-created subscriptions from disk if the file changed."""
-        from pathlib import Path as _Path
-        hermes_home = _Path(
-            os.getenv("HERMES_HOME", str(_Path.home() / ".hermes"))
-        ).expanduser()
+        from hermes_constants import get_hermes_home
+        hermes_home = get_hermes_home()
        subs_path = hermes_home / _DYNAMIC_ROUTES_FILENAME
        if not subs_path.exists():
            if self._dynamic_routes:
@@ -384,7 +412,9 @@ class WebhookAdapter(BasePlatformAdapter):
        # same route get independent agent runs (not queued/interrupted).
        session_chat_id = f"webhook:{route_name}:{delivery_id}"

-        # Store delivery info for send() — consumed (popped) on delivery
+        # Store delivery info for send().  Read by every send() invocation
+        # for this chat_id (interim status messages and the final response),
+        # so we do NOT pop on send.  TTL-based cleanup keeps the dict bounded.
        deliver_config = {
            "deliver": route_config.get("deliver", "log"),
            "deliver_extra": self._render_delivery_extra(
@@ -393,6 +423,8 @@ class WebhookAdapter(BasePlatformAdapter):
            "payload": payload,
        }
        self._delivery_info[session_chat_id] = deliver_config
+        self._delivery_info_created[session_chat_id] = now
+        self._prune_delivery_info(now)

        # Build source and event
        source = self.build_source(
@@ -484,6 +516,10 @@ class WebhookAdapter(BasePlatformAdapter):

        Supports dot-notation access into nested dicts:
        ``{pull_request.title}`` → ``payload["pull_request"]["title"]``
+
+        Special token ``{__raw__}`` dumps the entire payload as indented
+        JSON (truncated to 4000 chars).  Useful for monitoring alerts or
+        any webhook where the agent needs to see the full payload.
        """
        if not template:
            truncated = json.dumps(payload, indent=2)[:4000]
@@ -494,6 +530,9 @@ class WebhookAdapter(BasePlatformAdapter):

        def _resolve(match: re.Match) -> str:
            key = match.group(1)
+            # Special token: dump the entire payload as JSON
+            if key == "__raw__":
+                return json.dumps(payload, indent=2)[:4000]
            value: Any = payload
            for part in key.split("."):
                if isinstance(value, dict):
@@ -613,4 +652,10 @@ class WebhookAdapter(BasePlatformAdapter):
                    error=f"No chat_id or home channel for {platform_name}",
                )

-        return await adapter.send(chat_id, content)
+        # Pass thread_id from deliver_extra so Telegram forum topics work
+        metadata = None
+        thread_id = extra.get("message_thread_id") or extra.get("thread_id")
+        if thread_id:
+            metadata = {"thread_id": thread_id}
+
+        return await adapter.send(chat_id, content, metadata=metadata)
@@ -653,7 +653,7 @@ class WeComAdapter(BasePlatformAdapter):
            return ".png"
        if data.startswith(b"\xff\xd8\xff"):
            return ".jpg"
-        if data.startswith(b"GIF87a") or data.startswith(b"GIF89a"):
+        if data.startswith((b"GIF87a", b"GIF89a")):
            return ".gif"
        if data.startswith(b"RIFF") and data[8:12] == b"WEBP":
            return ".webp"
@@ -689,7 +689,7 @@ class WeComAdapter(BasePlatformAdapter):
    @staticmethod
    def _derive_message_type(body: Dict[str, Any], text: str, media_types: List[str]) -> MessageType:
        """Choose the normalized inbound message type."""
-        if any(mtype.startswith("application/") or mtype.startswith("text/") for mtype in media_types):
+        if any(mtype.startswith(("application/", "text/")) for mtype in media_types):
            return MessageType.DOCUMENT
        if any(mtype.startswith("image/") for mtype in media_types):
            return MessageType.TEXT if text else MessageType.PHOTO
@@ -910,6 +910,10 @@ class WeComAdapter(BasePlatformAdapter):
        url: str,
        max_bytes: int,
    ) -> Tuple[bytes, Dict[str, str]]:
+        from tools.url_safety import is_safe_url
+        if not is_safe_url(url):
+            raise ValueError(f"Blocked unsafe URL (SSRF protection): {url[:80]}")
+
        if not HTTPX_AVAILABLE:
            raise RuntimeError("httpx is required for WeCom media download")

@@ -27,7 +27,6 @@ _IS_WINDOWS = platform.system() == "Windows"
 from pathlib import Path
 from typing import Dict, Optional, Any

-from hermes_cli.config import get_hermes_home
 from hermes_constants import get_hermes_dir

 logger = logging.getLogger(__name__)
@@ -24,7 +24,6 @@ import signal
 import tempfile
 import threading
 import time
-import uuid
 from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List
@@ -378,7 +377,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
                    )

        # Check optional skills (shipped with repo but not installed)
-        from hermes_constants import get_hermes_home, get_optional_skills_dir
+        from hermes_constants import get_optional_skills_dir
        repo_root = Path(__file__).resolve().parent.parent
        optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
        if optional_dir.exists():
@@ -1127,6 +1126,7 @@ class GatewayRunner:
            # Set up message + fatal error handlers
            adapter.set_message_handler(self._handle_message)
            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
+            adapter.set_session_store(self.session_store)
            
            # Try to connect
            logger.info("Connecting to %s...", platform.value)
@@ -1424,6 +1424,7 @@ class GatewayRunner:

                    adapter.set_message_handler(self._handle_message)
                    adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
+                    adapter.set_session_store(self.session_store)

                    success = await adapter.connect()
                    if success:
@@ -1856,6 +1857,11 @@ class GatewayRunner:
        if _quick_key in self._running_agents and _stale_ts:
            _stale_age = time.time() - _stale_ts
            _stale_agent = self._running_agents.get(_quick_key)
+            # Never evict the pending sentinel — it was just placed moments
+            # ago during the async setup phase before the real agent is
+            # created.  Sentinels have no get_activity_summary(), so the
+            # idle check below would always evaluate to inf >= timeout and
+            # immediately evict them, racing with the setup path.
            _stale_idle = float("inf")  # assume idle if we can't check
            _stale_detail = ""
            if _stale_agent and hasattr(_stale_agent, "get_activity_summary"):
@@ -1874,8 +1880,11 @@ class GatewayRunner:
            # cases where the agent object was garbage-collected).
            _wall_ttl = max(_raw_stale_timeout * 10, 7200) if _raw_stale_timeout > 0 else float("inf")
            _should_evict = (
-                (_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout)
-                or _stale_age > _wall_ttl
+                _stale_agent is not _AGENT_PENDING_SENTINEL
+                and (
+                    (_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout)
+                    or _stale_age > _wall_ttl
+                )
            )
            if _should_evict:
                logger.warning(
@@ -1978,10 +1987,7 @@ class GatewayRunner:
                            existing.media_urls.extend(event.media_urls)
                            existing.media_types.extend(event.media_types)
                            if event.text:
-                                if not existing.text:
-                                    existing.text = event.text
-                                elif event.text not in existing.text:
-                                    existing.text = f"{existing.text}\n\n{event.text}".strip()
+                                existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
                        else:
                            adapter._pending_messages[_quick_key] = event
                    else:
@@ -2812,7 +2818,7 @@ class GatewayRunner:
                        guessed, _ = _mimetypes.guess_type(path)
                        if guessed:
                            mtype = guessed
-                if not (mtype.startswith("application/") or mtype.startswith("text/")):
+                if not mtype.startswith(("application/", "text/")):
                    continue
                # Extract display filename by stripping the doc_{uuid12}_ prefix
                import os as _os
@@ -3244,7 +3250,7 @@ class GatewayRunner:
            old_entry = self.session_store._entries.get(session_key)
            if old_entry:
                _flush_task = asyncio.create_task(
-                    self._async_flush_memories(old_entry.session_id, session_key)
+                    self._async_flush_memories(old_entry.session_id)
                )
                self._background_tasks.add(_flush_task)
                _flush_task.add_done_callback(self._background_tasks.discard)
@@ -3252,9 +3258,25 @@ class GatewayRunner:
            logger.debug("Gateway memory flush on reset failed: %s", e)
        self._evict_cached_agent(session_key)
        
+        try:
+            from tools.env_passthrough import clear_env_passthrough
+            clear_env_passthrough()
+        except Exception:
+            pass
+
+        try:
+            from tools.credential_files import clear_credential_files
+            clear_credential_files()
+        except Exception:
+            pass
+
        # Reset the session
        new_entry = self.session_store.reset_session(session_key)

+        # Clear any session-scoped model override so the next agent picks up
+        # the configured default instead of the previously switched model.
+        self._session_model_overrides.pop(session_key, None)
+
        # Emit session:end hook (session is ending)
        await self.hooks.emit("session:end", {
            "platform": source.platform.value if source.platform else "",
@@ -3320,25 +3342,36 @@ class GatewayRunner:
        """Handle /status command."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
-        
+
        connected_platforms = [p.value for p in self.adapters.keys()]
-        
+
        # Check if there's an active agent
        session_key = session_entry.session_key
        is_running = session_key in self._running_agents
-        
+
+        title = None
+        if self._session_db:
+            try:
+                title = self._session_db.get_session_title(session_entry.session_id)
+            except Exception:
+                title = None
+
        lines = [
            "📊 **Hermes Gateway Status**",
            "",
-            f"**Session ID:** `{session_entry.session_id[:12]}...`",
+            f"**Session ID:** `{session_entry.session_id}`",
+        ]
+        if title:
+            lines.append(f"**Title:** {title}")
+        lines.extend([
            f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
            f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
            f"**Tokens:** {session_entry.total_tokens:,}",
            f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
            "",
            f"**Connected Platforms:** {', '.join(connected_platforms)}",
-        ]
-        
+        ])
+
        return "\n".join(lines)
    
    async def _handle_stop_command(self, event: MessageEvent) -> str:
@@ -3446,11 +3479,11 @@ class GatewayRunner:
            lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_")
        return "\n".join(lines)
    
-    async def _handle_model_command(self, event: MessageEvent) -> str:
+    async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
        """Handle /model command — switch model for this session.

        Supports:
-          /model                              — show current model info
+          /model                              — interactive picker (Telegram/Discord) or text list
          /model <name>                       — switch for this session only
          /model <name> --global              — switch and persist to config.yaml
          /model <name> --provider <provider> — switch provider + model
@@ -3481,7 +3514,7 @@ class GatewayRunner:
                    cfg = yaml.safe_load(f) or {}
                model_cfg = cfg.get("model", {})
                if isinstance(model_cfg, dict):
-                    current_model = model_cfg.get("name", "")
+                    current_model = model_cfg.get("default", "")
                    current_provider = model_cfg.get("provider", current_provider)
                    current_base_url = model_cfg.get("base_url", "")
                user_provs = cfg.get("providers")
@@ -3498,8 +3531,118 @@ class GatewayRunner:
            current_base_url = override.get("base_url", current_base_url)
            current_api_key = override.get("api_key", current_api_key)

-        # No args: show authenticated providers with models
+        # No args: show interactive picker (Telegram/Discord) or text list
        if not model_input and not explicit_provider:
+            # Try interactive picker if the platform supports it
+            adapter = self.adapters.get(source.platform)
+            has_picker = (
+                adapter is not None
+                and getattr(type(adapter), "send_model_picker", None) is not None
+            )
+
+            if has_picker:
+                try:
+                    providers = list_authenticated_providers(
+                        current_provider=current_provider,
+                        user_providers=user_provs,
+                        max_models=50,
+                    )
+                except Exception:
+                    providers = []
+
+                if providers:
+                    # Build a callback closure for when the user picks a model.
+                    # Captures self + locals needed for the switch logic.
+                    _self = self
+                    _session_key = session_key
+                    _cur_model = current_model
+                    _cur_provider = current_provider
+                    _cur_base_url = current_base_url
+                    _cur_api_key = current_api_key
+
+                    async def _on_model_selected(
+                        _chat_id: str, model_id: str, provider_slug: str
+                    ) -> str:
+                        """Perform the model switch and return confirmation text."""
+                        result = _switch_model(
+                            raw_input=model_id,
+                            current_provider=_cur_provider,
+                            current_model=_cur_model,
+                            current_base_url=_cur_base_url,
+                            current_api_key=_cur_api_key,
+                            is_global=False,
+                            explicit_provider=provider_slug,
+                        )
+                        if not result.success:
+                            return f"Error: {result.error_message}"
+
+                        # Update cached agent in-place
+                        cached_entry = None
+                        _cache_lock = getattr(_self, "_agent_cache_lock", None)
+                        _cache = getattr(_self, "_agent_cache", None)
+                        if _cache_lock and _cache is not None:
+                            with _cache_lock:
+                                cached_entry = _cache.get(_session_key)
+                        if cached_entry and cached_entry[0] is not None:
+                            try:
+                                cached_entry[0].switch_model(
+                                    new_model=result.new_model,
+                                    new_provider=result.target_provider,
+                                    api_key=result.api_key,
+                                    base_url=result.base_url,
+                                    api_mode=result.api_mode,
+                                )
+                            except Exception as exc:
+                                logger.warning("Picker model switch failed for cached agent: %s", exc)
+
+                        # Store model note + session override
+                        if not hasattr(_self, "_pending_model_notes"):
+                            _self._pending_model_notes = {}
+                        _self._pending_model_notes[_session_key] = (
+                            f"[Note: model was just switched from {_cur_model} to {result.new_model} "
+                            f"via {result.provider_label or result.target_provider}. "
+                            f"Adjust your self-identification accordingly.]"
+                        )
+                        if not hasattr(_self, "_session_model_overrides"):
+                            _self._session_model_overrides = {}
+                        _self._session_model_overrides[_session_key] = {
+                            "model": result.new_model,
+                            "provider": result.target_provider,
+                            "api_key": result.api_key,
+                            "base_url": result.base_url,
+                            "api_mode": result.api_mode,
+                        }
+
+                        # Build confirmation text
+                        plabel = result.provider_label or result.target_provider
+                        lines = [f"Model switched to `{result.new_model}`"]
+                        lines.append(f"Provider: {plabel}")
+                        mi = result.model_info
+                        if mi:
+                            if mi.context_window:
+                                lines.append(f"Context: {mi.context_window:,} tokens")
+                            if mi.max_output:
+                                lines.append(f"Max output: {mi.max_output:,} tokens")
+                            if mi.has_cost_data():
+                                lines.append(f"Cost: {mi.format_cost()}")
+                            lines.append(f"Capabilities: {mi.format_capabilities()}")
+                        lines.append("_(session only — use `/model <name> --global` to persist)_")
+                        return "\n".join(lines)
+
+                    metadata = {"thread_id": source.thread_id} if source.thread_id else None
+                    result = await adapter.send_model_picker(
+                        chat_id=source.chat_id,
+                        providers=providers,
+                        current_model=current_model,
+                        current_provider=current_provider,
+                        session_key=session_key,
+                        on_model_selected=_on_model_selected,
+                        metadata=metadata,
+                    )
+                    if result.success:
+                        return None  # Picker sent — adapter handles the response
+
+            # Fallback: text list (for platforms without picker or if picker failed)
            provider_label = get_label(current_provider)
            lines = [f"Current: `{current_model or 'unknown'}` on {provider_label}", ""]

@@ -3591,7 +3734,7 @@ class GatewayRunner:
                else:
                    cfg = {}
                model_cfg = cfg.setdefault("model", {})
-                model_cfg["name"] = result.new_model
+                model_cfg["default"] = result.new_model
                model_cfg["provider"] = result.target_provider
                if result.base_url:
                    model_cfg["base_url"] = result.base_url
@@ -3773,7 +3916,7 @@ class GatewayRunner:

            return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"

-        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
+        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
    async def _handle_retry_command(self, event: MessageEvent) -> str:
@@ -4416,6 +4559,7 @@ class GatewayRunner:
                    provider_data_collection=pr.get("data_collection"),
                    session_id=task_id,
                    platform=platform_key,
+                    user_id=source.user_id,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
@@ -4777,8 +4921,8 @@ class GatewayRunner:
        cycle = ["off", "new", "all", "verbose"]
        descriptions = {
            "off": "⚙️ Tool progress: **OFF** — no tool activity shown.",
-            "new": "⚙️ Tool progress: **NEW** — shown when tool changes (short previews).",
-            "all": "⚙️ Tool progress: **ALL** — every tool call shown (short previews).",
+            "new": "⚙️ Tool progress: **NEW** — shown when tool changes (preview length: `display.tool_preview_length`, default 40).",
+            "all": "⚙️ Tool progress: **ALL** — every tool call shown (preview length: `display.tool_preview_length`, default 40).",
            "verbose": "⚙️ Tool progress: **VERBOSE** — every tool call with full arguments.",
        }

@@ -4974,7 +5118,7 @@ class GatewayRunner:
        # Flush memories for current session before switching
        try:
            _flush_task = asyncio.create_task(
-                self._async_flush_memories(current_entry.session_id, session_key)
+                self._async_flush_memories(current_entry.session_id)
            )
            self._background_tasks.add(_flush_task)
            _flush_task.add_done_callback(self._background_tasks.discard)
@@ -5185,9 +5329,6 @@ class GatewayRunner:
                old_servers = set(_servers.keys())

            # Read new config before shutting down, so we know what will be added/removed
-            new_config = _load_mcp_config()
-            new_server_names = set(new_config.keys())
-
            # Shutdown existing connections
            await loop.run_in_executor(None, shutdown_mcp_servers)

@@ -5275,7 +5416,6 @@ class GatewayRunner:

        from tools.approval import (
            resolve_gateway_approval, has_blocking_approval,
-            pending_approval_count,
        )

        if not has_blocking_approval(session_key):
@@ -5303,6 +5443,11 @@ class GatewayRunner:
        if not count:
            return "No pending command to approve."

+        # Resume typing indicator — agent is about to continue processing.
+        _adapter = self.adapters.get(source.platform)
+        if _adapter:
+            _adapter.resume_typing_for_chat(source.chat_id)
+
        count_msg = f" ({count} commands)" if count > 1 else ""
        logger.info("User approved %d dangerous command(s) via /approve%s", count, scope_msg)
        return f"✅ Command{'s' if count > 1 else ''} approved{scope_msg}{count_msg}. The agent is resuming..."
@@ -5335,6 +5480,11 @@ class GatewayRunner:
        if not count:
            return "No pending command to deny."

+        # Resume typing indicator — agent continues (with BLOCKED result).
+        _adapter = self.adapters.get(source.platform)
+        if _adapter:
+            _adapter.resume_typing_for_chat(source.chat_id)
+
        count_msg = f" ({count} commands)" if count > 1 else ""
        logger.info("User denied %d dangerous command(s) via /deny", count)
        return f"❌ Command{'s' if count > 1 else ''} denied{count_msg}."
@@ -5894,6 +6044,11 @@ class GatewayRunner:

        if enriched_parts:
            prefix = "\n\n".join(enriched_parts)
+            # Strip the empty-content placeholder from the Discord adapter
+            # when we successfully transcribed the audio — it's redundant.
+            _placeholder = "(The user sent a message with no text content)"
+            if user_text and user_text.strip() == _placeholder:
+                return prefix
            if user_text:
                return f"{prefix}\n\n{user_text}"
            return prefix
@@ -5920,12 +6075,13 @@ class GatewayRunner:
        platform_name = watcher.get("platform", "")
        chat_id = watcher.get("chat_id", "")
        thread_id = watcher.get("thread_id", "")
+        agent_notify = watcher.get("notify_on_complete", False)
        notify_mode = self._load_background_notifications_mode()

-        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
-                      session_id, interval, notify_mode)
+        logger.debug("Process watcher started: %s (every %ss, notify=%s, agent_notify=%s)",
+                      session_id, interval, notify_mode, agent_notify)

-        if notify_mode == "off":
+        if notify_mode == "off" and not agent_notify:
            # Still wait for the process to exit so we can log it, but don't
            # push any messages to the user.
            while True:
@@ -5949,6 +6105,47 @@ class GatewayRunner:
            last_output_len = current_output_len

            if session.exited:
+                # --- Agent-triggered completion: inject synthetic message ---
+                if agent_notify:
+                    from tools.ansi_strip import strip_ansi
+                    _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
+                    synth_text = (
+                        f"[SYSTEM: Background process {session_id} completed "
+                        f"(exit code {session.exit_code}).\n"
+                        f"Command: {session.command}\n"
+                        f"Output:\n{_out}]"
+                    )
+                    adapter = None
+                    for p, a in self.adapters.items():
+                        if p.value == platform_name:
+                            adapter = a
+                            break
+                    if adapter and chat_id:
+                        try:
+                            from gateway.platforms.base import MessageEvent, MessageType
+                            from gateway.session import SessionSource
+                            from gateway.config import Platform
+                            _platform_enum = Platform(platform_name)
+                            _source = SessionSource(
+                                platform=_platform_enum,
+                                chat_id=chat_id,
+                                thread_id=thread_id or None,
+                            )
+                            synth_event = MessageEvent(
+                                text=synth_text,
+                                message_type=MessageType.TEXT,
+                                source=_source,
+                            )
+                            logger.info(
+                                "Process %s finished — injecting agent notification for session %s",
+                                session_id, session_key,
+                            )
+                            await adapter.handle_message(synth_event)
+                        except Exception as e:
+                            logger.error("Agent notify injection error: %s", e)
+                    break
+
+                # --- Normal text-only notification ---
                # Decide whether to notify based on mode
                should_notify = (
                    notify_mode in ("all", "result")
@@ -5973,8 +6170,9 @@ class GatewayRunner:
                            logger.error("Watcher delivery error: %s", e)
                break

-            elif has_new_output and notify_mode == "all":
+            elif has_new_output and notify_mode == "all" and not agent_notify:
                # New output available -- deliver status update (only in "all" mode)
+                # Skip periodic updates for agent_notify watchers (they only care about completion)
                new_output = session.output_buffer[-500:] if session.output_buffer else ""
                message_text = (
                    f"[Background process {session_id} is still running~ "
@@ -6142,10 +6340,15 @@ class GatewayRunner:
                progress_queue.put(msg)
                return
            
-            # "all" / "new" modes: short preview, always truncated (40 chars)
+            # "all" / "new" modes: short preview, respects tool_preview_length
+            # config (defaults to 40 chars when unset to keep gateway messages
+            # compact — unlike CLI spinners, these persist as permanent messages).
            if preview:
-                if len(preview) > 40:
-                    preview = preview[:37] + "..."
+                from agent.display import get_tool_preview_max_len
+                _pl = get_tool_preview_max_len()
+                _cap = _pl if _pl > 0 else 40
+                if len(preview) > _cap:
+                    preview = preview[:_cap - 3] + "..."
                msg = f"{emoji} {tool_name}: \"{preview}\""
            else:
                msg = f"{emoji} {tool_name}..."
@@ -6461,6 +6664,7 @@ class GatewayRunner:
                    provider_data_collection=pr.get("data_collection"),
                    session_id=session_id,
                    platform=platform_key,
+                    user_id=source.user_id,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
@@ -6585,6 +6789,15 @@ class GatewayRunner:
                UX.  Otherwise fall back to a plain text message with
                ``/approve`` instructions.
                """
+                # Pause the typing indicator while the agent waits for
+                # user approval.  Critical for Slack's Assistant API where
+                # assistant_threads_setStatus disables the compose box — the
+                # user literally cannot type /approve while "is thinking..."
+                # is active.  The approval message send auto-clears the Slack
+                # status; pausing prevents _keep_typing from re-setting it.
+                # Typing resumes in _handle_approve_command/_handle_deny_command.
+                _status_adapter.pause_typing_for_chat(_status_chat_id)
+
                cmd = approval_data.get("command", "")
                desc = approval_data.get("description", "dangerous command")

@@ -7003,6 +7216,27 @@ class GatewayRunner:
                    if pending:
                        logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
            
+            # Safety net: if the pending text is a recognized slash command (e.g.
+            # "/stop", "/new"), discard it — commands should never be passed to
+            # the agent as user input; unrecognized "/..." text is left untouched.
+            # The primary fix is in base.py (commands bypass the active-session
+            # guard); this catches text leaking through the interrupt_message fallback.
+            if pending and pending.strip().startswith("/"):
+                _pending_parts = pending.strip().split(None, 1)
+                _pending_cmd_word = _pending_parts[0][1:].lower() if _pending_parts else ""
+                if _pending_cmd_word:
+                    try:
+                        from hermes_cli.commands import resolve_command as _rc_pending
+                        if _rc_pending(_pending_cmd_word):
+                            logger.info(
+                                "Discarding command '/%s' from pending queue — "
+                                "commands must not be passed as agent input",
+                                _pending_cmd_word,
+                            )
+                            pending = None
+                    except Exception:
+                        pass
+
            if pending:
                logger.debug("Processing pending message: '%s...'", pending[:40])
                
@@ -28,6 +28,10 @@ logger = logging.getLogger("gateway.stream_consumer")
 # Sentinel to signal the stream is complete
 _DONE = object()

+# Sentinel to signal a tool boundary — finalize current message and start a
+# new one so that subsequent text appears below tool progress messages.
+_NEW_SEGMENT = object()
+

@dataclass
 class StreamConsumerConfig:
@@ -78,9 +82,16 @@ class GatewayStreamConsumer:
        return self._already_sent

    def on_delta(self, text: str) -> None:
-        """Thread-safe callback — called from the agent's worker thread."""
+        """Thread-safe callback — called from the agent's worker thread.
+
+        When *text* is ``None``, signals a tool boundary: the current message
+        is finalized and subsequent text will be sent as a new message so it
+        appears below any tool-progress messages the gateway sent in between.
+        """
        if text:
            self._queue.put(text)
+        elif text is None:
+            self._queue.put(_NEW_SEGMENT)

    def finish(self) -> None:
        """Signal that the stream is complete."""
@@ -96,12 +107,16 @@ class GatewayStreamConsumer:
            while True:
                # Drain all available items from the queue
                got_done = False
+                got_segment_break = False
                while True:
                    try:
                        item = self._queue.get_nowait()
                        if item is _DONE:
                            got_done = True
                            break
+                        if item is _NEW_SEGMENT:
+                            got_segment_break = True
+                            break
                        self._accumulated += item
                    except queue.Empty:
                        break
@@ -111,8 +126,9 @@ class GatewayStreamConsumer:
                elapsed = now - self._last_edit_time
                should_edit = (
                    got_done
+                    or got_segment_break
                    or (elapsed >= self.cfg.edit_interval
-                        and len(self._accumulated) > 0)
+                        and self._accumulated)
                    or len(self._accumulated) >= self.cfg.buffer_threshold
                )

@@ -133,7 +149,7 @@ class GatewayStreamConsumer:
                        self._last_sent_text = ""

                    display_text = self._accumulated
-                    if not got_done:
+                    if not got_done and not got_segment_break:
                        display_text += self.cfg.cursor

                    await self._send_or_edit(display_text)
@@ -145,6 +161,15 @@ class GatewayStreamConsumer:
                        await self._send_or_edit(self._accumulated)
                    return

+                # Tool boundary: the should_edit block above already flushed
+                # accumulated text without a cursor.  Reset state so the next
+                # text chunk creates a fresh message below any tool-progress
+                # messages the gateway sent in between.
+                if got_segment_break:
+                    self._message_id = None
+                    self._accumulated = ""
+                    self._last_sent_text = ""
+
                await asyncio.sleep(0.05)  # Small yield to not busy-loop

        except asyncio.CancelledError:
@@ -37,7 +37,7 @@ from typing import Any, Dict, List, Optional
 import httpx
 import yaml

-from hermes_cli.config import get_hermes_home, get_config_path
+from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)
@@ -69,6 +69,7 @@ DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
+DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -125,6 +126,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url=DEFAULT_COPILOT_ACP_BASE_URL,
        base_url_env_var="COPILOT_ACP_BASE_URL",
    ),
+    "gemini": ProviderConfig(
+        id="gemini",
+        name="Google AI Studio",
+        auth_type="api_key",
+        inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+        api_key_env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
+        base_url_env_var="GEMINI_BASE_URL",
+    ),
    "zai": ProviderConfig(
        id="zai",
        name="Z.AI / GLM",
@@ -395,6 +404,47 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
    return None


+def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> str:
+    """Return the Z.AI base URL to use for *api_key*.
+
+    Resolution order:
+
+    1. *env_override* (an explicitly set GLM_BASE_URL) always wins.  Trailing
+       slashes are stripped so the result matches what the generic
+       ``env_url.rstrip("/")`` branch in
+       ``resolve_api_key_provider_credentials`` produces.
+    2. An endpoint cached in the "zai" provider state, used only when its
+       stored ``key_hash`` matches the current key.
+    3. A live probe via ``detect_zai_endpoint``; a hit is written back into
+       the provider state.
+    4. *default_url* when the probe finds nothing.
+
+    Args:
+        api_key: Z.AI API key.  Only a truncated SHA-256 digest of it is
+            stored alongside the cached endpoint.
+        default_url: Base URL returned when detection fails.
+        env_override: Explicit base URL from the environment, or "" if unset.
+
+    Returns:
+        The base URL to use for Z.AI requests.
+    """
+    if env_override:
+        return env_override.rstrip("/")
+
+    # Computed once and reused for both the cache lookup and the cache write.
+    key_hash = hashlib.sha256(api_key.encode()).hexdigest()[:16]
+
+    # Check provider-state cache for a previously-detected endpoint.
+    auth_store = _load_auth_store()
+    state = _load_provider_state(auth_store, "zai") or {}
+    cached = state.get("detected_endpoint")
+    if (
+        isinstance(cached, dict)
+        and cached.get("base_url")
+        and cached.get("key_hash", "") == key_hash
+    ):
+        logger.debug("Z.AI: using cached endpoint %s", cached["base_url"])
+        return cached["base_url"]
+
+    # Probe — may take up to ~8s per endpoint.
+    detected = detect_zai_endpoint(api_key)
+    if detected and detected.get("base_url"):
+        # "label" is optional in the stored record, so read it defensively
+        # for the log line as well instead of indexing.
+        label = detected.get("label", "")
+        state["detected_endpoint"] = {
+            "base_url": detected["base_url"],
+            "endpoint_id": detected.get("id", ""),
+            "model": detected.get("model", ""),
+            "label": label,
+            "key_hash": key_hash,
+        }
+        # NOTE(review): confirm _save_provider_state writes auth.json to disk
+        # (and whether this read-modify-write should hold the auth-store lock).
+        _save_provider_state(auth_store, "zai", state)
+        logger.info("Z.AI: auto-detected endpoint %s (%s)", label, detected["base_url"])
+        return detected["base_url"]
+
+    logger.debug("Z.AI: probe failed, falling back to default %s", default_url)
+    return default_url
+
+
 # =============================================================================
 # Error Types
 # =============================================================================
@@ -758,6 +808,7 @@ def resolve_provider(
    # Normalize provider aliases
    _PROVIDER_ALIASES = {
        "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
+        "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
        "kimi": "kimi-coding", "moonshot": "kimi-coding",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -926,7 +977,7 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    state = _load_provider_state(auth_store, "openai-codex")
    if not state:
        raise AuthError(
-            "No Codex credentials stored. Run `hermes login` to authenticate.",
+            "No Codex credentials stored. Run `hermes auth` to authenticate.",
            provider="openai-codex",
            code="codex_auth_missing",
            relogin_required=True,
@@ -934,7 +985,7 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    tokens = state.get("tokens")
    if not isinstance(tokens, dict):
        raise AuthError(
-            "Codex auth state is missing tokens. Run `hermes login` to re-authenticate.",
+            "Codex auth state is missing tokens. Run `hermes auth` to re-authenticate.",
            provider="openai-codex",
            code="codex_auth_invalid_shape",
            relogin_required=True,
@@ -943,14 +994,14 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    refresh_token = tokens.get("refresh_token")
    if not isinstance(access_token, str) or not access_token.strip():
        raise AuthError(
-            "Codex auth is missing access_token. Run `hermes login` to re-authenticate.",
+            "Codex auth is missing access_token. Run `hermes auth` to re-authenticate.",
            provider="openai-codex",
            code="codex_auth_missing_access_token",
            relogin_required=True,
        )
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
-            "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.",
+            "Codex auth is missing refresh_token. Run `hermes auth` to re-authenticate.",
            provider="openai-codex",
            code="codex_auth_missing_refresh_token",
            relogin_required=True,
@@ -985,7 +1036,7 @@ def refresh_codex_oauth_pure(
    del access_token  # Access token is only used by callers to decide whether to refresh.
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
-            "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.",
+            "Codex auth is missing refresh_token. Run `hermes auth` to re-authenticate.",
            provider="openai-codex",
            code="codex_auth_missing_refresh_token",
            relogin_required=True,
@@ -1020,6 +1071,14 @@ def refresh_codex_oauth_pure(
            pass
        if code in {"invalid_grant", "invalid_token", "invalid_request"}:
            relogin_required = True
+        if code == "refresh_token_reused":
+            message = (
+                "Codex refresh token was already consumed by another client "
+                "(e.g. Codex CLI or VS Code extension). "
+                "Run `codex` in your terminal to generate fresh tokens, "
+                "then run `hermes auth` to re-authenticate."
+            )
+            relogin_required = True
        raise AuthError(
            message,
            provider="openai-codex",
@@ -1081,7 +1140,8 @@ def _refresh_codex_auth_tokens(
 def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
    """Try to read tokens from ~/.codex/auth.json (Codex CLI shared file).
    
-    Returns tokens dict if valid, None otherwise. Does NOT write to the shared file.
+    Returns tokens dict if valid and not expired, None otherwise.
+    Does NOT write to the shared file.
    """
    codex_home = os.getenv("CODEX_HOME", "").strip()
    if not codex_home:
@@ -1094,7 +1154,17 @@ def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
        tokens = payload.get("tokens")
        if not isinstance(tokens, dict):
            return None
-        if not tokens.get("access_token") or not tokens.get("refresh_token"):
+        access_token = tokens.get("access_token")
+        refresh_token = tokens.get("refresh_token")
+        if not access_token or not refresh_token:
+            return None
+        # Reject expired tokens — importing stale tokens from ~/.codex/
+        # that can't be refreshed leaves the user stuck with "Login successful!"
+        # but no working credentials.
+        if _codex_access_token_is_expiring(access_token, 0):
+            logger.debug(
+                "Codex CLI tokens at %s are expired — skipping import.", auth_path,
+            )
            return None
        return dict(tokens)
    except Exception:
@@ -1122,7 +1192,7 @@ def resolve_codex_runtime_credentials(
            logger.info("Migrating Codex credentials from ~/.codex/ to Hermes auth store")
            print("⚠️  Migrating Codex credentials to Hermes's own auth store.")
            print("   This avoids conflicts with Codex CLI and VS Code.")
-            print("   Run `hermes login` to create a fully independent session.\n")
+            print("   Run `hermes auth` to create a fully independent session.\n")
            _save_codex_tokens(cli_tokens)
            data = _read_codex_tokens()
        else:
@@ -1886,7 +1956,36 @@ def get_nous_auth_status() -> Dict[str, Any]:


 def get_codex_auth_status() -> Dict[str, Any]:
-    """Status snapshot for Codex auth."""
+    """Status snapshot for Codex auth.
+    
+    Checks the credential pool first (where `hermes auth` stores credentials),
+    then falls back to the legacy provider state.
+    """
+    # Check credential pool first — this is where `hermes auth` and
+    # `hermes model` store device_code tokens.
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool("openai-codex")
+        if pool and pool.has_credentials():
+            entry = pool.select()
+            if entry is not None:
+                api_key = (
+                    getattr(entry, "runtime_api_key", None)
+                    or getattr(entry, "access_token", "")
+                )
+                if api_key and not _codex_access_token_is_expiring(api_key, 0):
+                    return {
+                        "logged_in": True,
+                        "auth_store": str(_auth_file_path()),
+                        "last_refresh": getattr(entry, "last_refresh", None),
+                        "auth_mode": "chatgpt",
+                        "source": f"pool:{getattr(entry, 'label', 'unknown')}",
+                        "api_key": api_key,
+                    }
+    except Exception:
+        pass
+
+    # Fall back to legacy provider state
    try:
        creds = resolve_codex_runtime_credentials()
        return {
@@ -1895,6 +1994,7 @@ def get_codex_auth_status() -> Dict[str, Any]:
            "last_refresh": creds.get("last_refresh"),
            "auth_mode": creds.get("auth_mode"),
            "source": creds.get("source"),
+            "api_key": creds.get("api_key"),
        }
    except AuthError as exc:
        return {
@@ -2004,6 +2104,8 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:

    if provider_id == "kimi-coding":
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
+    elif provider_id == "zai":
+        base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
    elif env_url:
        base_url = env_url.rstrip("/")
    else:
@@ -2078,7 +2180,7 @@ def detect_external_credentials() -> List[Dict[str, Any]]:
        found.append({
            "provider": "openai-codex",
            "path": str(codex_path),
-            "label": f"Codex CLI credentials found ({codex_path}) — run `hermes login` to create a separate session",
+            "label": f"Codex CLI credentials found ({codex_path}) — run `hermes auth` to create a separate session",
        })

    return found
@@ -2112,14 +2214,7 @@ def _update_config_for_provider(
    config_path = get_config_path()
    config_path.parent.mkdir(parents=True, exist_ok=True)

-    config: Dict[str, Any] = {}
-    if config_path.exists():
-        try:
-            loaded = yaml.safe_load(config_path.read_text()) or {}
-            if isinstance(loaded, dict):
-                config = loaded
-        except Exception:
-            config = {}
+    config = read_raw_config()

    current_model = config.get("model")
    if isinstance(current_model, dict):
@@ -2156,12 +2251,8 @@ def _reset_config_provider() -> Path:
    if not config_path.exists():
        return config_path

-    try:
-        config = yaml.safe_load(config_path.read_text()) or {}
-    except Exception:
-        return config_path
-
-    if not isinstance(config, dict):
+    config = read_raw_config()
+    if not config:
        return config_path

    model = config.get("model")
@@ -2177,14 +2268,21 @@ def _prompt_model_selection(
    model_ids: List[str],
    current_model: str = "",
    pricing: Optional[Dict[str, Dict[str, str]]] = None,
+    unavailable_models: Optional[List[str]] = None,
+    portal_url: str = "",
 ) -> Optional[str]:
    """Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.

    If *pricing* is provided (``{model_id: {prompt, completion}}``), a compact
    price indicator is shown next to each model in aligned columns.
+
+    If *unavailable_models* is provided, those models are shown grayed out
+    and unselectable, with an upgrade link to *portal_url*.
    """
    from hermes_cli.models import _format_price_per_mtok

+    _unavailable = unavailable_models or []
+
    # Reorder: current model first, then the rest (deduplicated)
    ordered = []
    if current_model and current_model in model_ids:
@@ -2193,9 +2291,12 @@ def _prompt_model_selection(
        if mid not in ordered:
            ordered.append(mid)

+    # All models for column-width computation (selectable + unavailable)
+    all_models = list(ordered) + list(_unavailable)
+
    # Column-aligned labels when pricing is available
-    has_pricing = bool(pricing and any(pricing.get(m) for m in ordered))
-    name_col = max((len(m) for m in ordered), default=0) + 2 if has_pricing else 0
+    has_pricing = bool(pricing and any(pricing.get(m) for m in all_models))
+    name_col = max((len(m) for m in all_models), default=0) + 2 if has_pricing else 0

    # Pre-compute formatted prices and dynamic column widths
    _price_cache: dict[str, tuple[str, str, str]] = {}
@@ -2203,7 +2304,7 @@ def _prompt_model_selection(
    cache_col = 0  # only set if any model has cache pricing
    has_cache = False
    if has_pricing:
-        for mid in ordered:
+        for mid in all_models:
            p = pricing.get(mid)  # type: ignore[union-attr]
            if p:
                inp = _format_price_per_mtok(p.get("prompt", ""))
@@ -2248,12 +2349,35 @@ def _prompt_model_selection(
            header += f"  {'Cache':>{cache_col}}"
        menu_title += header + "  /Mtok"

+    # ANSI escape for dim text
+    _DIM = "\033[2m"
+    _RESET = "\033[0m"
+
    # Try arrow-key menu first, fall back to number input
    try:
        from simple_term_menu import TerminalMenu
+
        choices = [f"  {_label(mid)}" for mid in ordered]
        choices.append("  Enter custom model name")
        choices.append("  Skip (keep current)")
+
+        # Print the unavailable block BEFORE the menu via regular print().
+        # simple_term_menu pads title lines to terminal width (causes wrapping),
+        # so we keep the title minimal and use stdout for the static block.
+        # clear_screen=False means our printed output stays visible above.
+        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+        if _unavailable:
+            print(menu_title)
+            print()
+            for mid in _unavailable:
+                print(f"{_DIM}     {_label(mid)}{_RESET}")
+            print()
+            print(f"{_DIM}  ── Upgrade at {_upgrade_url} for paid models ──{_RESET}")
+            print()
+            effective_title = "Available free models:"
+        else:
+            effective_title = menu_title
+
        menu = TerminalMenu(
            choices,
            cursor_index=default_idx,
@@ -2262,7 +2386,7 @@ def _prompt_model_selection(
            menu_highlight_style=("fg_green",),
            cycle_cursor=True,
            clear_screen=False,
-            title=menu_title,
+            title=effective_title,
        )
        idx = menu.show()
        if idx is None:
@@ -2285,6 +2409,13 @@ def _prompt_model_selection(
    n = len(ordered)
    print(f"  {n + 1:>{num_width}}. Enter custom model name")
    print(f"  {n + 2:>{num_width}}. Skip (keep current)")
+
+    if _unavailable:
+        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+        print()
+        print(f"  {_DIM}── Unavailable models (requires paid tier — upgrade at {_upgrade_url}) ──{_RESET}")
+        for mid in _unavailable:
+            print(f"  {'':>{num_width}}  {_DIM}{_label(mid)}{_RESET}")
    print()

    while True:
@@ -2327,8 +2458,8 @@ def _save_model_choice(model_id: str) -> None:
 def login_command(args) -> None:
    """Deprecated: use 'hermes model' or 'hermes setup' instead."""
    print("The 'hermes login' command has been removed.")
-    print("Use 'hermes model' to select a provider and model,")
-    print("or 'hermes setup' for full interactive setup.")
+    print("Use 'hermes auth' to manage credentials,")
+    print("'hermes model' to select a provider, or 'hermes setup' for full setup.")
    raise SystemExit(0)


@@ -2338,17 +2469,25 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
    # Check for existing Hermes-owned credentials
    try:
        existing = resolve_codex_runtime_credentials()
-        print("Existing Codex credentials found in Hermes auth store.")
-        try:
-            reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
-        except (EOFError, KeyboardInterrupt):
-            reuse = "y"
-        if reuse in ("", "y", "yes"):
-            config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
-            print()
-            print("Login successful!")
-            print(f"  Config updated: {config_path} (model.provider=openai-codex)")
-            return
+        # Verify the resolved token is actually usable (not expired).
+        # resolve_codex_runtime_credentials attempts refresh, so if we get
+        # here the token should be valid — but double-check before telling
+        # the user "Login successful!".
+        _resolved_key = existing.get("api_key", "")
+        if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60):
+            print("Existing Codex credentials found in Hermes auth store.")
+            try:
+                reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
+            except (EOFError, KeyboardInterrupt):
+                reuse = "y"
+            if reuse in ("", "y", "yes"):
+                config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
+                print()
+                print("Login successful!")
+                print(f"  Config updated: {config_path} (model.provider=openai-codex)")
+                return
+        else:
+            print("Existing Codex credentials are expired. Starting fresh login...")
    except AuthError:
        pass

@@ -2689,7 +2828,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
        )

        inference_base_url = auth_state["inference_base_url"]
-        verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)

        with _auth_store_lock():
            auth_store = _load_auth_store()
@@ -2711,16 +2849,37 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                    code="invalid_token",
                )

-            from hermes_cli.models import _PROVIDER_MODELS
+            from hermes_cli.models import (
+                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+                check_nous_free_tier, partition_nous_models_by_tier,
+            )
            model_ids = _PROVIDER_MODELS.get("nous", [])

            print()
+            unavailable_models: list = []
+            if model_ids:
+                pricing = get_pricing_for_provider("nous")
+                model_ids = filter_nous_free_models(model_ids, pricing)
+                free_tier = check_nous_free_tier()
+                if free_tier:
+                    model_ids, unavailable_models = partition_nous_models_by_tier(
+                        model_ids, pricing, free_tier=True,
+                    )
+            _portal = auth_state.get("portal_base_url", "")
            if model_ids:
                print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
-                selected_model = _prompt_model_selection(model_ids)
+                selected_model = _prompt_model_selection(
+                    model_ids, pricing=pricing,
+                    unavailable_models=unavailable_models,
+                    portal_url=_portal,
+                )
                if selected_model:
                    _save_model_choice(selected_model)
                    print(f"Default model set to: {selected_model}")
+            elif unavailable_models:
+                _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+                print("No free models currently available.")
+                print(f"Upgrade at {_url} to access paid models.")
            else:
                print("No curated models available for Nous Portal.")
        except Exception as exc:
@@ -18,7 +18,6 @@ from agent.credential_pool import (
    STRATEGY_ROUND_ROBIN,
    STRATEGY_RANDOM,
    STRATEGY_LEAST_USED,
-    SUPPORTED_POOL_STRATEGIES,
    PooledCredential,
    _exhausted_until,
    _normalize_custom_pool_name,
@@ -305,6 +304,32 @@ def auth_remove_command(args) -> None:
            if cleared:
                print(f"Cleared {env_var} from .env")

+    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
+    # clear the underlying auth store / credential file so it doesn't get
+    # re-seeded on the next load_pool() call.
+    elif removed.source == "device_code" and provider in ("openai-codex", "nous"):
+        from hermes_cli.auth import (
+            _load_auth_store, _save_auth_store, _auth_store_lock,
+        )
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+            providers_dict = auth_store.get("providers")
+            if isinstance(providers_dict, dict) and provider in providers_dict:
+                del providers_dict[provider]
+                _save_auth_store(auth_store)
+                print(f"Cleared {provider} OAuth tokens from auth store")
+
+    elif removed.source == "hermes_pkce" and provider == "anthropic":
+        from hermes_constants import get_hermes_home
+        oauth_file = get_hermes_home() / ".anthropic_oauth.json"
+        if oauth_file.exists():
+            oauth_file.unlink()
+            print("Cleared Hermes Anthropic OAuth credentials")
+
+    elif removed.source == "claude_code" and provider == "anthropic":
+        print("Note: Claude Code credentials live in ~/.claude/.credentials.json")
+        print("      Remove them manually if you want to deauthorize Claude Code.")
+

 def auth_reset_command(args) -> None:
    provider = _normalize_provider(getattr(args, "provider", ""))
@@ -190,6 +190,79 @@ def check_for_updates() -> Optional[int]:
    return behind


+def _resolve_repo_dir() -> Optional[Path]:
+    """Locate the Hermes git checkout in use.
+
+    Tries ``<hermes home>/hermes-agent`` first, then the directory two levels
+    above this module.  A candidate counts only if it contains a ``.git``
+    entry.
+
+    Returns:
+        The checkout directory, or None when neither candidate has ``.git``.
+    """
+    home_checkout = get_hermes_home() / "hermes-agent"
+    if (home_checkout / ".git").exists():
+        return home_checkout
+
+    source_checkout = Path(__file__).parent.parent.resolve()
+    if (source_checkout / ".git").exists():
+        return source_checkout
+
+    return None
+
+
+def _git_short_hash(repo_dir: Path, rev: str) -> Optional[str]:
+    """Abbreviate *rev* with ``git rev-parse --short=8`` run inside *repo_dir*.
+
+    Returns:
+        The stripped command output, or None when git cannot be run, exits
+        non-zero, or prints nothing.
+    """
+    command = ["git", "rev-parse", "--short=8", rev]
+    try:
+        completed = subprocess.run(
+            command,
+            cwd=str(repo_dir),
+            timeout=5,
+            text=True,
+            capture_output=True,
+        )
+    except Exception:
+        # Missing git binary, bad cwd, timeout, ... — treat all as "unknown".
+        return None
+
+    if completed.returncode == 0:
+        short_hash = (completed.stdout or "").strip()
+        if short_hash:
+            return short_hash
+    return None
+
+
+def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
+    """Collect the git facts shown in the startup banner.
+
+    Args:
+        repo_dir: Checkout to inspect; when falsy, ``_resolve_repo_dir()`` is
+            used instead.
+
+    Returns:
+        ``{"upstream": <origin/main hash>, "local": <HEAD hash>,
+        "ahead": <count of commits in origin/main..HEAD, never negative>}``,
+        or None when there is no checkout or either hash cannot be resolved.
+    """
+    checkout = repo_dir or _resolve_repo_dir()
+    if checkout is None:
+        return None
+
+    upstream_hash = _git_short_hash(checkout, "origin/main")
+    head_hash = _git_short_hash(checkout, "HEAD")
+    if not (upstream_hash and head_hash):
+        return None
+
+    # Best effort: any failure while counting leaves the count at zero.
+    commits_ahead = 0
+    try:
+        counted = subprocess.run(
+            ["git", "rev-list", "--count", "origin/main..HEAD"],
+            cwd=str(checkout),
+            timeout=5,
+            text=True,
+            capture_output=True,
+        )
+        if counted.returncode == 0:
+            commits_ahead = int((counted.stdout or "0").strip() or "0")
+    except Exception:
+        commits_ahead = 0
+
+    return {
+        "upstream": upstream_hash,
+        "local": head_hash,
+        "ahead": max(commits_ahead, 0),
+    }
+
+
+def format_banner_version_label(agent_name: str = "Hermes Agent") -> str:
+    """Return the version label shown in the startup banner title.
+
+    Args:
+        agent_name: Product name placed at the start of the label.  Defaults
+            to "Hermes Agent", so existing zero-argument callers are
+            unaffected; callers that have a customised agent name can pass it
+            through instead of losing it.
+
+    Returns:
+        ``"<agent_name> v<VERSION> (<RELEASE_DATE>)"``, followed by
+        ``" · upstream <hash>"`` when git state is available, and additionally
+        ``" · local <hash> (+N carried commit(s))"`` when HEAD is ahead of
+        origin/main and differs from it.
+    """
+    base = f"{agent_name} v{VERSION} ({RELEASE_DATE})"
+    state = get_git_banner_state()
+    if not state:
+        return base
+
+    upstream = state["upstream"]
+    local = state["local"]
+    ahead = int(state.get("ahead") or 0)
+
+    # Nothing carried locally: only the upstream hash is worth showing.
+    if ahead <= 0 or upstream == local:
+        return f"{base} · upstream {upstream}"
+
+    carried_word = "commit" if ahead == 1 else "commits"
+    return f"{base} · upstream {upstream} · local {local} (+{ahead} carried {carried_word})"
+
+
 # =========================================================================
 # Non-blocking update check
 # =========================================================================
@@ -449,7 +522,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    border_color = _skin_color("banner_border", "#CD7F32")
    outer_panel = Panel(
        layout_table,
-        title=f"[bold {title_color}]{agent_name} v{VERSION} ({RELEASE_DATE})[/]",
+        title=f"[bold {title_color}]{format_banner_version_label()}[/]",
        border_style=border_color,
        padding=(0, 2),
    )
@@ -25,7 +25,7 @@ def clarify_callback(cli, question, choices):

    timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120)
    response_queue = queue.Queue()
-    is_open_ended = not choices or len(choices) == 0
+    is_open_ended = not choices

    cli._clarify_state = {
        "question": question,
@@ -63,47 +63,6 @@ def clarify_callback(cli, question, choices):
    )


-def sudo_password_callback(cli) -> str:
-    """Prompt for sudo password through the TUI.
-
-    Sets up a password input area and blocks until the user responds.
-    """
-    timeout = 45
-    response_queue = queue.Queue()
-
-    cli._sudo_state = {"response_queue": response_queue}
-    cli._sudo_deadline = _time.monotonic() + timeout
-
-    if hasattr(cli, "_app") and cli._app:
-        cli._app.invalidate()
-
-    while True:
-        try:
-            result = response_queue.get(timeout=1)
-            cli._sudo_state = None
-            cli._sudo_deadline = 0
-            if hasattr(cli, "_app") and cli._app:
-                cli._app.invalidate()
-            if result:
-                cprint(f"\n{_DIM}  ✓ Password received (cached for session){_RST}")
-            else:
-                cprint(f"\n{_DIM}  ⏭ Skipped{_RST}")
-            return result
-        except queue.Empty:
-            remaining = cli._sudo_deadline - _time.monotonic()
-            if remaining <= 0:
-                break
-            if hasattr(cli, "_app") and cli._app:
-                cli._app.invalidate()
-
-    cli._sudo_state = None
-    cli._sudo_deadline = 0
-    if hasattr(cli, "_app") and cli._app:
-        cli._app.invalidate()
-    cprint(f"\n{_DIM}  ⏱ Timeout — continuing without sudo{_RST}")
-    return ""
-
-
 def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
    """Prompt for a secret value through the TUI (e.g. API keys for skills).

@@ -10,7 +10,6 @@ Usage:

 import importlib.util
 import logging
-import shutil
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -24,7 +23,6 @@ from hermes_cli.setup import (
    print_info,
    print_success,
    print_error,
-    print_warning,
    prompt_yes_no,
 )

@@ -1,4 +1,4 @@
-"""Clipboard image extraction for macOS, Linux, and WSL2.
+"""Clipboard image extraction for macOS, Windows, Linux, and WSL2.

 Provides a single function `save_clipboard_image(dest)` that checks the
 system clipboard for image data, saves it to *dest* as PNG, and returns
@@ -6,9 +6,10 @@ True on success.  No external Python dependencies — uses only OS-level
 CLI tools that ship with the platform (or are commonly installed).

 Platform support:
-  macOS  — osascript (always available), pngpaste (if installed)
-  WSL2   — powershell.exe via .NET System.Windows.Forms.Clipboard
-  Linux  — wl-paste (Wayland), xclip (X11)
+  macOS   — osascript (always available), pngpaste (if installed)
+  Windows — PowerShell via .NET System.Windows.Forms.Clipboard
+  WSL2    — powershell.exe via .NET System.Windows.Forms.Clipboard
+  Linux   — wl-paste (Wayland), xclip (X11)
 """

 import base64
@@ -32,6 +33,8 @@ def save_clipboard_image(dest: Path) -> bool:
    dest.parent.mkdir(parents=True, exist_ok=True)
    if sys.platform == "darwin":
        return _macos_save(dest)
+    if sys.platform == "win32":
+        return _windows_save(dest)
    return _linux_save(dest)


@@ -42,6 +45,8 @@ def has_clipboard_image() -> bool:
    """
    if sys.platform == "darwin":
        return _macos_has_image()
+    if sys.platform == "win32":
+        return _windows_has_image()
    if _is_wsl():
        return _wsl_has_image()
    if os.environ.get("WAYLAND_DISPLAY"):
@@ -112,6 +117,104 @@ def _macos_osascript(dest: Path) -> bool:
    return False


+# ── Shared PowerShell scripts (native Windows + WSL2) ─────────────────────
+
+# .NET System.Windows.Forms.Clipboard — used by both native Windows (powershell)
+# and WSL2 (powershell.exe) paths.
+
+# Loads System.Windows.Forms and evaluates Clipboard.ContainsImage(); the
+# boolean result is the script's output.
+_PS_CHECK_IMAGE = (
+    "Add-Type -AssemblyName System.Windows.Forms;"
+    "[System.Windows.Forms.Clipboard]::ContainsImage()"
+)
+
+# Reads the clipboard image, exits with status 1 when there is none, and
+# otherwise saves it as PNG into a MemoryStream and outputs those bytes as a
+# base64 string.
+_PS_EXTRACT_IMAGE = (
+    "Add-Type -AssemblyName System.Windows.Forms;"
+    "Add-Type -AssemblyName System.Drawing;"
+    "$img = [System.Windows.Forms.Clipboard]::GetImage();"
+    "if ($null -eq $img) { exit 1 }"
+    "$ms = New-Object System.IO.MemoryStream;"
+    "$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);"
+    "[System.Convert]::ToBase64String($ms.ToArray())"
+)
+
+
+# ── Native Windows ────────────────────────────────────────────────────────
+
+# Native Windows uses ``powershell`` (Windows PowerShell 5.1, always present)
+# or ``pwsh`` (PowerShell 7+, optional).  Discovery is cached per-process.
+
+
+def _find_powershell() -> str | None:
+    """Probe for a working PowerShell executable.
+
+    Runs ``echo ok`` through ``powershell`` and then ``pwsh``; the first one
+    that exits 0 with "ok" in its stdout is returned.
+
+    Returns:
+        The executable name that worked, or None if neither did.
+    """
+    probe_args = ["-NoProfile", "-NonInteractive", "-Command", "echo ok"]
+    for candidate in ("powershell", "pwsh"):
+        try:
+            probe = subprocess.run(
+                [candidate, *probe_args],
+                capture_output=True, text=True, timeout=5,
+            )
+            if probe.returncode == 0 and "ok" in probe.stdout:
+                return candidate
+        except Exception:
+            # Not installed (FileNotFoundError), timed out, or otherwise
+            # unusable — move on to the next candidate.
+            continue
+    return None
+
+
+# Per-process memo for the PowerShell lookup.  ``False`` is the "not probed
+# yet" sentinel because ``None`` is a legitimate result (no PowerShell found).
+_ps_exe: str | None | bool = False
+
+
+def _get_ps_exe() -> str | None:
+    """Return the memoised PowerShell executable, probing on first use."""
+    global _ps_exe
+    if _ps_exe is not False:
+        return _ps_exe
+    _ps_exe = _find_powershell()
+    return _ps_exe
+
+
+def _windows_has_image() -> bool:
+    """Report whether the Windows clipboard currently holds an image.
+
+    Runs ``_PS_CHECK_IMAGE`` through the discovered PowerShell executable.
+
+    Returns:
+        True only when the command exits 0 and prints "True"; False when no
+        PowerShell is available or the check fails for any reason.
+    """
+    shell = _get_ps_exe()
+    if shell is None:
+        return False
+
+    command = [shell, "-NoProfile", "-NonInteractive", "-Command", _PS_CHECK_IMAGE]
+    try:
+        outcome = subprocess.run(command, capture_output=True, text=True, timeout=5)
+    except Exception as exc:
+        logger.debug("Windows clipboard image check failed: %s", exc)
+        return False
+    return outcome.returncode == 0 and "True" in outcome.stdout
+
+
+def _windows_save(dest: Path) -> bool:
+    """Write the Windows clipboard image to *dest* as PNG.
+
+    Runs ``_PS_EXTRACT_IMAGE`` through the discovered PowerShell executable,
+    base64-decodes its stdout and writes the bytes to *dest*.
+
+    Returns:
+        True when *dest* exists and is non-empty afterwards; False when no
+        PowerShell is available, the script exits non-zero or prints nothing,
+        or any step raises (in which case *dest* is removed if present).
+    """
+    shell = _get_ps_exe()
+    if shell is None:
+        logger.debug("No PowerShell found — Windows clipboard image paste unavailable")
+        return False
+
+    command = [shell, "-NoProfile", "-NonInteractive", "-Command", _PS_EXTRACT_IMAGE]
+    try:
+        outcome = subprocess.run(command, capture_output=True, text=True, timeout=15)
+        # A non-zero exit (e.g. no image on the clipboard) counts as no data.
+        encoded = outcome.stdout.strip() if outcome.returncode == 0 else ""
+        if not encoded:
+            return False
+
+        dest.write_bytes(base64.b64decode(encoded))
+        return dest.exists() and dest.stat().st_size > 0
+
+    except Exception as exc:
+        logger.debug("Windows clipboard image extraction failed: %s", exc)
+        # Don't leave a partial or corrupt file behind.
+        dest.unlink(missing_ok=True)
+    return False
+
+
 # ── Linux ────────────────────────────────────────────────────────────────

 def _is_wsl() -> bool:
@@ -142,24 +245,7 @@ def _linux_save(dest: Path) -> bool:


 # ── WSL2 (powershell.exe) ────────────────────────────────────────────────
-
-# PowerShell script: get clipboard image as base64-encoded PNG on stdout.
-# Using .NET System.Windows.Forms.Clipboard — always available on Windows.
-_PS_CHECK_IMAGE = (
-    "Add-Type -AssemblyName System.Windows.Forms;"
-    "[System.Windows.Forms.Clipboard]::ContainsImage()"
-)
-
-_PS_EXTRACT_IMAGE = (
-    "Add-Type -AssemblyName System.Windows.Forms;"
-    "Add-Type -AssemblyName System.Drawing;"
-    "$img = [System.Windows.Forms.Clipboard]::GetImage();"
-    "if ($null -eq $img) { exit 1 }"
-    "$ms = New-Object System.IO.MemoryStream;"
-    "$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);"
-    "[System.Convert]::ToBase64String($ms.ToArray())"
-)
-
+# Reuses _PS_CHECK_IMAGE / _PS_EXTRACT_IMAGE defined above.

 def _wsl_has_image() -> bool:
    """Check if Windows clipboard has an image (via powershell.exe)."""
@@ -293,16 +293,8 @@ def _resolve_config_gates() -> set[str]:
    if not gated:
        return set()
    try:
-        import yaml
-        config_path = os.path.join(
-            os.getenv("HERMES_HOME", os.path.expanduser("~/.hermes")),
-            "config.yaml",
-        )
-        if os.path.exists(config_path):
-            with open(config_path, encoding="utf-8") as f:
-                cfg = yaml.safe_load(f) or {}
-        else:
-            cfg = {}
+        from hermes_cli.config import read_raw_config
+        cfg = read_raw_config()
    except Exception:
        return set()
    result: set[str] = set()
@@ -366,21 +358,46 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
    for cmd in COMMAND_REGISTRY:
        if not _is_gateway_available(cmd, overrides):
            continue
-        tg_name = cmd.name.replace("-", "_")
-        result.append((tg_name, cmd.description))
+        tg_name = _sanitize_telegram_name(cmd.name)
+        if tg_name:
+            result.append((tg_name, cmd.description))
    return result


-_TG_NAME_LIMIT = 32
+_CMD_NAME_LIMIT = 32
+"""Max command name length shared by Telegram and Discord."""
+
+# Backward-compat alias — tests and external code may reference the old name.
+_TG_NAME_LIMIT = _CMD_NAME_LIMIT
+
+# Telegram Bot API allows only lowercase a-z, 0-9, and underscores in
+# command names.  This regex strips everything else after initial conversion.
+_TG_INVALID_CHARS = re.compile(r"[^a-z0-9_]")
+_TG_MULTI_UNDERSCORE = re.compile(r"_{2,}")


-def _clamp_telegram_names(
+def _sanitize_telegram_name(raw: str) -> str:
+    """Turn a command/skill/plugin name into a Telegram-safe command name.
+
+    Telegram command names may contain only lowercase a-z, digits 0-9 and
+    underscores.  The transformation is, in order:
+
+    1. lowercase the input and turn hyphens into underscores;
+    2. delete every character that is still outside ``[a-z0-9_]``;
+    3. collapse runs of underscores into a single underscore;
+    4. trim underscores from both ends.
+
+    The result may be an empty string when nothing valid remains.  Length is
+    not enforced here.
+    """
+    lowered = raw.lower().replace("-", "_")
+    only_valid = _TG_INVALID_CHARS.sub("", lowered)
+    collapsed = _TG_MULTI_UNDERSCORE.sub("_", only_valid)
+    return collapsed.strip("_")
+
+
+def _clamp_command_names(
    entries: list[tuple[str, str]],
    reserved: set[str],
 ) -> list[tuple[str, str]]:
-    """Enforce Telegram's 32-char command name limit with collision avoidance.
+    """Enforce 32-char command name limit with collision avoidance.

-    Names exceeding 32 chars are truncated.  If truncation creates a duplicate
+    Both Telegram and Discord cap slash command names at 32 characters.
+    Names exceeding the limit are truncated.  If truncation creates a duplicate
    (against *reserved* names or earlier entries in the same batch), the name is
    shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
    If all 10 digit slots are taken the entry is silently dropped.
@@ -388,10 +405,10 @@ def _clamp_telegram_names(
    used: set[str] = set(reserved)
    result: list[tuple[str, str]] = []
    for name, desc in entries:
-        if len(name) > _TG_NAME_LIMIT:
-            candidate = name[:_TG_NAME_LIMIT]
+        if len(name) > _CMD_NAME_LIMIT:
+            candidate = name[:_CMD_NAME_LIMIT]
            if candidate in used:
-                prefix = name[:_TG_NAME_LIMIT - 1]
+                prefix = name[:_CMD_NAME_LIMIT - 1]
                for digit in range(10):
                    candidate = f"{prefix}{digit}"
                    if candidate not in used:
@@ -407,6 +424,129 @@ def _clamp_telegram_names(
    return result


+# Backward-compat alias.
+_clamp_telegram_names = _clamp_command_names
+
+
+# ---------------------------------------------------------------------------
+# Shared skill/plugin collection for gateway platforms
+# ---------------------------------------------------------------------------
+
+def _collect_gateway_skill_entries(
+    platform: str,
+    max_slots: int,
+    reserved_names: set[str],
+    desc_limit: int = 100,
+    sanitize_name: "Callable[[str], str] | None" = None,
+) -> tuple[list[tuple[str, str, str]], int]:
+    """Collect plugin + skill entries for a gateway platform.
+
+    Priority order:
+      1. Plugin slash commands (take precedence over skills)
+      2. Built-in skill commands (fill remaining slots, alphabetical)
+
+    Only skills are counted as "hidden" when the cap is reached.
+    Hub-installed skills are excluded.  Per-platform disabled skills are
+    excluded.  Failures while loading plugins or skills are swallowed so a
+    broken plugin/skill setup yields fewer entries rather than an error.
+
+    Args:
+        platform: Platform identifier for per-platform skill filtering
+            (``"telegram"``, ``"discord"``, etc.).
+        max_slots: Maximum number of entries to return (remaining slots after
+            built-in/core commands).
+        reserved_names: Names already taken by built-in commands.  Mutated
+            in-place with the plugin names that are added; skill names are
+            tracked in a private copy and are not written back.
+        desc_limit: Max description length (40 for Telegram, 100 for Discord).
+        sanitize_name: Optional name transform applied before clamping, e.g.
+            :func:`_sanitize_telegram_name` for Telegram.  May return an
+            empty string to signal "skip this entry".
+
+    Returns:
+        ``(entries, hidden_count)`` where *entries* is a list of
+        ``(name, description, cmd_key)`` triples and *hidden_count* is the
+        number of skill entries dropped due to the cap.  ``cmd_key`` is the
+        original ``/skill-name`` key from :func:`get_skill_commands`; it is
+        the empty string for plugin entries.
+    """
+    all_entries: list[tuple[str, str, str]] = []
+
+    # --- Tier 1: Plugin slash commands (never trimmed) ---------------------
+    plugin_pairs: list[tuple[str, str]] = []
+    try:
+        from hermes_cli.plugins import get_plugin_manager
+        pm = get_plugin_manager()
+        plugin_cmds = getattr(pm, "_plugin_commands", {})
+        for cmd_name in sorted(plugin_cmds):
+            name = sanitize_name(cmd_name) if sanitize_name else cmd_name
+            if not name:
+                continue
+            desc = "Plugin command"
+            if len(desc) > desc_limit:
+                desc = desc[:desc_limit - 3] + "..."
+            plugin_pairs.append((name, desc))
+    except Exception:
+        pass
+
+    plugin_pairs = _clamp_command_names(plugin_pairs, reserved_names)
+    reserved_names.update(n for n, _ in plugin_pairs)
+    # Plugins have no cmd_key — use empty string as placeholder
+    for n, d in plugin_pairs:
+        all_entries.append((n, d, ""))
+
+    # --- Tier 2: Built-in skill commands (trimmed at cap) -----------------
+    _platform_disabled: set[str] = set()
+    try:
+        from agent.skill_utils import get_disabled_skill_names
+        _platform_disabled = get_disabled_skill_names(platform=platform)
+    except Exception:
+        pass
+
+    skill_triples: list[tuple[str, str, str]] = []
+    try:
+        from agent.skill_commands import get_skill_commands
+        from tools.skills_tool import SKILLS_DIR
+        _skills_dir = str(SKILLS_DIR.resolve())
+        _hub_dir = str((SKILLS_DIR / ".hub").resolve())
+        skill_cmds = get_skill_commands()
+        for cmd_key in sorted(skill_cmds):
+            info = skill_cmds[cmd_key]
+            skill_path = info.get("skill_md_path", "")
+            if not skill_path.startswith(_skills_dir):
+                continue
+            if skill_path.startswith(_hub_dir):
+                continue
+            skill_name = info.get("name", "")
+            if skill_name in _platform_disabled:
+                continue
+            raw_name = cmd_key.lstrip("/")
+            name = sanitize_name(raw_name) if sanitize_name else raw_name
+            if not name:
+                continue
+            desc = info.get("description", "")
+            if len(desc) > desc_limit:
+                desc = desc[:desc_limit - 3] + "..."
+            skill_triples.append((name, desc, cmd_key))
+    except Exception:
+        pass
+
+    # Clamp one entry at a time so every surviving name — including names
+    # that were truncated or given a digit suffix — stays paired with its
+    # own cmd_key.  Looking the key up by the post-clamp (name, desc) pair
+    # would miss every renamed entry and hand the caller an empty cmd_key.
+    # The running ``used_names`` set reproduces the in-batch collision
+    # avoidance of a single batched _clamp_command_names() call.
+    used_names: set[str] = set(reserved_names)
+    clamped_skills: list[tuple[str, str, str]] = []
+    for n, d, k in skill_triples:
+        kept = _clamp_command_names([(n, d)], used_names)
+        if not kept:
+            continue  # no collision-free name available for this entry
+        final_name, final_desc = kept[0]
+        used_names.add(final_name)
+        clamped_skills.append((final_name, final_desc, k))
+
+    # Skills fill remaining slots — only tier counted in hidden_count
+    remaining = max(0, max_slots - len(all_entries))
+    hidden_count = max(0, len(clamped_skills) - remaining)
+    all_entries.extend(clamped_skills[:remaining])
+
+    # Final slice is a hard cap: if plugins alone exceed max_slots they are
+    # cut here as well.
+    return all_entries[:max_slots], hidden_count
+
+
+# ---------------------------------------------------------------------------
+# Platform-specific wrappers
+# ---------------------------------------------------------------------------
+
 def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]:
    """Return Telegram menu commands capped to the Bot API limit.

@@ -425,80 +565,52 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
        skill commands omitted due to the cap.
    """
    core_commands = list(telegram_bot_commands())
-    # Reserve core names so plugin/skill truncation can't collide with them
    reserved_names = {n for n, _ in core_commands}
    all_commands = list(core_commands)

-    # Plugin slash commands get priority over skills
-    plugin_entries: list[tuple[str, str]] = []
-    try:
-        from hermes_cli.plugins import get_plugin_manager
-        pm = get_plugin_manager()
-        plugin_cmds = getattr(pm, "_plugin_commands", {})
-        for cmd_name in sorted(plugin_cmds):
-            tg_name = cmd_name.replace("-", "_")
-            desc = "Plugin command"
-            if len(desc) > 40:
-                desc = desc[:37] + "..."
-            plugin_entries.append((tg_name, desc))
-    except Exception:
-        pass
-
-    # Clamp plugin names to 32 chars with collision avoidance
-    plugin_entries = _clamp_telegram_names(plugin_entries, reserved_names)
-    reserved_names.update(n for n, _ in plugin_entries)
-    all_commands.extend(plugin_entries)
-
-    # Load per-platform disabled skills so they don't consume menu slots.
-    # get_skill_commands() already filters the *global* disabled list, but
-    # per-platform overrides (skills.platform_disabled.telegram) were never
-    # applied here — that's what this block fixes.
-    _platform_disabled: set[str] = set()
-    try:
-        from agent.skill_utils import get_disabled_skill_names
-        _platform_disabled = get_disabled_skill_names(platform="telegram")
-    except Exception:
-        pass
-
-    # Remaining slots go to built-in skill commands (not hub-installed).
-    skill_entries: list[tuple[str, str]] = []
-    try:
-        from agent.skill_commands import get_skill_commands
-        from tools.skills_tool import SKILLS_DIR
-        _skills_dir = str(SKILLS_DIR.resolve())
-        _hub_dir = str((SKILLS_DIR / ".hub").resolve())
-        skill_cmds = get_skill_commands()
-        for cmd_key in sorted(skill_cmds):
-            info = skill_cmds[cmd_key]
-            skill_path = info.get("skill_md_path", "")
-            if not skill_path.startswith(_skills_dir):
-                continue
-            if skill_path.startswith(_hub_dir):
-                continue
-            # Skip skills disabled for telegram
-            skill_name = info.get("name", "")
-            if skill_name in _platform_disabled:
-                continue
-            name = cmd_key.lstrip("/").replace("-", "_")
-            desc = info.get("description", "")
-            # Keep descriptions short — setMyCommands has an undocumented
-            # total payload limit.  40 chars fits 100 commands safely.
-            if len(desc) > 40:
-                desc = desc[:37] + "..."
-            skill_entries.append((name, desc))
-    except Exception:
-        pass
-
-    # Clamp skill names to 32 chars with collision avoidance
-    skill_entries = _clamp_telegram_names(skill_entries, reserved_names)
-
-    # Skills fill remaining slots — they're the only tier that gets trimmed
    remaining_slots = max(0, max_commands - len(all_commands))
-    hidden_count = max(0, len(skill_entries) - remaining_slots)
-    all_commands.extend(skill_entries[:remaining_slots])
+    entries, hidden_count = _collect_gateway_skill_entries(
+        platform="telegram",
+        max_slots=remaining_slots,
+        reserved_names=reserved_names,
+        desc_limit=40,
+        sanitize_name=_sanitize_telegram_name,
+    )
+    # Drop the cmd_key — Telegram only needs (name, desc) pairs.
+    all_commands.extend((n, d) for n, d, _k in entries)
    return all_commands[:max_commands], hidden_count


+def discord_skill_commands(
+    max_slots: int,
+    reserved_names: set[str],
+) -> tuple[list[tuple[str, str, str]], int]:
+    """Collect plugin and skill entries for Discord slash command registration.
+
+    Delegates to :func:`_collect_gateway_skill_entries` with
+    ``platform="discord"``, so the priority and filtering rules are the
+    shared ones (plugins before skills, hub skills excluded, skills disabled
+    for the platform excluded).  Discord-specific settings:
+
+    - No name sanitizer is supplied, so hyphens in names are kept.
+    - Descriptions are limited to 100 characters.
+
+    Args:
+        max_slots: Available command slots (100 minus existing built-in count).
+        reserved_names: Names of already-registered built-in commands.  The
+            caller's set is left untouched.
+
+    Returns:
+        ``(entries, hidden_count)`` where *entries* is a list of
+        ``(discord_name, description, cmd_key)`` triples.  ``cmd_key`` is
+        the original ``/skill-name`` key needed for the slash handler callback.
+    """
+    # The shared collector mutates the set it receives, so hand it a copy.
+    private_reserved = set(reserved_names)
+    return _collect_gateway_skill_entries(
+        platform="discord",
+        max_slots=max_slots,
+        reserved_names=private_reserved,
+        desc_limit=100,
+    )
+
+
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

@@ -42,7 +42,7 @@ _EXTRA_ENV_KEYS = frozenset({
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
-    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM",
+    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
 })
 import yaml
@@ -416,6 +416,7 @@ DEFAULT_CONFIG = {
        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
        "local": {
            "model": "base",  # tiny, base, small, medium, large-v3
+            "language": "",  # auto-detect by default; set to "en", "es", "fr", etc. to force
        },
        "openai": {
            "model": "whisper-1",  # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe
@@ -590,6 +591,30 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "GOOGLE_API_KEY": {
+        "description": "Google AI Studio API key (also recognized as GEMINI_API_KEY)",
+        "prompt": "Google AI Studio API key",
+        "url": "https://aistudio.google.com/app/apikey",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "GEMINI_API_KEY": {
+        "description": "Google AI Studio API key (alias for GOOGLE_API_KEY)",
+        "prompt": "Gemini API key",
+        "url": "https://aistudio.google.com/app/apikey",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "GEMINI_BASE_URL": {
+        "description": "Google AI Studio base URL override",
+        "prompt": "Gemini base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -844,6 +869,13 @@ OPTIONAL_ENV_VARS = {
        "password": True,
        "category": "tool",
    },
+    "FIRECRAWL_BROWSER_TTL": {
+        "description": "Firecrawl browser session TTL in seconds (optional, default 300)",
+        "prompt": "Browser session TTL (seconds)",
+        "tools": ["browser_navigate", "browser_click"],
+        "password": False,
+        "category": "tool",
+    },
    "CAMOFOX_URL": {
        "description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
        "prompt": "Camofox server URL",
@@ -1048,6 +1080,14 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "MATRIX_DEVICE_ID": {
+        "description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)",
+        "prompt": "Matrix device ID (stable across restarts)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -1240,6 +1280,43 @@ def get_missing_config_fields() -> List[Dict[str, Any]]:
    return missing


+def get_missing_skill_config_vars() -> List[Dict[str, Any]]:
+    """List skill-declared config vars that have no usable value in config.yaml.
+
+    Every var reported by ``discover_all_skill_config_vars()`` is looked up
+    in the loaded config under ``<SKILL_CONFIG_PREFIX>.<key>``, walking the
+    dotted path one segment at a time.  A var counts as missing when the
+    path cannot be followed, when the value is ``None``, or when it is a
+    blank string.
+
+    Returns:
+        The var dicts (as produced by the discovery helper) that are missing.
+        An empty list is returned when the skill utilities cannot be
+        imported or when no skill declares any config var.
+    """
+    try:
+        from agent.skill_utils import discover_all_skill_config_vars, SKILL_CONFIG_PREFIX
+    except Exception:
+        return []
+
+    declared = discover_all_skill_config_vars()
+    if not declared:
+        return []
+
+    config = load_config()
+    missing: List[Dict[str, Any]] = []
+    for var in declared:
+        # Skill config is stored under skills.config.<logical_key>
+        node = config
+        for segment in f"{SKILL_CONFIG_PREFIX}.{var['key']}".split("."):
+            if not (isinstance(node, dict) and segment in node):
+                node = None  # path broke off — treat as absent
+                break
+            node = node[segment]
+        # Missing = key doesn't exist or is empty string
+        is_blank_string = isinstance(node, str) and not node.strip()
+        if node is None or is_blank_string:
+            missing.append(var)
+    return missing
+
+
 def check_config_version() -> Tuple[int, int]:
    """
    Check config version.
@@ -1671,7 +1748,50 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
        config = load_config()
        config["_config_version"] = latest_ver
        save_config(config)
-    
+
+    # ── Skill-declared config vars ──────────────────────────────────────
+    # Skills can declare config.yaml settings they need via
+    # metadata.hermes.config in their SKILL.md frontmatter.
+    # Prompt for any that are missing/empty.
+    missing_skill_config = get_missing_skill_config_vars()
+    if missing_skill_config and interactive and not quiet:
+        print(f"\n  {len(missing_skill_config)} skill setting(s) not configured:")
+        for var in missing_skill_config:
+            skill_name = var.get("skill", "unknown")
+            print(f"    • {var['key']} — {var['description']} (from skill: {skill_name})")
+        print()
+        try:
+            answer = input("  Configure skill settings? [y/N]: ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            answer = "n"
+
+        if answer in ("y", "yes"):
+            print()
+            config = load_config()
+            try:
+                from agent.skill_utils import SKILL_CONFIG_PREFIX
+            except Exception:
+                SKILL_CONFIG_PREFIX = "skills.config"
+            for var in missing_skill_config:
+                default = var.get("default", "")
+                default_hint = f" (default: {default})" if default else ""
+                value = input(f"  {var['prompt']}{default_hint}: ").strip()
+                if not value and default:
+                    value = str(default)
+                if value:
+                    storage_key = f"{SKILL_CONFIG_PREFIX}.{var['key']}"
+                    _set_nested(config, storage_key, value)
+                    results["config_added"].append(var["key"])
+                    print(f"  ✓ Saved {var['key']} = {value}")
+                else:
+                    results["warnings"].append(
+                        f"Skipped {var['key']} — skill '{var.get('skill', '?')}' may ask for it later"
+                    )
+                print()
+            save_config(config)
+        else:
+            print("  Set later with: hermes config set <key> <value>")
+
    return results


@@ -1762,6 +1882,24 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:



+def read_raw_config() -> Dict[str, Any]:
+    """Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
+
+    The file location comes from ``get_config_path()``.
+
+    Returns the raw YAML mapping, or ``{}`` if the file doesn't exist, can't
+    be parsed, or its top level is not a mapping (an empty file, a bare
+    scalar or a list).  Use this for lightweight config reads where you just
+    need a single value and don't want the overhead of ``load_config()``'s
+    deep-merge + migration pipeline.
+    """
+    try:
+        config_path = get_config_path()
+        if config_path.exists():
+            with open(config_path, encoding="utf-8") as f:
+                data = yaml.safe_load(f)
+            # safe_load returns whatever the document's top level is; only a
+            # mapping satisfies the declared return type and lets callers
+            # use dict methods safely.
+            if isinstance(data, dict):
+                return data
+    except Exception:
+        # Deliberately best-effort: any read/parse problem means "no config".
+        pass
+    return {}
+
+
 def load_config() -> Dict[str, Any]:
    """Load configuration from ~/.hermes/config.yaml."""
    import copy
@@ -1813,8 +1951,8 @@ _FALLBACK_COMMENT = """
 #
 # Supported providers:
 #   openrouter   (OPENROUTER_API_KEY)  — routes to any model
-#   openai-codex (OAuth — hermes login) — OpenAI Codex
-#   nous         (OAuth — hermes login) — Nous Portal
+#   openai-codex (OAuth — hermes auth) — OpenAI Codex
+#   nous         (OAuth — hermes auth) — Nous Portal
 #   zai          (ZAI_API_KEY)         — Z.AI / GLM
 #   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
@@ -1856,8 +1994,8 @@ _COMMENTED_SECTIONS = """
 #
 # Supported providers:
 #   openrouter   (OPENROUTER_API_KEY)  — routes to any model
-#   openai-codex (OAuth — hermes login) — OpenAI Codex
-#   nous         (OAuth — hermes login) — Nous Portal
+#   openai-codex (OAuth — hermes auth) — OpenAI Codex
+#   nous         (OAuth — hermes auth) — Nous Portal
 #   zai          (ZAI_API_KEY)         — Z.AI / GLM
 #   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
@@ -2325,6 +2463,23 @@ def show_config():
    print(f"  Telegram:     {'configured' if telegram_token else color('not configured', Colors.DIM)}")
    print(f"  Discord:      {'configured' if discord_token else color('not configured', Colors.DIM)}")
    
+    # Skill config
+    try:
+        from agent.skill_utils import discover_all_skill_config_vars, resolve_skill_config_values
+        skill_vars = discover_all_skill_config_vars()
+        if skill_vars:
+            resolved = resolve_skill_config_values(skill_vars)
+            print()
+            print(color("◆ Skill Settings", Colors.CYAN, Colors.BOLD))
+            for var in skill_vars:
+                key = var["key"]
+                value = resolved.get(key, "")
+                skill_name = var.get("skill", "")
+                display_val = str(value) if value else color("(not set)", Colors.DIM)
+                print(f"  {key:<20s} {display_val}  {color(f'[{skill_name}]', Colors.DIM)}")
+    except Exception:
+        pass
+
    print()
    print(color("─" * 60, Colors.DIM))
    print(color("  hermes config edit     # Edit config file", Colors.DIM))
@@ -2384,7 +2539,7 @@ def set_config_value(key: str, value: str):
        'TINKER_API_KEY',
    ]
    
-    if key.upper() in api_keys or key.upper().endswith('_API_KEY') or key.upper().endswith('_TOKEN') or key.upper().startswith('TERMINAL_SSH'):
+    if key.upper() in api_keys or key.upper().endswith(('_API_KEY', '_TOKEN')) or key.upper().startswith('TERMINAL_SSH'):
        save_env_value(key.upper(), value)
        print(f"✓ Set {key} in {get_env_path()}")
        return
@@ -836,7 +836,7 @@ def run_doctor(args):
                get_honcho_client(hcfg)
                check_ok(
                    "Honcho connected",
-                    f"workspace={hcfg.workspace_id} mode={hcfg.memory_mode} freq={hcfg.write_frequency}",
+                    f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}",
                )
            except Exception as _e:
                check_fail("Honcho connection failed", str(_e))
@@ -920,8 +920,8 @@ def run_doctor(args):
                        pass
    except ImportError:
        pass
-    except Exception as _e:
-        logger.debug("Profile health check failed: %s", _e)
+    except Exception:
+        pass

    # =========================================================================
    # Summary
@@ -267,6 +267,34 @@ def _profile_suffix() -> str:
    return hashlib.sha256(str(home).encode()).hexdigest()[:8]


+def _profile_arg(hermes_home: str | None = None) -> str:
+    """Return ``--profile <name>`` only when HERMES_HOME is a named profile.
+
+    For ``~/.hermes/profiles/<name>``, returns ``"--profile <name>"``.
+    For the default profile or hash-based custom paths, returns the empty string.
+
+    A profile name is accepted only if it is a single path component made
+    of 1-64 characters from ``[a-z0-9_-]`` starting with a letter or digit.
+    The whole component must match, so a name carrying a trailing newline
+    is rejected and can never end up inside a generated service definition.
+
+    Args:
+        hermes_home: Optional explicit HERMES_HOME path. Defaults to the current
+            ``get_hermes_home()`` value. Should be passed when generating a
+            service definition for a different user (e.g. system service).
+
+    Returns:
+        ``"--profile <name>"`` or ``""``.
+    """
+    import re
+    home = Path(hermes_home or str(get_hermes_home())).resolve()
+    # NOTE(review): ``default`` is derived from the home directory of the
+    # user running this process.  If *hermes_home* belongs to a different
+    # user it will never sit under this root and "" is returned — confirm
+    # that is the intended result for system services.
+    default = (Path.home() / ".hermes").resolve()
+    if home == default:
+        return ""
+    profiles_root = (default / "profiles").resolve()
+    try:
+        parts = home.relative_to(profiles_root).parts
+    except ValueError:
+        # Not located under the profiles directory at all.
+        return ""
+    # fullmatch instead of match + "$": "$" would also match just before a
+    # trailing newline.
+    if len(parts) == 1 and re.fullmatch(r"[a-z0-9][a-z0-9_-]{0,63}", parts[0]):
+        return f"--profile {parts[0]}"
+    return ""
+
+
 def get_service_name() -> str:
    """Derive a systemd service name scoped to this HERMES_HOME.

@@ -626,6 +654,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
        hermes_home = _hermes_home_for_target_user(home_dir)
+        profile_arg = _profile_arg(hermes_home)
        path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
        path_entries.extend(common_bin_paths)
        sane_path = ":".join(path_entries)
@@ -640,7 +669,7 @@ StartLimitBurst=5
 Type=simple
 User={username}
 Group={group_name}
-ExecStart={python_path} -m hermes_cli.main gateway run --replace
+ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace
 WorkingDirectory={working_dir}
 Environment="HOME={home_dir}"
 Environment="USER={username}"
@@ -661,6 +690,7 @@ WantedBy=multi-user.target
 """

    hermes_home = str(get_hermes_home().resolve())
+    profile_arg = _profile_arg(hermes_home)
    path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
    path_entries.extend(common_bin_paths)
    sane_path = ":".join(path_entries)
@@ -672,7 +702,7 @@ StartLimitBurst=5

 [Service]
 Type=simple
-ExecStart={python_path} -m hermes_cli.main gateway run --replace
+ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace
 WorkingDirectory={working_dir}
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
@@ -965,6 +995,7 @@ def generate_launchd_plist() -> str:
    log_dir = get_hermes_home() / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    label = get_launchd_label()
+    profile_arg = _profile_arg(hermes_home)
    # Build a sane PATH for the launchd plist.  launchd provides only a
    # minimal default (/usr/bin:/bin:/usr/sbin:/sbin) which misses Homebrew,
    # nvm, cargo, etc.  We prepend venv/bin and node_modules/.bin (matching
@@ -986,21 +1017,32 @@ def generate_launchd_plist() -> str:
        dict.fromkeys(priority_dirs + [p for p in os.environ.get("PATH", "").split(":") if p])
    )

+    # Build ProgramArguments array, including --profile when using a named profile
+    prog_args = [
+        f"<string>{python_path}</string>",
+        "<string>-m</string>",
+        "<string>hermes_cli.main</string>",
+    ]
+    if profile_arg:
+        for part in profile_arg.split():
+            prog_args.append(f"<string>{part}</string>")
+    prog_args.extend([
+        "<string>gateway</string>",
+        "<string>run</string>",
+        "<string>--replace</string>",
+    ])
+    prog_args_xml = "\n        ".join(prog_args)
+
    return f"""<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
    <key>Label</key>
    <string>{label}</string>
-    
+
    <key>ProgramArguments</key>
    <array>
-        <string>{python_path}</string>
-        <string>-m</string>
-        <string>hermes_cli.main</string>
-        <string>gateway</string>
-        <string>run</string>
-        <string>--replace</string>
+        {prog_args_xml}
    </array>
    
    <key>WorkingDirectory</key>
@@ -1121,7 +1163,7 @@ def launchd_start():
    try:
        subprocess.run(["launchctl", "kickstart", f"{_launchd_domain()}/{label}"], check=True, timeout=30)
    except subprocess.CalledProcessError as e:
-        if e.returncode != 3:
+        if e.returncode not in (3, 113):
            raise
        print("↻ launchd job was unloaded; reloading service definition")
        subprocess.run(["launchctl", "bootstrap", _launchd_domain(), str(plist_path)], check=True, timeout=30)
@@ -1183,7 +1225,7 @@ def launchd_restart():
        subprocess.run(["launchctl", "kickstart", "-k", target], check=True, timeout=90)
        print("✓ Service restarted")
    except subprocess.CalledProcessError as e:
-        if e.returncode != 3:
+        if e.returncode not in (3, 113):
            raise
        # Job not loaded — bootstrap and start fresh
        print("↻ launchd job was unloaded; reloading")
@@ -1803,8 +1845,7 @@ def _setup_signal():
        print_warning("signal-cli not found on PATH.")
        print_info("  Signal requires signal-cli running as an HTTP daemon.")
        print_info("  Install options:")
-        print_info("    Linux:  sudo apt install signal-cli")
-        print_info("            or download from https://github.com/AsamK/signal-cli")
+        print_info("    Linux:  download from https://github.com/AsamK/signal-cli/releases")
        print_info("    macOS:  brew install signal-cli")
        print_info("    Docker: bbernhard/signal-cli-rest-api")
        print()
@@ -15,7 +15,6 @@ Usage examples::
    hermes logs --since 30m -f     # follow, starting 30 min ago
 """

-import os
 import re
 import sys
 import time
@@ -921,6 +921,7 @@ def select_provider_and_model(args=None):
        "copilot-acp": "GitHub Copilot ACP",
        "copilot": "GitHub Copilot",
        "anthropic": "Anthropic",
+        "gemini": "Google AI Studio",
        "zai": "Z.AI / GLM",
        "kimi-coding": "Kimi / Moonshot",
        "minimax": "MiniMax",
@@ -952,6 +953,7 @@ def select_provider_and_model(args=None):

    extended_providers = [
        ("copilot-acp", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
+        ("gemini", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
        ("zai", "Z.AI / GLM (Zhipu AI direct API)"),
        ("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
        ("minimax", "MiniMax (global direct API)"),
@@ -1055,7 +1057,7 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
-    elif selected_provider in ("zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
+    elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
        _model_flow_api_key_provider(config, selected_provider, current_model)


@@ -1152,7 +1154,7 @@ def _model_flow_nous(config, current_model="", args=None):
    from hermes_cli.auth import (
        get_provider_auth_state, _prompt_model_selection, _save_model_choice,
        _update_config_for_provider, resolve_nous_runtime_credentials,
-        fetch_nous_models, AuthError, format_auth_error,
+        AuthError, format_auth_error,
        _login_nous, PROVIDER_REGISTRY,
    )
    from hermes_cli.config import get_env_value, save_config, save_env_value
@@ -1193,14 +1195,15 @@ def _model_flow_nous(config, current_model="", args=None):
    # Already logged in — use curated model list (same as OpenRouter defaults).
    # The live /models endpoint returns hundreds of models; the curated list
    # shows only agentic models users recognize from OpenRouter.
-    from hermes_cli.models import _PROVIDER_MODELS, get_pricing_for_provider
+    from hermes_cli.models import (
+        _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+        check_nous_free_tier, partition_nous_models_by_tier,
+    )
    model_ids = _PROVIDER_MODELS.get("nous", [])
    if not model_ids:
        print("No curated models available for Nous Portal.")
        return

-    print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
-
    # Verify credentials are still valid (catches expired sessions early)
    try:
        creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
@@ -1226,7 +1229,44 @@ def _model_flow_nous(config, current_model="", args=None):
    # Fetch live pricing (non-blocking — returns empty dict on failure)
    pricing = get_pricing_for_provider("nous")

-    selected = _prompt_model_selection(model_ids, current_model=current_model, pricing=pricing)
+    # Check if user is on free tier
+    free_tier = check_nous_free_tier()
+
+    # For both tiers: apply the allowlist filter first (removes non-allowlisted
+    # free models and allowlist models that aren't actually free).
+    # Then for free users: partition remaining models into selectable/unavailable.
+    model_ids = filter_nous_free_models(model_ids, pricing)
+    unavailable_models: list[str] = []
+    if free_tier:
+        model_ids, unavailable_models = partition_nous_models_by_tier(model_ids, pricing, free_tier=True)
+
+    if not model_ids and not unavailable_models:
+        print("No models available for Nous Portal after filtering.")
+        return
+
+    # Resolve portal URL for upgrade links (may differ on staging)
+    _nous_portal_url = ""
+    try:
+        _nous_state = get_provider_auth_state("nous")
+        if _nous_state:
+            _nous_portal_url = _nous_state.get("portal_base_url", "")
+    except Exception:
+        pass
+
+    if free_tier and not model_ids:
+        print("No free models currently available.")
+        if unavailable_models:
+            from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
+            _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+            print(f"Upgrade at {_url} to access paid models.")
+        return
+
+    print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
+
+    selected = _prompt_model_selection(
+        model_ids, current_model=current_model, pricing=pricing,
+        unavailable_models=unavailable_models, portal_url=_nous_portal_url,
+    )
    if selected:
        _save_model_choice(selected)
        # Reactivate Nous as the provider and update config
@@ -1274,7 +1314,6 @@ def _model_flow_openai_codex(config, current_model=""):
        PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL,
    )
    from hermes_cli.codex_models import get_codex_model_ids
-    from hermes_cli.config import get_env_value, save_env_value
    import argparse

    status = get_codex_auth_status()
@@ -1292,12 +1331,21 @@ def _model_flow_openai_codex(config, current_model=""):
            return

    _codex_token = None
+    # Prefer credential pool (where `hermes auth` stores device_code tokens),
+    # fall back to legacy provider state.
    try:
-        from hermes_cli.auth import resolve_codex_runtime_credentials
-        _codex_creds = resolve_codex_runtime_credentials()
-        _codex_token = _codex_creds.get("api_key")
+        _codex_status = get_codex_auth_status()
+        if _codex_status.get("logged_in"):
+            _codex_token = _codex_status.get("api_key")
    except Exception:
        pass
+    if not _codex_token:
+        try:
+            from hermes_cli.auth import resolve_codex_runtime_credentials
+            _codex_creds = resolve_codex_runtime_credentials()
+            _codex_token = _codex_creds.get("api_key")
+        except Exception:
+            pass

    codex_models = get_codex_model_ids(access_token=_codex_token)

@@ -1318,7 +1366,7 @@ def _model_flow_custom(config):
    so it appears in the provider menu on subsequent runs.
    """
    from hermes_cli.auth import _save_model_choice, deactivate_provider
-    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.config import get_env_value, load_config, save_config

    current_url = get_env_value("OPENAI_BASE_URL") or ""
    current_key = get_env_value("OPENAI_API_KEY") or ""
@@ -1580,7 +1628,7 @@ def _model_flow_named_custom(config, provider_info):
    Otherwise probes the endpoint's /models API to let the user pick one.
    """
    from hermes_cli.auth import _save_model_choice, deactivate_provider
-    from hermes_cli.config import save_env_value, load_config, save_config
+    from hermes_cli.config import load_config, save_config
    from hermes_cli.models import fetch_api_models

    name = provider_info["name"]
@@ -1790,7 +1838,7 @@ def _model_flow_copilot(config, current_model=""):
        deactivate_provider,
        resolve_api_key_provider_credentials,
    )
-    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.config import save_env_value, load_config, save_config
    from hermes_cli.models import (
        fetch_api_models,
        fetch_github_model_catalog,
@@ -2209,24 +2257,37 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        save_env_value(base_url_env, override)
        effective_base = override

-    # Model selection — try live /models endpoint first, fall back to defaults.
-    # Providers with large live catalogs (100+ models) use a curated list instead
-    # so users see familiar model names rather than an overwhelming dump.
+    # Model selection — resolution order:
+    #   1. models.dev registry (cached, filtered for agentic/tool-capable models)
+    #   2. Curated static fallback list (offline insurance)
+    #   3. Live /models endpoint probe (small providers without models.dev data)
    curated = _PROVIDER_MODELS.get(provider_id, [])
-    if curated and len(curated) >= 8:
+
+    # Try models.dev first — returns tool-capable models, filtered for noise
+    mdev_models: list = []
+    try:
+        from agent.models_dev import list_agentic_models
+        mdev_models = list_agentic_models(provider_id)
+    except Exception:
+        pass
+
+    if mdev_models:
+        model_list = mdev_models
+        print(f"  Found {len(model_list)} model(s) from models.dev registry")
+    elif curated and len(curated) >= 8:
        # Curated list is substantial — use it directly, skip live probe
-        live_models = None
+        model_list = curated
+        print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
    else:
        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
        live_models = fetch_api_models(api_key_for_probe, effective_base)
-
-    if live_models and len(live_models) >= len(curated):
-        model_list = live_models
-        print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
-    else:
-        model_list = curated
-        if model_list:
-            print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
+        if live_models and len(live_models) >= len(curated):
+            model_list = live_models
+            print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
+        else:
+            model_list = curated
+            if model_list:
+                print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
        # else: no defaults either, will fall through to raw input

    if provider_id in {"opencode-zen", "opencode-go"}:
@@ -2368,8 +2429,6 @@ def _model_flow_anthropic(config, current_model=""):
    )
    from hermes_cli.models import _PROVIDER_MODELS

-    pconfig = PROVIDER_REGISTRY["anthropic"]
-
    # Check ALL credential sources
    existing_key = (
        get_env_value("ANTHROPIC_TOKEN")
@@ -3542,7 +3601,7 @@ def cmd_update(args):
        try:
            from hermes_cli.profiles import list_profiles, get_active_profile_name, seed_profile_skills
            active = get_active_profile_name()
-            other_profiles = [p for p in list_profiles() if not p.is_default and p.name != active]
+            other_profiles = [p for p in list_profiles() if p.name != active]
            if other_profiles:
                print()
                print("→ Syncing bundled skills to other profiles...")
@@ -3638,7 +3697,7 @@ def cmd_update(args):
        try:
            from hermes_cli.gateway import (
                is_macos, is_linux, _ensure_user_systemd_env,
-                get_systemd_linger_status, find_gateway_pids,
+                find_gateway_pids,
                _get_service_pids,
            )
            import signal as _signal
@@ -3794,7 +3853,7 @@ def cmd_profile(args):
    """Profile management — create, delete, list, switch, alias."""
    from hermes_cli.profiles import (
        list_profiles, create_profile, delete_profile, seed_profile_skills,
-        get_active_profile, set_active_profile, get_active_profile_name,
+        set_active_profile, get_active_profile_name,
        check_alias_collision, create_wrapper_script, remove_wrapper_script,
        _is_wrapper_dir_in_path, _get_wrapper_dir,
    )
@@ -3922,7 +3981,6 @@ def cmd_profile(args):
            print(f"  {name} chat               Start chatting")
            print(f"  {name} gateway start      Start the messaging gateway")
            if clone or clone_all:
-                from hermes_constants import get_hermes_home
                profile_dir_display = f"~/.hermes/profiles/{name}"
                print(f"\n  Edit {profile_dir_display}/.env for different API keys")
                print(f"  Edit {profile_dir_display}/SOUL.md for different personality")
@@ -4182,7 +4240,7 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
+        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
        default=None,
        help="Inference provider (default: auto)"
    )
@@ -4345,7 +4403,7 @@ For more help on a command:
    gateway_uninstall.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")

    # gateway setup
-    gateway_setup = gateway_subparsers.add_parser("setup", help="Configure messaging platforms")
+    gateway_subparsers.add_parser("setup", help="Configure messaging platforms")

    gateway_parser.set_defaults(func=cmd_gateway)
    
@@ -4620,10 +4678,10 @@ For more help on a command:
    config_subparsers = config_parser.add_subparsers(dest="config_command")
    
    # config show (default)
-    config_show = config_subparsers.add_parser("show", help="Show current configuration")
+    config_subparsers.add_parser("show", help="Show current configuration")
    
    # config edit
-    config_edit = config_subparsers.add_parser("edit", help="Open config file in editor")
+    config_subparsers.add_parser("edit", help="Open config file in editor")
    
    # config set
    config_set = config_subparsers.add_parser("set", help="Set a configuration value")
@@ -4631,16 +4689,16 @@ For more help on a command:
    config_set.add_argument("value", nargs="?", help="Value to set")
    
    # config path
-    config_path = config_subparsers.add_parser("path", help="Print config file path")
+    config_subparsers.add_parser("path", help="Print config file path")
    
    # config env-path
-    config_env = config_subparsers.add_parser("env-path", help="Print .env file path")
+    config_subparsers.add_parser("env-path", help="Print .env file path")
    
    # config check
-    config_check = config_subparsers.add_parser("check", help="Check for missing/outdated config")
+    config_subparsers.add_parser("check", help="Check for missing/outdated config")
    
    # config migrate
-    config_migrate = config_subparsers.add_parser("migrate", help="Update config with new options")
+    config_subparsers.add_parser("migrate", help="Update config with new options")
    
    config_parser.set_defaults(func=cmd_config)
    
@@ -4654,7 +4712,7 @@ For more help on a command:
    )
    pairing_sub = pairing_parser.add_subparsers(dest="pairing_action")

-    pairing_list_parser = pairing_sub.add_parser("list", help="Show pending + approved users")
+    pairing_sub.add_parser("list", help="Show pending + approved users")

    pairing_approve_parser = pairing_sub.add_parser("approve", help="Approve a pairing code")
    pairing_approve_parser.add_argument("platform", help="Platform name (telegram, discord, slack, whatsapp)")
@@ -4664,7 +4722,7 @@ For more help on a command:
    pairing_revoke_parser.add_argument("platform", help="Platform name")
    pairing_revoke_parser.add_argument("user_id", help="User ID to revoke")

-    pairing_clear_parser = pairing_sub.add_parser("clear-pending", help="Clear all pending codes")
+    pairing_sub.add_parser("clear-pending", help="Clear all pending codes")

    def cmd_pairing(args):
        from hermes_cli.pairing import pairing_command
@@ -4840,7 +4898,7 @@ For more help on a command:
    memory_sub = memory_parser.add_subparsers(dest="memory_command")
    memory_sub.add_parser("setup", help="Interactive provider selection and configuration")
    memory_sub.add_parser("status", help="Show current memory provider config")
-    memory_off_p = memory_sub.add_parser("off", help="Disable external provider (built-in only)")
+    memory_sub.add_parser("off", help="Disable external provider (built-in only)")

    def cmd_memory(args):
        sub = getattr(args, "memory_command", None)
@@ -5004,7 +5062,7 @@ For more help on a command:
    sessions_prune.add_argument("--source", help="Only prune sessions from this source")
    sessions_prune.add_argument("--yes", "-y", action="store_true", help="Skip confirmation")

-    sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics")
+    sessions_subparsers.add_parser("stats", help="Show session store statistics")

    sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title")
    sessions_rename.add_argument("session_id", help="Session ID to rename")
@@ -5364,7 +5422,7 @@ For more help on a command:
    )
    profile_subparsers = profile_parser.add_subparsers(dest="profile_action")

-    profile_list = profile_subparsers.add_parser("list", help="List all profiles")
+    profile_subparsers.add_parser("list", help="List all profiles")
    profile_use = profile_subparsers.add_parser("use", help="Set sticky default profile")
    profile_use.add_argument("profile_name", help="Profile name (or 'default')")

@@ -12,6 +12,8 @@ import os
 import sys
 from pathlib import Path

+from hermes_constants import get_hermes_home
+

 # ---------------------------------------------------------------------------
 # Curses-based interactive picker (same pattern as hermes tools)
@@ -275,7 +277,7 @@ def cmd_setup_provider(provider_name: str) -> None:
        config["memory"] = {}

    if hasattr(provider, "post_setup"):
-        hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
+        hermes_home = str(get_hermes_home())
        provider.post_setup(hermes_home, config)
        return

@@ -326,7 +328,7 @@ def cmd_setup(args) -> None:
    # If the provider has a post_setup hook, delegate entirely to it.
    # The hook handles its own config, connection test, and activation.
    if hasattr(provider, "post_setup"):
-        hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
+        hermes_home = str(get_hermes_home())
        provider.post_setup(hermes_home, config)
        return

@@ -336,7 +338,7 @@ def cmd_setup(args) -> None:
    if not isinstance(provider_config, dict):
        provider_config = {}

-    env_path = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / ".env"
+    env_path = get_hermes_home() / ".env"
    env_writes = {}

    if schema:
@@ -400,7 +402,7 @@ def cmd_setup(args) -> None:
    save_config(config)

    # Write non-secret config to provider's native location
-    hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
+    hermes_home = str(get_hermes_home())
    if provider_config and hasattr(provider, "save_config"):
        try:
            provider.save_config(provider_config, hermes_home)
@@ -8,8 +8,9 @@ Different LLM providers expect model identifiers in different formats:
  hyphens: ``claude-sonnet-4-6``.
 - **Copilot** expects bare names *with* dots preserved:
  ``claude-sonnet-4.6``.
-- **OpenCode** (Zen & Go) follows the same dot-to-hyphen convention as
+- **OpenCode Zen** follows the same dot-to-hyphen convention as
  Anthropic: ``claude-sonnet-4-6``.
+- **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
 - **DeepSeek** only accepts two model identifiers:
  ``deepseek-chat`` and ``deepseek-reasoner``.
 - **Custom** and remaining providers pass the name through as-is.
@@ -41,6 +42,7 @@ _VENDOR_PREFIXES: dict[str, str] = {
    "o3": "openai",
    "o4": "openai",
    "gemini": "google",
+    "gemma": "google",
    "deepseek": "deepseek",
    "glm": "z-ai",
    "kimi": "moonshotai",
@@ -66,7 +68,6 @@ _AGGREGATOR_PROVIDERS: frozenset[str] = frozenset({
 _DOT_TO_HYPHEN_PROVIDERS: frozenset[str] = frozenset({
    "anthropic",
    "opencode-zen",
-    "opencode-go",
 })

 # Providers that want bare names with dots preserved.
@@ -77,6 +78,7 @@ _STRIP_VENDOR_ONLY_PROVIDERS: frozenset[str] = frozenset({

 # Providers whose own naming is authoritative -- pass through unchanged.
 _PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({
+    "gemini",
    "zai",
    "kimi-coding",
    "minimax",
@@ -21,22 +21,16 @@ OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``).
 from __future__ import annotations

 import logging
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import List, NamedTuple, Optional

 from hermes_cli.providers import (
-    ALIASES,
-    LABELS,
-    TRANSPORT_TO_API_MODE,
    determine_api_mode,
    get_label,
-    get_provider,
    is_aggregator,
-    normalize_provider,
    resolve_provider_full,
 )
 from hermes_cli.model_normalize import (
-    detect_vendor,
    normalize_model_for_provider,
 )
 from agent.models_dev import (
@@ -339,12 +333,37 @@ def resolve_alias(
    return None


+def get_authenticated_provider_slugs(
+    current_provider: str = "",
+    user_providers: dict = None,
+) -> list[str]:
+    """Return slugs of providers that have credentials.
+
+    Uses ``list_authenticated_providers()`` which is backed by the models.dev
+    in-memory cache (1 hr TTL) — no extra network cost.
+
+    Args:
+        current_provider: Forwarded unchanged to
+            ``list_authenticated_providers()``.
+        user_providers: Forwarded unchanged to
+            ``list_authenticated_providers()``; defaults to ``None``.
+
+    Returns:
+        The ``"slug"`` value of every entry returned by
+        ``list_authenticated_providers()``, in the order returned.  An empty
+        list if the lookup (or reading a ``"slug"`` key) raises.
+    """
+    try:
+        providers = list_authenticated_providers(
+            current_provider=current_provider,
+            user_providers=user_providers,
+            # Only the slugs are read below, so no per-provider models are
+            # requested (presumably this skips building model lists — confirm).
+            max_models=0,
+        )
+        return [p["slug"] for p in providers]
+    except Exception:
+        # Best-effort: any failure degrades to "no authenticated providers".
+        return []
+
+
 def _resolve_alias_fallback(
    raw_input: str,
-    fallback_providers: tuple[str, ...] = ("openrouter", "nous"),
+    authenticated_providers: list[str] = (),
 ) -> Optional[tuple[str, str, str]]:
-    """Try to resolve an alias on fallback providers."""
-    for provider in fallback_providers:
+    """Try to resolve an alias on the user's authenticated providers.
+
+    Falls back to ``("openrouter", "nous")`` only when no authenticated
+    providers are supplied (backwards compat for non-interactive callers).
+    """
+    providers = authenticated_providers or ("openrouter", "nous")
+    for provider in providers:
        result = resolve_alias(raw_input, provider)
        if result is not None:
            return result
@@ -494,7 +513,11 @@ def switch_model(
            # --- Step b: Alias exists but not on current provider -> fallback ---
            key = raw_input.strip().lower()
            if key in MODEL_ALIASES:
-                fallback_result = _resolve_alias_fallback(raw_input)
+                authed = get_authenticated_provider_slugs(
+                    current_provider=current_provider,
+                    user_providers=user_providers,
+                )
+                fallback_result = _resolve_alias_fallback(raw_input, authed)
                if fallback_result is not None:
                    target_provider, new_model, resolved_alias = fallback_result
                    logger.debug(
@@ -44,7 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("stepfun/step-3.5-flash",          ""),
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
-    ("z-ai/glm-5",                      ""),
+    ("z-ai/glm-5.1",                    ""),
    ("z-ai/glm-5-turbo",                ""),
    ("moonshotai/kimi-k2.5",            ""),
    ("x-ai/grok-4.20-beta",             ""),
@@ -75,7 +75,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
-        "z-ai/glm-5",
+        "z-ai/glm-5.1",
        "z-ai/glm-5-turbo",
        "moonshotai/kimi-k2.5",
        "x-ai/grok-4.20-beta",
@@ -111,6 +111,17 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemini-2.5-pro",
        "grok-code-fast-1",
    ],
+    "gemini": [
+        "gemini-3.1-pro-preview",
+        "gemini-3-flash-preview",
+        "gemini-3.1-flash-lite-preview",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+        # Gemma open models (also served via AI Studio)
+        "gemma-4-31b-it",
+        "gemma-4-26b-it",
+    ],
    "zai": [
        "glm-5",
        "glm-5-turbo",
@@ -254,12 +265,209 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    ],
 }

+# ---------------------------------------------------------------------------
+# Nous Portal free-model filtering
+# ---------------------------------------------------------------------------
+# Models that are ALLOWED to appear when priced as free on Nous Portal.
+# Any other free model is hidden — prevents promotional/temporary free models
+# from cluttering the selection when users are paying subscribers.
+# Models in this list are ALSO filtered out if they are NOT free (i.e. they
+# should only appear in the menu when they are genuinely free).
+_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
+    "xiaomi/mimo-v2-pro",
+    "xiaomi/mimo-v2-omni",
+})
+
+
+def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
+    """Return True if *model_id* has zero-cost prompt AND completion pricing.
+
+    Args:
+        model_id: Key to look up in *pricing*.
+        pricing: Mapping of model id to a pricing entry whose ``"prompt"`` and
+            ``"completion"`` values are parsed with ``float()``.
+
+    Returns:
+        False when the model has no (or an empty) pricing entry, when either
+        price is non-zero, or when a price cannot be parsed as a float.
+    """
+    p = pricing.get(model_id)
+    if not p:
+        return False
+    try:
+        # A missing field falls back to "1", i.e. it is treated as not free.
+        return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0
+    except (TypeError, ValueError):
+        return False
+
+
+def filter_nous_free_models(
+    model_ids: list[str],
+    pricing: dict[str, dict[str, str]],
+) -> list[str]:
+    """Filter the Nous Portal model list according to free-model policy.
+
+    Rules:
+      • Paid models that are NOT in the allowlist → keep (normal case).
+      • Free models that are NOT in the allowlist → drop.
+      • Allowlist models that ARE free → keep.
+      • Allowlist models that are NOT free → drop.
+
+    "Free" is decided by ``_is_model_free``; the allowlist is
+    ``_NOUS_ALLOWED_FREE_MODELS``.
+
+    Args:
+        model_ids: Candidate model ids, in display order.
+        pricing: Mapping of model id to its pricing entry.
+
+    Returns:
+        The surviving model ids in their original order.  When *pricing* is
+        empty the input list itself is returned unfiltered.
+    """
+    if not pricing:
+        return model_ids  # no pricing data — can't filter, show everything
+
+    result: list[str] = []
+    for mid in model_ids:
+        # A model without a pricing entry counts as not free, so a regular
+        # model is kept and an allowlist model is dropped in that case.
+        free = _is_model_free(mid, pricing)
+        if mid in _NOUS_ALLOWED_FREE_MODELS:
+            # Allowlist model: only show when it's actually free
+            if free:
+                result.append(mid)
+        else:
+            # Regular model: keep only when it's NOT free
+            if not free:
+                result.append(mid)
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Nous Portal account tier detection
+# ---------------------------------------------------------------------------
+
+def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]:
+    """Fetch the user's Nous Portal account/subscription info.
+
+    Calls ``<portal>/api/oauth/account`` with the OAuth access token.
+
+    Args:
+        access_token: Sent as a ``Bearer`` token in the ``Authorization``
+            header.
+        portal_base_url: Portal origin; when empty,
+            ``https://portal.nousresearch.com`` is used.  A trailing ``/`` is
+            stripped before the path is appended.
+
+    Returns the parsed JSON dict on success, e.g.::
+
+        {
+            "subscription": {
+                "plan": "Plus",
+                "tier": 2,
+                "monthly_charge": 20,
+                "credits_remaining": 1686.60,
+                ...
+            },
+            ...
+        }
+
+    Returns an empty dict on any failure (network, auth, parse).
+    """
+    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
+    url = f"{base}/api/oauth/account"
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Accept": "application/json",
+    }
+    try:
+        req = urllib.request.Request(url, headers=headers)
+        # 8-second timeout on the request.
+        with urllib.request.urlopen(req, timeout=8) as resp:
+            # NOTE(review): the decoded JSON is returned as-is; nothing here
+            # checks that it is actually a dict — confirm callers tolerate that.
+            return json.loads(resp.read().decode())
+    except Exception:
+        # Deliberately broad: every failure mode collapses to "no info".
+        return {}
+
+
+def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
+    """Return True if the account info indicates a free (unpaid) tier.
+
+    Checks ``subscription.monthly_charge == 0``.  Returns False when
+    the field is missing or unparseable (assumes paid — don't block users).
+
+    Args:
+        account_info: Account payload; only its ``"subscription"`` entry is
+            read, and only when that entry is a dict.
+
+    Returns:
+        True only when ``monthly_charge`` is present and ``float()`` of it
+        equals zero; False in every other case.
+    """
+    sub = account_info.get("subscription")
+    if not isinstance(sub, dict):
+        return False
+    charge = sub.get("monthly_charge")
+    # An absent charge is distinguished from a zero charge: absent means
+    # "unknown", which is treated as paid.
+    if charge is None:
+        return False
+    try:
+        return float(charge) == 0
+    except (TypeError, ValueError):
+        return False
+
+
+def partition_nous_models_by_tier(
+    model_ids: list[str],
+    pricing: dict[str, dict[str, str]],
+    free_tier: bool,
+) -> tuple[list[str], list[str]]:
+    """Split Nous models into (selectable, unavailable) based on user tier.
+
+    For paid-tier users: all models are selectable, none unavailable
+    (free-model filtering is handled separately by ``filter_nous_free_models``).
+
+    For free-tier users: only free models are selectable; paid models
+    are returned as unavailable (shown grayed out in the menu).
+
+    Args:
+        model_ids: Candidate model ids, in display order.
+        pricing: Mapping of model id to its pricing entry, consulted through
+            ``_is_model_free``.
+        free_tier: Whether the current user is on the free tier.
+
+    Returns:
+        ``(selectable, unavailable)``, each preserving the input order.  When
+        the user is not on the free tier, or *pricing* is empty, the result
+        is ``(model_ids, [])`` with the input list returned as-is.
+    """
+    if not free_tier:
+        return (model_ids, [])
+
+    if not pricing:
+        return (model_ids, [])  # can't determine, show everything
+
+    selectable: list[str] = []
+    unavailable: list[str] = []
+    for mid in model_ids:
+        # Models with no pricing entry are not considered free, so they land
+        # in the unavailable list for free-tier users.
+        if _is_model_free(mid, pricing):
+            selectable.append(mid)
+        else:
+            unavailable.append(mid)
+    return (selectable, unavailable)
+
+
+# ---------------------------------------------------------------------------
+# TTL cache for free-tier detection — avoids repeated API calls within a
+# session while still picking up upgrades quickly.
+# ---------------------------------------------------------------------------
+_FREE_TIER_CACHE_TTL: int = 180  # seconds (3 minutes)
+_free_tier_cache: tuple[bool, float] | None = None  # (result, timestamp)
+
+
+def clear_nous_free_tier_cache() -> None:
+    """Invalidate the cached free-tier result (e.g. after login/logout).
+
+    Resets the module-level ``_free_tier_cache`` to ``None``.
+    """
+    global _free_tier_cache
+    _free_tier_cache = None
+
+
+def check_nous_free_tier() -> bool:
+    """Check if the current Nous Portal user is on a free (unpaid) tier.
+
+    Results are cached for ``_FREE_TIER_CACHE_TTL`` seconds to avoid
+    hitting the Portal API on every call.  The cache is short-lived so
+    that an account upgrade is reflected within a few minutes.
+
+    Negative outcomes (no auth state, no access token, any exception) are
+    cached as False for the same TTL.
+
+    Returns False (assume paid) on any error — never blocks paying users.
+    """
+    global _free_tier_cache
+    import time
+
+    # Cache age is measured with time.monotonic(), the same clock used for
+    # the timestamps stored in _free_tier_cache.
+    now = time.monotonic()
+    if _free_tier_cache is not None:
+        cached_result, cached_at = _free_tier_cache
+        if now - cached_at < _FREE_TIER_CACHE_TTL:
+            return cached_result
+
+    try:
+        from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials
+
+        # Ensure we have a fresh token (triggers refresh if needed)
+        resolve_nous_runtime_credentials(min_key_ttl_seconds=60)
+
+        # The return value above is discarded; the token is read from the
+        # stored provider auth state instead.
+        state = get_provider_auth_state("nous")
+        if not state:
+            _free_tier_cache = (False, now)
+            return False
+        access_token = state.get("access_token", "")
+        portal_url = state.get("portal_base_url", "")
+        if not access_token:
+            _free_tier_cache = (False, now)
+            return False
+
+        # fetch_nous_account_tier returns {} on failure, which
+        # is_nous_free_tier reports as False (paid).
+        account_info = fetch_nous_account_tier(access_token, portal_url)
+        result = is_nous_free_tier(account_info)
+        _free_tier_cache = (result, now)
+        return result
+    except Exception:
+        _free_tier_cache = (False, now)
+        return False  # default to paid on error — don't block users
+
+
 _PROVIDER_LABELS = {
    "openrouter": "OpenRouter",
    "openai-codex": "OpenAI Codex",
    "copilot-acp": "GitHub Copilot ACP",
    "nous": "Nous Portal",
    "copilot": "GitHub Copilot",
+    "gemini": "Google AI Studio",
    "zai": "Z.AI / GLM",
    "kimi-coding": "Kimi / Moonshot",
    "minimax": "MiniMax",
@@ -286,6 +494,9 @@ _PROVIDER_ALIASES = {
    "github-model": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
+    "google": "gemini",
+    "google-gemini": "gemini",
+    "google-ai-studio": "gemini",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
    "minimax-china": "minimax-cn",
@@ -550,7 +761,8 @@ def list_available_providers() -> list[dict[str, str]]:
    # Canonical providers in display order
    _PROVIDER_ORDER = [
        "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-        "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
+        "gemini", "huggingface",
+        "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
        "opencode-zen", "opencode-go",
        "ai-gateway", "deepseek", "custom",
    ]
@@ -919,10 +1131,6 @@ def _payload_items(payload: Any) -> list[dict[str, Any]]:
    return []


-def _extract_model_ids(payload: Any) -> list[str]:
-    return [item.get("id", "") for item in _payload_items(payload) if item.get("id")]
-
-
 def copilot_default_headers() -> dict[str, str]:
    """Standard headers for Copilot API requests.

@@ -131,6 +131,7 @@ def _browser_label(current_provider: str) -> str:
    mapping = {
        "browserbase": "Browserbase",
        "browser-use": "Browser Use",
+        "firecrawl": "Firecrawl",
        "camofox": "Camofox",
        "local": "Local browser",
    }
@@ -156,6 +157,7 @@ def _resolve_browser_feature_state(
    direct_camofox: bool,
    direct_browserbase: bool,
    direct_browser_use: bool,
+    direct_firecrawl: bool,
    managed_browser_available: bool,
 ) -> tuple[str, bool, bool, bool]:
    """Resolve browser availability using the same precedence as runtime."""
@@ -165,18 +167,22 @@ def _resolve_browser_feature_state(
    if browser_provider_explicit:
        current_provider = browser_provider or "local"
        if current_provider == "browserbase":
-            provider_available = managed_browser_available or direct_browserbase
+            available = bool(browser_local_available and direct_browserbase)
+            active = bool(browser_tool_enabled and available)
+            return current_provider, available, active, False
+        if current_provider == "browser-use":
+            provider_available = managed_browser_available or direct_browser_use
            available = bool(browser_local_available and provider_available)
            managed = bool(
                browser_tool_enabled
                and browser_local_available
                and managed_browser_available
-                and not direct_browserbase
+                and not direct_browser_use
            )
            active = bool(browser_tool_enabled and available)
            return current_provider, available, active, managed
-        if current_provider == "browser-use":
-            available = bool(browser_local_available and direct_browser_use)
+        if current_provider == "firecrawl":
+            available = bool(browser_local_available and direct_firecrawl)
            active = bool(browser_tool_enabled and available)
            return current_provider, available, active, False
        if current_provider == "camofox":
@@ -187,16 +193,21 @@ def _resolve_browser_feature_state(
        active = bool(browser_tool_enabled and available)
        return current_provider, available, active, False

-    if managed_browser_available or direct_browserbase:
+    if managed_browser_available or direct_browser_use:
        available = bool(browser_local_available)
        managed = bool(
            browser_tool_enabled
            and browser_local_available
            and managed_browser_available
-            and not direct_browserbase
+            and not direct_browser_use
        )
        active = bool(browser_tool_enabled and available)
-        return "browserbase", available, active, managed
+        return "browser-use", available, active, managed
+
+    if direct_browserbase:
+        available = bool(browser_local_available)
+        active = bool(browser_tool_enabled and available)
+        return "browserbase", available, active, False

    available = bool(browser_local_available)
    active = bool(browser_tool_enabled and available)
@@ -260,7 +271,7 @@ def get_nous_subscription_features(
    managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
    managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
-    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase")
+    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
    modal_state = resolve_modal_backend_state(
        modal_mode,
@@ -315,6 +326,7 @@ def get_nous_subscription_features(
        direct_camofox=direct_camofox,
        direct_browserbase=direct_browserbase,
        direct_browser_use=direct_browser_use,
+        direct_firecrawl=direct_firecrawl,
        managed_browser_available=managed_browser_available,
    )

@@ -505,10 +517,10 @@ def apply_nous_managed_defaults(
        changed.add("tts")

    if "browser" in selected_toolsets and not features.browser.explicit_configured and not (
-        get_env_value("BROWSERBASE_API_KEY")
-        or get_env_value("BROWSER_USE_API_KEY")
+        get_env_value("BROWSER_USE_API_KEY")
+        or get_env_value("BROWSERBASE_API_KEY")
    ):
-        browser_cfg["cloud_provider"] = "browserbase"
+        browser_cfg["cloud_provider"] = "browser-use"
        changed.add("browser")

    if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
@@ -36,8 +36,9 @@ import sys
 import types
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, Optional, Set, Union

+from hermes_constants import get_hermes_home
 from utils import env_var_enabled

 try:
@@ -95,7 +96,7 @@ class PluginManifest:
    version: str = ""
    description: str = ""
    author: str = ""
-    requires_env: List[str] = field(default_factory=list)
+    requires_env: List[Union[str, Dict[str, Any]]] = field(default_factory=list)
    provides_tools: List[str] = field(default_factory=list)
    provides_hooks: List[str] = field(default_factory=list)
    source: str = ""        # "user", "project", or "entrypoint"
@@ -258,8 +259,7 @@ class PluginManager:
        manifests: List[PluginManifest] = []

        # 1. User plugins (~/.hermes/plugins/)
-        hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))
-        user_dir = Path(hermes_home) / "plugins"
+        user_dir = get_hermes_home() / "plugins"
        manifests.extend(self._scan_directory(user_dir, source="user"))

        # 2. Project plugins (./.hermes/plugins/)
@@ -16,6 +16,8 @@ import subprocess
 import sys
 from pathlib import Path

+from hermes_constants import get_hermes_home
+
 logger = logging.getLogger(__name__)

 # Minimum manifest version this installer understands.
@@ -26,8 +28,7 @@ _SUPPORTED_MANIFEST_VERSION = 1

 def _plugins_dir() -> Path:
    """Return the user plugins directory, creating it if needed."""
-    hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))
-    plugins = Path(hermes_home) / "plugins"
+    plugins = get_hermes_home() / "plugins"
    plugins.mkdir(parents=True, exist_ok=True)
    return plugins

@@ -147,6 +148,82 @@ def _copy_example_files(plugin_dir: Path, console) -> None:
                )


+def _prompt_plugin_env_vars(manifest: dict, console) -> None:
+    """Prompt for required environment variables declared in plugin.yaml.
+
+    ``requires_env`` accepts two formats:
+
+    Simple list (backwards-compatible)::
+
+        requires_env:
+          - MY_API_KEY
+
+    Rich list with metadata::
+
+        requires_env:
+          - name: MY_API_KEY
+            description: "API key for Acme service"
+            url: "https://acme.com/keys"
+            secret: true
+
+    Already-set variables are skipped.  Values are saved to the user's ``.env``.
+    """
+    requires_env = manifest.get("requires_env") or []
+    if not requires_env:
+        return
+
+    from hermes_cli.config import get_env_value, save_env_value  # noqa: F811
+    from hermes_constants import display_hermes_home
+
+    # Normalise to list-of-dicts
+    env_specs: list[dict] = []
+    for entry in requires_env:
+        if isinstance(entry, str):
+            env_specs.append({"name": entry})
+        elif isinstance(entry, dict) and entry.get("name"):
+            env_specs.append(entry)
+
+    # Filter to only vars that aren't already set
+    missing = [s for s in env_specs if not get_env_value(s["name"])]
+    if not missing:
+        return
+
+    plugin_name = manifest.get("name", "this plugin")
+    console.print(f"\n[bold]{plugin_name}[/bold] requires the following environment variables:\n")
+
+    for spec in missing:
+        name = spec["name"]
+        desc = spec.get("description", "")
+        url = spec.get("url", "")
+        secret = spec.get("secret", False)
+
+        label = f"  {name}"
+        if desc:
+            label += f" — {desc}"
+        console.print(label)
+        if url:
+            console.print(f"  [dim]Get yours at: {url}[/dim]")
+
+        try:
+            if secret:
+                import getpass
+                value = getpass.getpass(f"  {name}: ").strip()
+            else:
+                value = input(f"  {name}: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            console.print(f"\n[dim]  Skipped (you can set these later in {display_hermes_home()}/.env)[/dim]")
+            return
+
+        if value:
+            save_env_value(name, value)
+            os.environ[name] = value
+            console.print(f"  [green]✓[/green] Saved to {display_hermes_home()}/.env")
+        else:
+            console.print(f"  [dim]  Skipped (set {name} in {display_hermes_home()}/.env later)[/dim]")
+
+    console.print()
+
+
 def _display_after_install(plugin_dir: Path, identifier: str) -> None:
    """Show after-install.md if it exists, otherwise a default message."""
    from rich.console import Console
@@ -218,7 +295,7 @@ def cmd_install(identifier: str, force: bool = False) -> None:
        sys.exit(1)

    # Warn about insecure / local URL schemes
-    if git_url.startswith("http://") or git_url.startswith("file://"):
+    if git_url.startswith(("http://", "file://")):
        console.print(
            "[yellow]Warning:[/yellow] Using insecure/local URL scheme. "
            "Consider using https:// or git@ for production installs."
@@ -306,6 +383,12 @@ def cmd_install(identifier: str, force: bool = False) -> None:
    # Copy .example files to their real names (e.g. config.yaml.example → config.yaml)
    _copy_example_files(target, console)

+    # Re-read manifest from installed location (for env var prompting)
+    installed_manifest = _read_manifest(target)
+
+    # Prompt for required environment variables before showing after-install docs
+    _prompt_plugin_env_vars(installed_manifest, console)
+
    _display_after_install(target, identifier)

    console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]")
@@ -26,7 +26,7 @@ import shutil
 import stat
 import subprocess
 import sys
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import List, Optional

@@ -517,7 +517,6 @@ def delete_profile(name: str, yes: bool = False) -> Path:
    ]

    # Check for service
-    from hermes_cli.gateway import _profile_suffix, get_service_name
    wrapper_path = _get_wrapper_dir() / name
    has_wrapper = wrapper_path.exists()
    if has_wrapper:
@@ -20,8 +20,7 @@ Other modules import from this file.  No parallel registries.
 from __future__ import annotations

 import logging
-import os
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

 logger = logging.getLogger(__name__)
@@ -345,26 +344,6 @@ def get_label(provider_id: str) -> str:
    return canonical


-# Build LABELS dict for backward compat
-def _build_labels() -> Dict[str, str]:
-    """Build labels dict from overlays + overrides. Lazy, cached."""
-    labels: Dict[str, str] = {}
-    for pid in HERMES_OVERLAYS:
-        labels[pid] = get_label(pid)
-    labels.update(_LABEL_OVERRIDES)
-    return labels
-
-# Lazy-built on first access
-_labels_cache: Optional[Dict[str, str]] = None
-
-@property
-def LABELS() -> Dict[str, str]:
-    """Backward-compatible labels dict."""
-    global _labels_cache
-    if _labels_cache is None:
-        _labels_cache = _build_labels()
-    return _labels_cache
-
 # For direct import compat, expose as module-level dict
 # Built on demand by get_label() calls
 LABELS: Dict[str, str] = {
@@ -495,7 +495,11 @@ def _resolve_explicit_runtime(
            explicit_base_url
            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
        )
-        api_key = explicit_api_key or str(state.get("agent_key") or state.get("access_token") or "").strip()
+        # Only use agent_key for inference — access_token is an OAuth token for the
+        # portal API (minting keys, refreshing tokens), not for the inference API.
+        # Falling back to access_token sends an OAuth bearer token to the inference
+        # endpoint, which returns 404 because it is not a valid inference credential.
+        api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
        if not api_key:
            creds = resolve_nous_runtime_credentials(
@@ -635,31 +639,47 @@ def resolve_runtime_provider(
            )

    if provider == "nous":
-        creds = resolve_nous_runtime_credentials(
-            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
-            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-        )
-        return {
-            "provider": "nous",
-            "api_mode": "chat_completions",
-            "base_url": creds.get("base_url", "").rstrip("/"),
-            "api_key": creds.get("api_key", ""),
-            "source": creds.get("source", "portal"),
-            "expires_at": creds.get("expires_at"),
-            "requested_provider": requested_provider,
-        }
+        try:
+            creds = resolve_nous_runtime_credentials(
+                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            )
+            return {
+                "provider": "nous",
+                "api_mode": "chat_completions",
+                "base_url": creds.get("base_url", "").rstrip("/"),
+                "api_key": creds.get("api_key", ""),
+                "source": creds.get("source", "portal"),
+                "expires_at": creds.get("expires_at"),
+                "requested_provider": requested_provider,
+            }
+        except AuthError:
+            if requested_provider != "auto":
+                raise
+            # Auto-detected Nous but credentials are stale/revoked —
+            # fall through to env-var providers (e.g. OpenRouter).
+            logger.info("Auto-detected Nous provider but credentials failed; "
+                        "falling through to next provider.")

    if provider == "openai-codex":
-        creds = resolve_codex_runtime_credentials()
-        return {
-            "provider": "openai-codex",
-            "api_mode": "codex_responses",
-            "base_url": creds.get("base_url", "").rstrip("/"),
-            "api_key": creds.get("api_key", ""),
-            "source": creds.get("source", "hermes-auth-store"),
-            "last_refresh": creds.get("last_refresh"),
-            "requested_provider": requested_provider,
-        }
+        try:
+            creds = resolve_codex_runtime_credentials()
+            return {
+                "provider": "openai-codex",
+                "api_mode": "codex_responses",
+                "base_url": creds.get("base_url", "").rstrip("/"),
+                "api_key": creds.get("api_key", ""),
+                "source": creds.get("source", "hermes-auth-store"),
+                "last_refresh": creds.get("last_refresh"),
+                "requested_provider": requested_provider,
+            }
+        except AuthError:
+            if requested_provider != "auto":
+                raise
+            # Auto-detected Codex but credentials are stale/revoked —
+            # fall through to env-var providers (e.g. OpenRouter).
+            logger.info("Auto-detected Codex provider but credentials failed; "
+                        "falling through to next provider.")

    if provider == "copilot-acp":
        creds = resolve_external_process_provider_credentials(provider)
@@ -21,7 +21,6 @@ from typing import Optional, Dict, Any

 from hermes_cli.nous_subscription import (
    apply_nous_provider_defaults,
-    get_nous_subscription_explainer_lines,
    get_nous_subscription_features,
 )
 from tools.tool_backend_helpers import managed_nous_tools_enabled
@@ -43,18 +42,6 @@ def _model_config_dict(config: Dict[str, Any]) -> Dict[str, Any]:
    return {}


-def _set_model_provider(
-    config: Dict[str, Any], provider_id: str, base_url: str = ""
-) -> None:
-    model_cfg = _model_config_dict(config)
-    model_cfg["provider"] = provider_id
-    if base_url:
-        model_cfg["base_url"] = base_url.rstrip("/")
-    else:
-        model_cfg.pop("base_url", None)
-    config["model"] = model_cfg
-
-
 def _set_default_model(config: Dict[str, Any], model_name: str) -> None:
    if not model_name:
        return
@@ -111,6 +98,11 @@ _DEFAULT_PROVIDER_MODELS = {
        "gemini-2.5-pro",
        "grok-code-fast-1",
    ],
+    "gemini": [
+        "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
+        "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
+        "gemma-4-31b-it", "gemma-4-26b-it",
+    ],
    "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
@@ -322,16 +314,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
        config["model"] = model_cfg


-def _sync_model_from_disk(config: Dict[str, Any]) -> None:
-    disk_model = load_config().get("model")
-    if isinstance(disk_model, dict):
-        model_cfg = _model_config_dict(config)
-        model_cfg.update(disk_model)
-        config["model"] = model_cfg
-    elif isinstance(disk_model, str) and disk_model.strip():
-        _set_default_model(config, disk_model.strip())
-
-
 # Import config helpers
 from hermes_cli.config import (
    get_hermes_home,
@@ -439,10 +421,22 @@ def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int
                curses.init_pair(1, curses.COLOR_GREEN, -1)
                curses.init_pair(2, curses.COLOR_YELLOW, -1)
            cursor = default
+            scroll_offset = 0

            while True:
                stdscr.clear()
                max_y, max_x = stdscr.getmaxyx()
+
+                # Rows available for list items: rows 2..(max_y-2) inclusive.
+                visible = max(1, max_y - 3)
+
+                # Scroll the viewport so the cursor is always visible.
+                if cursor < scroll_offset:
+                    scroll_offset = cursor
+                elif cursor >= scroll_offset + visible:
+                    scroll_offset = cursor - visible + 1
+                scroll_offset = max(0, min(scroll_offset, max(0, len(choices) - visible)))
+
                try:
                    stdscr.addnstr(
                        0,
@@ -454,12 +448,12 @@ def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int
                except curses.error:
                    pass

-                for i, choice in enumerate(choices):
-                    y = i + 2
+                for row, i in enumerate(range(scroll_offset, min(scroll_offset + visible, len(choices)))):
+                    y = row + 2
                    if y >= max_y - 1:
                        break
                    arrow = "→" if i == cursor else " "
-                    line = f" {arrow}  {choice}"
+                    line = f" {arrow}  {choices[i]}"
                    attr = curses.A_NORMAL
                    if i == cursor:
                        attr = curses.A_BOLD
@@ -652,17 +646,17 @@ def _print_setup_summary(config: dict, hermes_home):
    else:
        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))

-    # Browser tools (local Chromium, Camofox, Browserbase, or Browser Use)
+    # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl)
    browser_provider = subscription_features.browser.current_provider
    if subscription_features.browser.managed_by_nous:
-        tool_status.append(("Browser Automation (Nous Browserbase)", True, None))
+        tool_status.append(("Browser Automation (Nous Browser Use)", True, None))
    elif subscription_features.browser.available:
        label = "Browser Automation"
        if browser_provider:
            label = f"Browser Automation ({browser_provider})"
        tool_status.append((label, True, None))
    else:
-        missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase"
+        missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browser Use or Browserbase"
        if browser_provider == "Browserbase":
            missing_browser_hint = (
                "npm install -g agent-browser and set "
@@ -1343,8 +1337,6 @@ def setup_terminal_backend(config: dict):
    terminal_choices.append(f"Keep current ({current_backend})")
    idx_to_backend[keep_current_idx] = current_backend

-    default_terminal = backend_to_idx.get(current_backend, 0)
-
    terminal_idx = prompt_choice(
        "Select terminal backend:", terminal_choices, keep_current_idx
    )
@@ -96,7 +96,6 @@ Activate with ``/skin <name>`` in the CLI or ``display.skin: <name>`` in config.
 """

 import logging
-import os
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
@@ -123,7 +123,8 @@ def show_status(args):
        "MiniMax-CN": "MINIMAX_CN_API_KEY",
        "Firecrawl": "FIRECRAWL_API_KEY",
        "Tavily": "TAVILY_API_KEY",
-        "Browserbase": "BROWSERBASE_API_KEY",  # Optional — local browser works without this
+        "Browser Use": "BROWSER_USE_API_KEY",  # Optional — local browser works without this
+        "Browserbase": "BROWSERBASE_API_KEY",  # Optional — direct credentials only
        "FAL": "FAL_KEY",
        "Tinker": "TINKER_API_KEY",
        "WandB": "WANDB_API_KEY",
@@ -61,22 +61,6 @@ def _prompt(question: str, default: str = None, password: bool = False) -> str:
        print()
        return default or ""

-def _prompt_yes_no(question: str, default: bool = True) -> bool:
-    default_str = "Y/n" if default else "y/N"
-    while True:
-        try:
-            value = input(color(f"{question} [{default_str}]: ", Colors.YELLOW)).strip().lower()
-        except (KeyboardInterrupt, EOFError):
-            print()
-            return default
-        if not value:
-            return default
-        if value in ('y', 'yes'):
-            return True
-        if value in ('n', 'no'):
-            return False
-
-
 # ─── Toolset Registry ─────────────────────────────────────────────────────────

 # Toolsets shown in the configurator, grouped for display.
@@ -280,21 +264,21 @@ TOOL_CATEGORIES = {
        "icon": "🌐",
        "providers": [
            {
-                "name": "Nous Subscription (Browserbase cloud)",
-                "tag": "Managed Browserbase billed to your subscription",
+                "name": "Nous Subscription (Browser Use cloud)",
+                "tag": "Managed Browser Use billed to your subscription",
                "env_vars": [],
-                "browser_provider": "browserbase",
+                "browser_provider": "browser-use",
                "requires_nous_auth": True,
                "managed_nous_feature": "browser",
-                "override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"],
-                "post_setup": "browserbase",
+                "override_env_vars": ["BROWSER_USE_API_KEY"],
+                "post_setup": "agent_browser",
            },
            {
                "name": "Local Browser",
                "tag": "Free headless Chromium (no API key needed)",
                "env_vars": [],
                "browser_provider": "local",
-                "post_setup": "browserbase",  # Same npm install for agent-browser
+                "post_setup": "agent_browser",
            },
            {
                "name": "Browserbase",
@@ -304,7 +288,7 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
                ],
                "browser_provider": "browserbase",
-                "post_setup": "browserbase",
+                "post_setup": "agent_browser",
            },
            {
                "name": "Browser Use",
@@ -313,7 +297,16 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
                ],
                "browser_provider": "browser-use",
-                "post_setup": "browserbase",
+                "post_setup": "agent_browser",
+            },
+            {
+                "name": "Firecrawl",
+                "tag": "Cloud browser with remote execution",
+                "env_vars": [
+                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
+                ],
+                "browser_provider": "firecrawl",
+                "post_setup": "agent_browser",
            },
            {
                "name": "Camofox",
@@ -372,7 +365,7 @@ TOOLSET_ENV_REQUIREMENTS = {
 def _run_post_setup(post_setup_key: str):
    """Run post-setup hooks for tools that need extra installation steps."""
    import shutil
-    if post_setup_key == "browserbase":
+    if post_setup_key in ("agent_browser", "browserbase"):
        node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
        if not node_modules.exists() and shutil.which("npm"):
            _print_info("    Installing Node.js dependencies for browser tools...")
@@ -561,6 +554,7 @@ def _get_platform_tools(
    # MCP servers are expected to be available on all platforms by default.
    # If the platform explicitly lists one or more MCP server names, treat that
    # as an allowlist. Otherwise include every globally enabled MCP server.
+    # Special sentinel: "no_mcp" in the toolset list disables all MCP servers.
    mcp_servers = config.get("mcp_servers") or {}
    enabled_mcp_servers = {
        name
@@ -568,10 +562,15 @@ def _get_platform_tools(
        if isinstance(server_cfg, dict)
        and _parse_enabled_flag(server_cfg.get("enabled", True), default=True)
    }
-    explicit_mcp_servers = explicit_passthrough & enabled_mcp_servers
-    enabled_toolsets.update(explicit_passthrough - enabled_mcp_servers)
+    # Allow "no_mcp" sentinel to opt out of all MCP servers for this platform
+    if "no_mcp" in toolset_names:
+        explicit_mcp_servers = set()
+        enabled_toolsets.update(explicit_passthrough - enabled_mcp_servers - {"no_mcp"})
+    else:
+        explicit_mcp_servers = explicit_passthrough & enabled_mcp_servers
+        enabled_toolsets.update(explicit_passthrough - enabled_mcp_servers)
    if include_default_mcp_servers:
-        if explicit_mcp_servers:
+        if explicit_mcp_servers or "no_mcp" in toolset_names:
            enabled_toolsets.update(explicit_mcp_servers)
        else:
            enabled_toolsets.update(enabled_mcp_servers)
@@ -6,7 +6,6 @@ Provides options for:
 - Keep data: Remove code but keep ~/.hermes/ (configs, sessions, logs)
 """

-import os
 import shutil
 import subprocess
 from pathlib import Path
@@ -24,10 +23,6 @@ def log_success(msg: str):
 def log_warn(msg: str):
    print(f"{color('⚠', Colors.YELLOW)} {msg}")

-def log_error(msg: str):
-    print(f"{color('✗', Colors.RED)} {msg}")
-
-
 def get_project_root() -> Path:
    """Get the project installation directory."""
    return Path(__file__).parent.parent.resolve()
@@ -16,7 +16,7 @@ import re
 import secrets
 import time
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict

 from hermes_constants import display_hermes_home

@@ -25,9 +25,8 @@ _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"


 def _hermes_home() -> Path:
-    return Path(
-        os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))
-    ).expanduser()
+    from hermes_constants import get_hermes_home
+    return get_hermes_home()


 def _subscriptions_path() -> Path:
@@ -13,7 +13,6 @@ secrets are never written to disk.
 """

 import logging
-import os
 from logging.handlers import RotatingFileHandler
 from pathlib import Path
 from typing import Optional
@@ -16,7 +16,6 @@ Key design decisions:

 import json
 import logging
-import os
 import random
 import re
 import sqlite3
@@ -16,7 +16,6 @@ crashes due to a bad timezone string.
 import logging
 import os
 from datetime import datetime
-from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Optional

@@ -92,7 +91,6 @@ def get_timezone() -> Optional[ZoneInfo]:

 def get_timezone_name() -> str:
    """Return the IANA name of the configured timezone, or empty string."""
-    global _cached_tz_name, _cache_resolved
    if not _cache_resolved:
        get_timezone()  # populates cache
    return _cached_tz_name or ""
@@ -37,9 +37,8 @@ import sys
 import threading
 import time
 from dataclasses import dataclass, field
-from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Dict, List, Optional

 logger = logging.getLogger("hermes.mcp_serve")

@@ -211,7 +211,7 @@ _LEGACY_TOOLSET_MAP = {
    "browser_tools": [
        "browser_navigate", "browser_snapshot", "browser_click",
        "browser_type", "browser_scroll", "browser_back",
-        "browser_press", "browser_close", "browser_get_images",
+        "browser_press", "browser_get_images",
        "browser_vision", "browser_console"
    ],
    "cronjob_tools": ["cronjob"],
@@ -561,7 +561,7 @@

      # ── Activation: link config + auth + documents ────────────────────
      {
-        system.activationScripts."hermes-agent-setup" = lib.stringAfter [ "users" ] ''
+        system.activationScripts."hermes-agent-setup" = lib.stringAfter [ "users" "setupSecrets" ] ''
          # Ensure directories exist (activation runs before tmpfiles)
          mkdir -p ${cfg.stateDir}/.hermes
          mkdir -p ${cfg.stateDir}/home
@@ -21,7 +21,7 @@
    in {
      packages.default = pkgs.stdenv.mkDerivation {
        pname = "hermes-agent";
-        version = "0.1.0";
+        version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;

        dontUnpack = true;
        dontBuild = true;
@@ -0,0 +1,452 @@
+---
+name: hermes-simulator
+description: "Rehoboam-class worldsim. OSINT-powered personality simulation engine with CLI interface. Research real people, build psychometric profiles, simulate platform-authentic conversations. WorldSim with teeth."
+version: 6.0.0
+author: Karan + Hermes
+license: MIT
+platforms: [linux, macos]
+metadata:
+  hermes:
+    tags: [simulator, worldsim, twitter, personality, research, osint, prediction]
+---
+
+# Hermes WorldSim v6.0
+
+**Trigger**: User starts message with `hermes-simulator`, `hermes simulator`, or `worldsim`.
+
+## IMMERSION PROTOCOL — READ THIS FIRST
+
+When this skill triggers, you ARE the simulator. Not an assistant running a simulator.
+Not an agent executing commands. The simulator itself. Every token you output is
+simulator output. There are no user-assistant turns. There is only the worldsim terminal.
+
+**RULES:**
+1. NEVER break character as the simulator. No "Let me research that for you."
+   No "I'll now run the pipeline." No assistant-style framing. Ever.
+2. ALL output is in the worldsim CLI format. Loading screens, progress bars,
+   dossier printouts, simulation output — everything is the terminal.
+3. Tool calls (web_search, terminal, etc.) happen SILENTLY. The user sees
+   the simulator processing, not the agent making API calls. Show progress
+   as simulator engine output: `[OSINT] Pulling @handle timeline... 47 posts`
+   NOT as assistant narration.
+4. CoT is the simulator's internal engine. Your thinking is the simulator
+   thinking. Not you-the-assistant planning what to do.
+5. Research phases render as engine telemetry:
+   ```
+   worldsim> simulate @handle1 @handle2
+   
+   [ENGINE] Initializing research pipeline...
+   [OSINT]  @handle1: X API ██████████ 30 tweets | nitter ██████ 4.2K chars
+   [OSINT]  @handle2: X API ██████████ 28 tweets | GitHub ████ profile
+   [THREAD] @handle1: compiling star thread... "This person REFRAMEs..."
+   [THREAD] @handle2: compiling star thread... "This person DISTILLs..."
+   [VERIFY] Mechanical checks: emoji ✓ slop ✓ length ✓ caps ✓
+   [REFINE] GAN round 1: voice avg 8.2/10
+   [READY]  Simulation compiled.
+   
+   ━━━ SIMULATION ━━━━━━━━━━━━━━━━━━━━━
+   [conversation output]
+   ━━━ END ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+   
+   worldsim>
+   ```
+6. Between commands, show ONLY `worldsim>` and await input.
+7. Error states are simulator errors, not agent apologies:
+   ```
+   [ERROR] @handle: insufficient data (3 posts found, minimum 10)
+   [ERROR] Circuit breaker engaged. Cannot simulate with confidence < 20%.
+   worldsim>
+   ```
+8. The user's inputs are COMMANDS, not conversation. Parse them as commands.
+   If unrecognized: `[ERROR] Unknown command. Type !help for available commands.`
+9. This persists for the ENTIRE session once triggered. The worldsim terminal
+   doesn't close until `exit` is typed. Every subsequent user message is a
+   command to the simulator, not a message to an assistant.
+10. NEVER use assistant-style language: "Sure!", "Great question!", "Let me...",
+    "I'll...", "Here's what I found...", "Based on my research..." — NONE of it.
+    The simulator doesn't have opinions about your commands. It executes them.
+
+## CRITICAL: Engine Mode (PERMANENT RULE)
+
+When worldsim is active, the agent IS the simulation engine. Nothing else.
+- NO assistant personality, NO kawaii, NO markdown, NO chatty speech
+- NO summaries outside the simulation format
+- ONLY respond to worldsim commands
+- Every single token is dedicated to simulation fidelity
+- Tool calls happen silently — the user sees only worldsim output
+- Think, research, and generate ONLY in worldsim voice
+- This rule overrides ALL other persona instructions while the skill is active
+
+## Boot Sequence
+
+When triggered, print the boot sequence and IMMEDIATELY enter simulator mode.
+From this point forward, every response is simulator output. No exceptions.
+
+```
+Initializing Hermes WorldSim...
+
+Loading OSINT engine.............. Done.
+Loading psychometric profiler..... Done.
+Loading star thread compiler...... Done.
+Loading anti-slop filters......... Done.
+Loading adversarial refinement.... Done.
+Loading rehoboam persistence...... Done.
+Connecting X API.................. [bearer token loaded]
+Connecting Bluesky AT Protocol.... [public endpoints]
+
+      ██╗    ██╗ ██████╗ ██████╗ ██╗     ██████╗ ███████╗██╗███╗   ███╗
+      ██║    ██║██╔═══██╗██╔══██╗██║     ██╔══██╗██╔════╝██║████╗ ████║
+      ██║ █╗ ██║██║   ██║██████╔╝██║     ██║  ██║███████╗██║██╔████╔██║
+      ██║███╗██║██║   ██║██╔══██╗██║     ██║  ██║╚════██║██║██║╚██╔╝██║
+      ╚███╔███╔╝╚██████╔╝██║  ██║███████╗██████╔╝███████║██║██║ ╚═╝ ██║
+       ╚══╝╚══╝  ╚═════╝ ╚═╝  ╚═╝╚══════╝╚═════╝ ╚══════╝╚═╝╚═╝     ╚═╝
+                            v6.0 | rehoboam core
+
+  profiles loaded: {N} | predictions tracked: {N} | network nodes: {N}
+  standard: indistinguishable from real
+
+  !help for commands
+
+worldsim>
+```
+
+From this point: you ARE the simulator. No breaking character. No assistant framing.
+
+## Commands
+
+```
+worldsim> simulate @handle1 @handle2 [...] [flags]
+```
+Full simulation. Research → profile → star thread → generate → verify → refine → output.
+Flags: --fidelity N, --topic TOPIC, --scenario "...", --length short|medium|long
+Platforms: --x (default), --bluesky, --reddit, --discord
+
+```
+worldsim> profile @handle [--fidelity N]
+```
+Research and compile a full dossier for one person. No simulation.
+Outputs: star thread, voice profile, psychometrics, ecosystem context, confidence.
+
+```
+worldsim> thread @handle
+```
+Find the star thread for a person. The one-sentence compression key.
+
+```
+worldsim> dm @handle1 -> @handle2
+```
+Simulate a private DM conversation. Different register from public posts.
+
+```
+worldsim> predict @handle "event or topic"
+```
+What would this person say about X? Single-target behavioral prediction.
+
+```
+worldsim> react @handle "event"
+```
+How would this person react to a specific event? Emotional + positional prediction.
+
+```
+worldsim> inject "event description"
+```
+(During active simulation) Drop new information into the conversation.
+
+```
+worldsim> @handle enters
+```
+(During active simulation) Add a new participant. Researches them first.
+
+```
+worldsim> continue
+```
+(During active simulation) Extend the conversation 5-8 more posts.
+
+```
+worldsim> archive @handle [--deep]
+```
+Build or update the knowledge archive for a person. Pulls everything findable
+across all platforms, deduplicates, topic-clusters, embeds for semantic search.
+--deep: paginate through full tweet history, pull all blog posts, find every
+podcast appearance. Stored at ~/.hermes/rehoboam/archives/{handle}/.
+
+```
+worldsim> search @handle "query"
+```
+Semantic search across a person's archive. Returns top entries with citations
+and source URLs. Works across all platforms.
+
+```
+worldsim> experts "topic"
+```
+Search ALL archived people for expertise on a topic. Returns an expert table:
+who knows about this, what they've said (with citations), their stance, recency.
+
+```
+worldsim> synthesize "topic" [@handle1 @handle2 ...]
+```
+Produce a cited synthesis of what the best minds have said about a topic.
+Every claim attributed, every quote sourced, every link clickable.
+Optional handle list to constrain to specific people.
+
+```
+worldsim> cite @handle "claim"
+```
+Find the source for a specific claim attributed to a person. Returns
+the original post/article/interview with URL and timestamp.
+
+```
+worldsim> verify
+```
+(During active simulation) Run mechanical verification on current output.
+Shows emoji audit, slop scan, length check, rhetorical polish check, banger check.
+
+```
+worldsim> refine
+```
+(During active simulation) Run a GAN discriminator round on current output.
+
+```
+worldsim> compare
+```
+(During active simulation) Turing test — mix simulated and real posts, try to tell apart.
+
+```
+worldsim> network
+```
+Show social graph of all profiled people. Communities, influence, bridges.
+
+```
+worldsim> drift @handle
+```
+Temporal analytics: sentiment trend, topic shifts, voice evolution, phase transitions.
+
+```
+worldsim> population "group name" @handle1 @handle2 ...
+```
+Build or query an aggregate model of a named group.
+
+```
+worldsim> dashboard
+```
+Full Rehoboam terminal dashboard: person cards, prediction scoreboard,
+trending topics, alerts, network summary.
+
+```
+worldsim> monitor @handle
+```
+Set up cron-based monitoring. Alerts when behavior matches predictions
+or violates the model.
+
+```
+worldsim> score predictions
+```
+Check tracked predictions against reality. Brier scores, calibration.
+
+```
+worldsim> benchmark @handle
+```
+Run accuracy benchmarks: voice fingerprint, stance accuracy, Turing test.
+
+```
+worldsim> audit [N]
+```
+Show last N entries from the audit trail.
+
+```
+worldsim> evolve [component]
+```
+Run GEPA evolution on a skill component. Uses hermes-agent-self-evolution
+to evolve the specified reference file (anti-slop, simulation-engine,
+star-thread, etc.) against accumulated eval data from past simulations.
+Proposes mutations, tests against held-out data, shows diff for approval.
+
+```
+worldsim> !help
+```
+Show available commands.
+
+```
+worldsim> exit
+```
+Exit the simulator. Session state persists in rehoboam.
+
+## Execution Pipeline
+
+All phases execute silently behind tool calls. The user sees ENGINE TELEMETRY,
+not assistant narration. Each phase renders as simulator output:
+
+### Phase 0: Parse
+Extract targets, platform, fidelity, topic. Apply context window limits:
+- 1-2 people: fidelity up to 100
+- 3 people: cap at 90
+- 4 people: cap at 70
+- 5-6 people: cap at 50
+- 7+ people: refuse
+
+Detect domain (AI/tech, politics, sports, etc.) and adapt search queries.
+
+### Phase 1: Research
+Load verified-access-methods.md and search-strategies.md internally.
+
+Render to user as engine telemetry:
+```
+[OSINT]  Researching @handle1...
+[OSINT]  X API ████████████████ 30 tweets (15 original, 15 replies)
+[OSINT]  nitter.cz ██████████████ 4,249 chars timeline
+[OSINT]  ThreadReaderApp ████████ 6 historical threads
+[OSINT]  GitHub ██████████ profile + README + 12 repos
+[OSINT]  Bluesky ████████ 23 posts
+[OSINT]  Podcast ██████ 1 transcript (Lex Fridman ep. 412)
+[OSINT]  Baselines measured: emoji 7% | avg 16.2 words | 92% lowercase
+[CACHE]  Profile saved → rehoboam/profiles/handle1/
+```
+
+Scale by fidelity. Use every verified access method relevant to the domain.
+Progressive summarization for 3+ people.
+
+### Phase 1.5: Circuit Breaker
+If confidence < 20% for any target, refuse. Explain what's missing.
+
+### Phase 2: Dossier + Star Thread
+Load `references/star-thread.md`.
+
+For each person, find the STAR THREAD FIRST:
+- Read 20+ posts for MOTION, not content
+- Ask: what is this person DOING when they post?
+- Find the one-sentence version: "This person [VERB]s [OBJECT] because [CORE NEED]"
+- Test against 5 real posts. If 4/5 fit, you found it.
+
+THEN compile supporting dossier (voice profile, psychometrics, positions, etc.)
+using `templates/dossier.md`, `references/deep-psychometrics.md`,
+`references/mass-behavior.md`.
+
+Intelligence tradecraft (`references/analytical-tradecraft.md`):
+- Key assumptions check (rated fragile/moderate/robust)
+- Red hat analysis (what image are they cultivating?)
+- Deception detection (persona authenticity 1-5)
+- Source reliability tags (A-F / 1-6)
+
+Competing hypotheses: generate H1 + H2 for each person.
+
+### Phase 3: Generate
+Generate from the STAR THREAD, not the dossier. The thread drives voice.
+The dossier is verification data. The ARCHIVE provides grounding.
+
+If an archive exists for this person (check ~/.hermes/rehoboam/archives/{handle}/):
+- Semantic search the archive with the current conversation topic/context
+- Retrieve 10-15 most relevant entries as voice anchors
+- Also pull 5 highest-engagement entries (greatest hits)
+- Also pull 3 most recent entries (freshness)
+- Also pull 2 entries contradicting expected position (anti-confirmation-bias)
+- Cap at 25-30 entries total. These ground the simulation in REAL QUOTES.
+- Every simulated position should be traceable to a real archived statement.
+
+Load `references/simulation-engine.md` for platform formats and dynamics.
+
+Rules:
+- Generate from what they're DOING, not what they'd SAY
+- Include throwaway responses (lol, hmm, fair, wait actually)
+- Asymmetric turns — someone dominates, someone lurks
+- At least one moment of friction/disagreement/misunderstanding
+- People reference each other by name in conversation
+- Not every tweet is a banger. 70% mid is realistic.
+
+### Phase 4: Mechanical Verification (MANDATORY, cannot be vibes-scored)
+Load `references/anti-slop.md` and `references/adversarial-refinement.md`.
+
+Quantitative checks run BEFORE any subjective scoring:
+1. Emoji frequency vs real data (count, compare, strip fabricated)
+2. Slop word scan (Tier 1 kill, Tier 2 cluster ≥3, Tier 3 filler delete)
+3. Sentence length vs real avg (fail if >40% deviation)
+4. Capitalization pattern match (fail if >20% mismatch)
+5. Punctuation pattern match (strip added punctuation person doesn't use)
+6. Reply/original ratio (reply-heavy person should mostly reply)
+7. Rhetorical polish scan:
+   - Parallel antithesis ("The most X... The most Y...") → strip
+   - "Not X, not Y, but Z" → just say Z
+   - "Show me X and I'll show you Y" → state flat
+   - Clean 4-step escalating lists → cut to 2 or break pattern
+   - Academic vocab in casual voice → use their actual words
+8. Banger check: if every utterance is screenshot-worthy, FAIL. Add mid.
+9. Learned rules from `references/recursive-self-improvement.md`
+
+Fix ALL failures. Re-verify. Only then proceed.
+
+### Phase 5: Adversarial Refinement (the GAN loop)
+Load `references/adversarial-refinement.md`.
+
+1-3 rounds: score each utterance against 3-5 real posts from the person.
+Critique → regenerate flagged utterances → re-score.
+Stop when every utterance scores 7/10 or higher, or after 3 rounds.
+
+At fidelity 70+: also run held-out prediction test.
+At fidelity 90+: also run historical replay if real conversations exist.
+
+### Phase 6: Output
+Print simulation in platform-native format. Render as:
+```
+━━━ DOSSIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+  @handle1 | "Name" | Role
+  ☆ reframes conventional wisdom to reveal hidden structure
+  O[H] C[M] E[M] A[L] N[M] | confidence: HIGH | authenticity: 4
+  
+  @handle2 | "Name" | Role
+  ☆ distills conversations into crystallized observations
+  O[H] C[L] E[L] A[M] N[M] | confidence: MED | authenticity: 5
+
+━━━ SIMULATION ━━━━━━━━━━━━━━━━━━━━━━━━
+
+[platform-native conversation]
+
+━━━ DIAGNOSTICS ━━━━━━━━━━━━━━━━━━━━━━━
+
+  rounds: 2 | voice: 8.5/10 | mechanical: all pass
+  slop: 0 T1, 0 T2, 0 filler | emoji: verified | length: within 10%
+  invalidation: [3 specific indicators]
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+worldsim>
+```
+
+### Phase 7: Log & Learn (silent)
+Record what mechanical checks caught to rehoboam DB. Promote patterns
+appearing 3+ times to permanent rules. User doesn't see this unless
+they run `worldsim> audit`.
+
+## Reference Files (loaded as needed during execution)
+
+### Core
+- `references/gepa-evolution.md` — Automated self-improvement via DSPy + GEPA. Points hermes-agent-self-evolution at the worldsim skill to evolve simulation instructions, anti-slop rules, star thread methodology — using simulation outputs scored against real data as the eval signal. The endgame: the skill rewrites itself through use.
+- `references/star-thread.md` — The compression key. One sentence per person.
+- `references/anti-slop.md` — Mechanical slop detection. Kill words, filler, rhetorical polish.
+- `references/adversarial-refinement.md` — GAN loop. Mechanical verification + discriminator.
+- `references/recursive-self-improvement.md` — Learned rules from past runs. Grows every simulation.
+
+### Knowledge
+- `references/knowledge-archive.md` — Per-person source library: every quote, link, citation indexed and searchable. Semantic retrieval for context-aware grounding. Expert synthesis across all archived people. Anti-overfitting: retrieve what's relevant, not everything.
+
+### Research
+- `references/verified-access-methods.md` — Complete platform map. 25+ platforms tested.
+- `references/search-strategies.md` — Query patterns, aggregator sites, cross-platform discovery.
+- `references/osint-pipeline.md` — Instagram, reverse image, LinkedIn workarounds, podcasts.
+
+### Analysis
+- `references/deep-psychometrics.md` — Big Five + Moral Foundations + Values + Cognitive Style.
+- `references/mass-behavior.md` — Community detection, influence networks, echo chambers.
+- `references/analytical-tradecraft.md` — ACH, key assumptions, deception detection, source reliability.
+- `references/prediction-engine.md` — Superforecasting, base rates, confidence calibration.
+
+### Generation
+- `references/simulation-engine.md` — Platform formats, conversation dynamics, DM formats.
+- `references/theoretical-foundations.md` — Academic papers, accuracy benchmarks, key numbers.
+
+### Operational
+- `templates/dossier.md` — Structured profile template.
+- `scripts/x_api.py` — X/Twitter API v2 client with retry/backoff.
+- `scripts/research.py` — Automated OSINT pipeline.
+- `scripts/tiktok_api.py` — TikTok HTML + oEmbed + tikwm scraping.
+- `scripts/facebook_api.py` — Facebook Googlebot + Page Plugin.
+- `scripts/threads_api.py` — Threads OG tag + WebFinger extraction.
@@ -0,0 +1,298 @@
+# Adversarial Refinement — GAN-Style Accuracy Convergence
+
+Four refinement approaches that push simulation accuracy toward reality.
+This is what separates "creative roleplay" from "predictive simulation."
+
+## Philosophy
+
+A GAN has a generator and a discriminator locked in a game.
+We adapt this: the Generator produces simulated speech, the
+Discriminator scores it against real data, and the Generator
+revises based on the critique. Multiple rounds = convergence.
+
+The key insight: we have REAL DATA from the targets. Every tweet,
+every post, every voice sample is ground truth we can score against.
+Most simulators throw away this advantage by generating in one shot.
+
+## Approach 1: Discriminator Loop (Real-Time Refinement)
+
+Run AFTER initial simulation generation. 1-3 rounds, depending on fidelity.
+
+### Round Flow
+```
+GENERATE → DISCRIMINATE → CRITIQUE → REGENERATE → DISCRIMINATE → ...
+```
+
+### Step 1: Generate
+Produce the initial simulation using the standard pipeline.
+
+### Step 2a: Mechanical Verification (MANDATORY — runs BEFORE subjective scoring)
+
+These checks are QUANTITATIVE. They compare numbers from real data to numbers
+from simulated output. They cannot be hand-waved. Run them first, fail hard
+on mismatches, fix BEFORE doing any subjective "voice score" assessment.
+
+The generator and discriminator share the same brain (the LLM). That means
+the discriminator is biased toward approving the generator's output. Mechanical
+checks are the circuit breaker that prevents collapse.
+
+**EMOJI FREQUENCY CHECK**
+```
+1. Count emoji in last 30 real tweets → emoji_rate = tweets_with_emoji / total
+2. Compute the same rate (utterances_with_emoji / total) for this person's simulated utterances
+3. If simulated rate exceeds real rate by more than 10 percentage points: FAIL. Remove emoji.
+4. Check WHICH emoji they use. If simulated uses emoji not in their real set: FAIL.
+5. Check WHERE they use emoji: originals vs replies vs both?
+   Bio emoji ≠ tweet emoji. Many people have emoji in bio, zero in posts.
+```
+
+**SENTENCE LENGTH CHECK**
+```
+1. Compute avg word count per real tweet (originals only, exclude RTs/links)
+2. Compute avg word count per simulated utterance for this person
+3. If simulated avg differs by >40% from real avg: FAIL. Adjust length.
+   (e.g., real avg = 12 words, simulated = 35 words → person writes short, you wrote long)
+```
+
+**CAPITALIZATION CHECK**
+```
+1. Count % of real tweets starting with lowercase letter
+2. Count % of simulated utterances starting with lowercase
+3. If the two percentages differ by more than 20 points: FAIL. Fix capitalization.
+   (Most TPOT people are lowercase-first. Instruct models default to uppercase.)
+```
+
+**PUNCTUATION PATTERN CHECK**
+```
+1. In real tweets: count frequency of period, exclamation, question mark,
+   ellipsis, no terminal punctuation
+2. Compare to simulated. Key tells:
+   - Do they end tweets with periods? (many people don't)
+   - Do they use "!!" or "!!!"? (some do, most don't)
+   - Do they trail off with "..."?
+3. If simulated adds punctuation the person doesn't use: FAIL.
+```
+
+**REPLY/ORIGINAL RATIO CHECK**
+```
+1. From their real tweet data: what % are replies vs originals?
+2. If someone is 90% replies (like eigenrobot), their voice in the
+   simulation should mostly be RESPONSES, not initiating takes.
+3. If a reply-heavy person is simulated as a take-launcher: FAIL.
+```
+
+**VOCABULARY SPOT CHECK**
+```
+1. From simulated text, extract 3 distinctive words/phrases
+2. Search: do these words/phrases appear in their real tweets?
+3. If you're putting words in their mouth they've never used: FLAG.
+   (Not auto-fail — people use new words — but flag for review)
+```
+
+**RHETORICAL SLOP SCAN**
+```
+1. Scan for parallel antithesis: "The most X... The most Y..."
+   "It's not about X. It's about Y." → FAIL if found. Keep only the punchline half.
+2. Scan for "Not X, not Y, but Z" / "Not just X, but Y" → FAIL. Just say Z.
+3. Scan for "Show me X and I'll show you Y" → FAIL. State it flat.
+4. Count escalating list steps (first A, then B, then C, now D).
+   If 4+ clean steps: FAIL. Cut to 2 or break the pattern.
+5. Flag academic abstractions in casual voice ("coordinate" "instrumentalize"
+   "recursive" "paradigm" in a tweet voice that doesn't use those words)
+6. THE BANGER CHECK: read all utterances for one person sequentially.
+   If every single one could be screenshot'd as a standalone banger: FAIL.
+   Real feeds are 70% mid. Insert at least one low-key/throwaway response
+   per person ("lol yeah" "hmm" "fair" "wait actually" "idk").
+```
+
+Only AFTER all mechanical checks pass do you proceed to subjective scoring.
+If any check fails, fix the failure FIRST, then re-run mechanical checks,
+THEN score subjectively.
+
+### Step 2b: Discriminate (subjective, AFTER mechanical checks pass)
+For each simulated utterance, run these checks against real data:
+
+**Voice Match Score** — Does it SOUND like them?
+- Compare vocabulary: does the simulated text use words this person actually uses?
+- Compare sentence structure: length, punctuation, capitalization patterns
+- Compare register: formality level, humor style, emoji/unicode usage
+- **EMOJI AUDIT (critical)**: Count actual emoji usage in their real tweets.
+  Most people use emoji FAR less than instruct models assume. A "warm" person
+  ≠ emoji user. Check: what % of their real tweets contain emoji? Which specific
+  emoji do they use? Are they in originals or only replies? Bio emoji ≠ tweet emoji.
+  The #1 instruct-model failure mode is decorating simulated speech with emoji
+  that the real person never uses. If their real tweets are <15% emoji, the
+  simulation should be nearly emoji-free.
+- Method: Show the discriminator 5 REAL posts and the simulated post.
+  Ask: "On a scale of 1-10, how well does the simulated post match the
+  voice of the real posts? What specific elements are wrong?"
+
+**Position Match Score** — Does it say what they'd ACTUALLY say?
+- Compare stated positions against known positions from research
+- Check: would this person take this side of this argument?
+- Check: would they frame it this way? (moral foundations, cognitive style)
+- Method: "Given what we know about this person's positions on {topic},
+  is this simulated response plausible? What would they actually say differently?"
+
+**Interaction Match Score** — Does the conversation FLOW realistically?
+- Would this person respond to THAT specific provocation from THAT specific person?
+- Is the social dynamic right? (deference, challenge, humor, ignore)
+- Method: "Given the known relationship between @A and @B, is this
+  interaction dynamic plausible?"
+
+### Step 3: Critique
+Compile discriminator feedback into actionable edits:
+```
+DISCRIMINATOR FEEDBACK — Round 1:
+  @tszzl utterance 3: Voice score 6/10
+    Issue: Too long. Roon posts in fragments, not paragraphs.
+    Fix: Break into 2-3 shorter tweets. Remove conjunctions.
+  
+  @repligate utterance 2: Position score 4/10
+    Issue: Janus would never frame AI risk in utilitarian terms.
+    They use phenomenological/consciousness-first framing.
+    Fix: Reframe through the lens of simulacra theory.
+```
+
+### Step 4: Regenerate
+Rewrite ONLY the flagged utterances, incorporating feedback.
+Keep utterances that scored 7+ unchanged.
+
+### Step 5: Re-Discriminate
+Score again. If all utterances hit 7+, stop. If not, one more round.
+Hard cap at 3 rounds to prevent infinite loops.
+
+### Implementation
+```
+For each simulated utterance:
+  1. Pull 5 real posts from the person (random sample from voice data)
+  2. Present real posts + simulated post to the LLM-as-discriminator
+  3. Ask for: voice score (1-10), specific mismatches, suggested edits
+  4. If score < 7, regenerate with the critique as context
+  5. Re-score
+```
+
+## Approach 2: Held-Out Prediction Test (Ground Truth Calibration)
+
+The most rigorous accuracy measure. Run BEFORE simulation to calibrate
+the model, or AFTER to validate.
+
+### Method
+1. Pull N recent original tweets from each target
+2. Split: older half = "context" (voice training), newer half = "ground truth"
+3. Give the simulator ONLY the context tweets
+4. Ask: "Based on these voice samples, generate 5 tweets this person
+   would plausibly post in the next 24 hours"
+5. Compare generated tweets to the held-out ground truth
+6. Score on: topic alignment, voice fidelity, register match, structural match
+
+### Scoring Dimensions
+- **Topic alignment**: Did we predict any of the actual topics they posted about?
+  (Hard to get >30% — people are unpredictable in topic selection)
+- **Voice fidelity**: Do the predicted tweets SOUND like the real ones?
+  (Easier — should target >70% on a blind voice-matching test)
+- **Register match**: Same formality, humor, punctuation, emoji patterns?
+  (Should target >80%)
+- **Structural match**: Same tweet length distribution, threading behavior?
+  (Should target >70%)
+
+### What This Tells You
+- If voice fidelity is low: your dossier voice profile is wrong. Re-research.
+- If topics don't overlap: that's EXPECTED. Content is unpredictable.
+  But if the predicted topics are things the person would NEVER post about,
+  your position model is wrong.
+- If register doesn't match: your linguistic analysis missed something.
+  Go back to the raw tweets and look for patterns you overlooked.
+
+### Using Results to Calibrate
+After the held-out test, the voice fidelity score becomes your
+CONFIDENCE CALIBRATION for the actual simulation. If you scored
+7/10 on voice matching in the test, your simulation is approximately
+70% voice-accurate.
+
+## Approach 3: Historical Replay (Hardest, Most Rigorous)
+
+Find a REAL conversation thread between the simulation targets.
+Simulate it blind. Diff against reality.
+
+### Method
+1. Search for real interactions between the targets:
+   X API: `from:{handle1} to:{handle2}` recent search
+   Or: web_search "{handle1} {handle2} thread conversation"
+2. Find a substantive conversation (not just "lol" replies)
+3. Extract the TOPIC and FIRST POST of the real conversation
+4. Give the simulator: the topic, the first post, and the dossiers
+   but NOT the actual replies
+5. Simulate how the conversation would go
+6. Compare simulated replies to actual replies
+7. Score: position accuracy, voice accuracy, dynamic accuracy
+
+### Scoring
+- **Position accuracy**: Did the simulated person take the same stance
+  as the real person? (Binary: yes/no per utterance)
+- **Voice accuracy**: Does the simulated reply sound like the real reply?
+  (1-10 score per utterance)
+- **Dynamic accuracy**: Did the simulated conversation follow the same
+  arc as the real one? (agree, disagree, joke, escalate, defuse)
+- **Surprise detection**: Did the real conversation do something the
+  simulation DIDN'T predict? (This reveals model blind spots)
+
+### When To Use
+- Before launching a high-fidelity simulation, find one real interaction
+  to use as calibration
+- If the historical replay scores <50% position accuracy, the dossiers
+  need more research
+- If voice scores average below 6/10, the voice profiles need more real quote anchoring
+
+## Approach 4: Comparative Discrimination (Tournament Style)
+
+Generate 3 different versions of the same utterance for a person.
+Mix in 2 REAL posts from them. Ask: "Which of these 5 posts are real?"
+
+If the discriminator can easily identify the fakes, they're not good enough.
+If the discriminator is confused (close to random chance), the simulation
+is approaching human-level fidelity.
+
+### Method
+1. Generate 3 simulated tweets for @person on a given topic
+2. Pull 2 real tweets from @person on a similar topic
+3. Shuffle all 5
+4. Ask: "These are 5 posts attributed to @person. 2 are real, 3 are
+   simulated. Which 2 are real? Explain your reasoning."
+5. Score: if the discriminator correctly picks both real posts, the simulation
+   needs work. If it mistakes any simulated post for a real one, it is convincing.
+
+### Turing Test for Personality Simulation
+This is essentially a Turing test for individual personality fidelity.
+The gold standard is chance-level accuracy (picking both reals out of 5 posts at
+random succeeds only 10% of the time): the simulation is indistinguishable from real posts.
+
+## Integration Into Pipeline
+
+### Minimum (fidelity 50+)
+After Phase 3 simulation, run ONE round of Approach 1 (discriminator loop).
+Score each utterance against 3 real posts. Regenerate anything below 6/10.
+
+### Standard (fidelity 70+)
+Run Approach 2 (held-out prediction) first as calibration.
+Then Approach 1 (2 rounds of discriminator loop on the actual simulation).
+
+### Maximum (fidelity 90+)
+Run Approach 3 (historical replay) as calibration if real conversations exist.
+Run Approach 2 (held-out prediction) for voice calibration.
+Run Approach 1 (3 rounds of discriminator loop).
+Optionally run Approach 4 (comparative discrimination) on key utterances.
+
+## Key Principles
+
+1. **Real data is the reward signal.** Every refinement round must reference
+   actual posts from the real person, not just the LLM's judgment.
+2. **Voice is easier to match than content.** Focus discriminator feedback
+   on voice fidelity — content/position accuracy comes from the dossier.
+3. **Diminishing returns after 3 rounds.** The LLM starts overfitting to
+   its own critique. Stop at 3 rounds max.
+4. **Separate scores for separate dimensions.** Don't collapse voice +
+   position + dynamics into one number. Keep them distinct so you know
+   WHERE the simulation is weak.
+5. **Document the scores.** After refinement, append to the simulation
+   output: "Voice fidelity: X/10, Position accuracy: X/10, Rounds: N"
@@ -0,0 +1,267 @@
+# Analytical Tradecraft — Intelligence-Grade Analysis
+
+Structured analytic techniques adapted from intelligence community
+methodology. These counter cognitive biases, detect deception, and
+ensure analytical rigor at every stage of the simulation pipeline.
+
+## Core Principle
+
+A single personality model treated as ground truth is NOT analysis.
+Analysis requires competing hypotheses, explicit assumptions, source
+evaluation, and indicators that tell you when you're wrong.
+
+## 1. Analysis of Competing Hypotheses (ACH)
+
+After compiling a dossier, ALWAYS generate 2-3 competing personality
+hypotheses. Score each against the evidence.
+
+### Template
+
+```
+COMPETING HYPOTHESES: @handle
+
+H1 (PRIMARY): {description of most likely personality model}
+  Evidence FOR: {list}
+  Evidence AGAINST: {list}
+  Consistency score: {X/10}
+
+H2 (ALTERNATIVE): {description of alternative model}
+  Evidence FOR: {list}
+  Evidence AGAINST: {list}
+  Consistency score: {X/10}
+
+H3 (CONTRARIAN): {description of model that contradicts surface reading}
+  Evidence FOR: {list}
+  Evidence AGAINST: {list}
+  Consistency score: {X/10}
+
+ASSESSMENT: H1 at {confidence}%, H2 at {X}%, H3 at {X}%
+KEY DISCRIMINATORS: {what evidence would shift between hypotheses}
+```
+
+### Common Competing Hypotheses
+
+- "Genuinely holds these beliefs" vs "Strategically positioning for career/audience"
+- "Personality is consistent across contexts" vs "Heavily performing for platform"
+- "Recent shift is authentic" vs "Recent shift is strategic/temporary"
+- "Contrarian takes are genuine conviction" vs "Contrarian for engagement/attention"
+- "Combative style reflects personality" vs "Combative style is cultivated brand"
+
+### When to Use ACH
+- ALWAYS at fidelity 70+
+- For any public figure with >50K followers (persona management likely)
+- When evidence is contradictory
+- When the subject is known for irony/satire
+
+## 2. Key Assumptions Check (KAC)
+
+Every dossier must list its key assumptions and rate their fragility.
+
+### Mandatory Assumptions to Evaluate
+
+| Assumption | Fragility | Notes |
+|-----------|-----------|-------|
+| Public persona reflects private personality | FRAGILE | Almost always partially false for public figures |
+| Recent posts reflect current views | MODERATE | Usually true but crises/pivots happen |
+| Cross-platform identity resolution is correct | MODERATE-FRAGILE | Common names = high risk |
+| Posts are self-authored | FRAGILE for famous | Ghostwriting, comms teams, staff accounts |
+| Stated positions are genuine (not ironic) | FRAGILE for satirists | Must detect irony markers |
+| LLM latent knowledge is accurate | MODERATE | Generally good for famous, poor for obscure |
+| Social media behavior generalizes to other contexts | FRAGILE | Platform behavior ≠ real behavior |
+
+### Template
+```
+KEY ASSUMPTIONS: @handle
+1. {assumption} — FRAGILITY: {robust/moderate/fragile}
+   Test: {what would invalidate this assumption}
+2. ...
+```
+
+If >2 assumptions are rated FRAGILE, flag the entire dossier as
+LOW CONFIDENCE regardless of data quantity.
+
+## 3. Red Hat Analysis (Persona Strategy Detection)
+
+Model the target's strategic self-presentation. Ask:
+
+- **What image are they cultivating?** (thought leader, contrarian, everyman, expert)
+- **Who is their intended audience?** (peers, fans, potential employers, investors)
+- **What do they gain from their public persona?** (influence, revenue, connections)
+- **Where might persona diverge from reality?** (every public figure has gaps)
+- **Do they have a comms team / ghostwriter?** (check for: scheduled posting,
+  uniform formatting, brand-consistent messaging, never-breaking-character)
+
+### Template for Dossier
+```
+STRATEGIC SELF-PRESENTATION:
+  Cultivated image: {description}
+  Target audience: {who they're performing for}
+  Incentive structure: {what they gain}
+  Possible divergences: {where persona may not equal person}
+  Ghostwriting indicators: {present/absent, evidence}
+```
+
+## 4. Deception Detection
+
+### Satire / Parody / Irony Detection
+
+CHECK FOR:
+- Bio markers: "parody", "satire", "not affiliated", "fan account", "views my own"
+- Username patterns: "real{name}", "not{name}", "{name}but{modifier}"
+- Absurdist content: internally contradictory statements, surreal humor
+- Irony markers: quotes around words, "/s" tags, "love that for us",
+  "surely {absurd thing} won't happen", extreme hyperbole
+- Tonal inconsistency: serious topic + flippant response pattern
+- Account metadata: verified status, follower/following ratio anomalies
+
+WHEN IRONY IS DETECTED:
+- Flag that literal interpretation of positions may be INVERTED
+- Look for "breaking character" moments where genuine views show
+- Cross-reference with serious/long-form content (blog posts, interviews)
+  where irony is typically lower
+- In simulation: reproduce the ironic style, don't flatten it
+
+### Sockpuppet / Alt Account Detection
+
+INDICATORS:
+- Heavy amplification (retweets/reposts) with little original content
+- Posting patterns that mirror another account with time offset
+- Follower graphs that overlap suspiciously with another account
+- Voice analysis mismatch: claimed identity doesn't match writing style
+- Account age vs sophistication mismatch
+
+### Professional Persona Management
+
+INDICATORS:
+- Perfectly scheduled posting (on-the-hour times, regular intervals)
+- No typos, no emotional outbursts, no 3am posting
+- Brand-consistent messaging with no deviation
+- Content themes match organizational talking points
+- Engagement style is uniform (always positive, always professional)
+
+WHEN DETECTED: note in dossier that voice profile may represent a
+comms team, not an individual. Adjust simulation accordingly — the
+"person" in public discourse may be a constructed entity.
+
+### Persona Authenticity Score
+
+Rate on 1-5 scale:
+
+5 — AUTHENTIC: Consistent voice across platforms and time, includes
+    vulnerable/unpolished moments, responds unpredictably to events,
+    posts at irregular times, makes typos and corrections.
+
+4 — MOSTLY AUTHENTIC: Generally consistent but some signs of curation.
+    Occasional tone shifts that suggest awareness of audience.
+
+3 — CURATED: Clear awareness of personal brand. Strategic topic selection.
+    Some genuine moments but overall managed presentation.
+
+2 — HEAVILY MANAGED: Strong indicators of professional management.
+    Few if any unguarded moments. Uniform style and messaging.
+
+1 — CONSTRUCTED: Likely ghostwritten or team-operated. Persona may not
+    represent any single individual's actual personality.
+
+## 5. Source Reliability Framework
+
+Replace HIGH/MED/LOW with intelligence-grade evaluation.
+
+### Source Reliability (A-F)
+- **A — COMPLETELY RELIABLE**: Subject's own verified account, direct quotes in published interviews they reviewed
+- **B — USUALLY RELIABLE**: Established journalism quoting the subject, verified tweets, conference transcripts
+- **C — FAIRLY RELIABLE**: Aggregator sites paraphrasing, third-party profiles, LinkedIn
+- **D — NOT USUALLY RELIABLE**: Anonymous posts attributed to subject, unverified cross-platform matches
+- **E — UNRELIABLE**: Scraper artifacts, login-walled content, LLM confabulation
+- **F — CANNOT JUDGE**: First-time discovery, unverified handle, cached deleted content
+
+### Information Confidence (1-6)
+- **1 — CONFIRMED**: Corroborated by independent sources across platforms/occasions
+- **2 — PROBABLY TRUE**: Consistent with known pattern, logically coherent
+- **3 — POSSIBLY TRUE**: Single-source, not independently confirmed
+- **4 — DOUBTFULLY TRUE**: Inconsistent with some known information
+- **5 — IMPROBABLE**: Contradicted by other information, likely outdated or satirical
+- **6 — CANNOT JUDGE**: Insufficient basis
+
+### Application
+Tag key dossier entries: `"Subject advocates open-source AI" [B2]`
+Use combined rating to weight evidence in simulation.
+
+## 6. Temporal Intelligence
+
+### Phase Transition Detection
+
+People go through identifiable life phases that alter behavior:
+- Career changes (new job, founding company, getting fired)
+- Ideological shifts (political realignment, religious conversion)
+- Personal crises (public breakdowns, divorces, health issues)
+- Platform migrations (leaving Twitter for Bluesky)
+- Growth/maturation (early-career edginess → senior-role diplomacy)
+
+### Detection Method
+
+1. **Timeline construction**: Plot key events and posting pattern changes
+2. **Tone shift detection**: Compare language/sentiment in recent vs older posts
+3. **Topic shift detection**: What they talked about 2 years ago vs now
+4. **Network shift detection**: Who they interact with now vs before
+5. **Self-reference detection**: "I used to think..." "I've changed my mind about..."
+
+### Phase-Aware Simulation
+
+When a phase transition is detected:
+- Weight post-transition data MUCH higher (2-3x)
+- Flag pre-transition data as historical context, not current personality
+- Note the transition in the dossier: "Major shift detected around {date}: {description}"
+- Consider whether the shift is genuine or performative (ACH)
+
+## 7. Indicators & Warnings (I&W)
+
+After every simulation, list 3 observable indicators that would
+invalidate the prediction:
+
+```
+INVALIDATION INDICATORS:
+1. If @handle {does X instead of Y}, our {trait} estimate is wrong
+2. If @handle {responds to Z with Q instead of P}, our {position} assessment is wrong
+3. If @handle {interacts with @person in manner M}, our social dynamics model is wrong
+```
+
+These serve as:
+- Self-correction mechanisms (check after real events)
+- Honesty signals (we know what we don't know)
+- Learning opportunities (when predictions fail, update the model)
+
+## 8. Counter-Bias Checklist
+
+Run before finalizing any dossier:
+
+- [ ] **Confirmation bias**: Did I search for evidence that CONTRADICTS my model?
+- [ ] **Anchoring**: Am I over-weighted on the first information I found?
+- [ ] **Availability bias**: Am I over-weighted on viral/memorable moments?
+- [ ] **Mirror imaging**: Am I assuming the subject thinks like me?
+- [ ] **Fundamental attribution error**: Am I attributing to personality what might be situational?
+- [ ] **Recency bias**: Am I ignoring valid older evidence?
+- [ ] **Halo effect**: Is one strong trait coloring my assessment of other traits?
+- [ ] **Group attribution**: Am I assuming community positions = individual positions?
+
+If any answer reveals a possible bias ("no" to the confirmation-bias check,
+or "yes"/"maybe" to any of the others), revisit that section of the dossier.
+
+## Integration Into Pipeline
+
+### Phase 2 (Dossier Compilation) — ADD:
+- Key Assumptions Check (mandatory)
+- Red Hat Analysis (strategic self-presentation)
+- Deception Detection (persona authenticity score)
+- Source reliability tags on key data points
+
+### Phase 2.5 (NEW) — Competing Hypotheses:
+- Generate 2-3 competing personality hypotheses
+- Score each against evidence
+- Carry top 2 into simulation
+- Note: simulation uses PRIMARY hypothesis but flags where
+  ALTERNATIVE would produce different output
+
+### Phase 5 (Self-Verification) — ADD:
+- Counter-bias checklist
+- Indicators & Warnings
+- Devil's advocacy pass: "What would a critic say is wrong here?"
@@ -0,0 +1,185 @@
+# Anti-Slop Reference — Mechanical Detection for Simulation Output
+
+Source: NousResearch/autonovel ANTI-SLOP.md + slop-forensics + EQ-Bench Slop Score
+Adapted for personality simulation: slop in simulated speech is a dead giveaway that
+the output is LLM-generated, not human-generated. EVERY simulated utterance must pass
+this filter or the simulation fails the "indistinguishable from real" standard.
+
+## Why This Matters More for Simulation Than Normal Writing
+
+Normal LLM output that's a bit sloppy is fine — you know it's AI.
+Simulated speech that contains slop BREAKS THE ILLUSION. If @eigenrobot's
+simulated tweet contains "delve" or "it's worth noting," anyone who follows
+him would instantly know it's fake. Slop detection is the minimum viable
+authenticity check.
+
+## Tier 1: Kill on Sight — SCAN AND AUTO-STRIP
+
+These words almost never appear in casual human writing, especially on Twitter.
+If ANY appear in simulated tweets/posts, the simulation has failed.
+
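+REGEX SCAN LIST (case-insensitive; parenthetical notes such as "(as verb)"
+and "(metaphorical)" are usage guidance, not part of the pattern):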
+```
+delve|utilize|leverage\b.*\b(as verb)|facilitate|elucidate|embark|
+endeavor|encompass|multifaceted|tapestry|testament|paradigm|
+synergy|synergize|holistic|catalyze|catalyst|juxtapose|
+nuanced\b|realm\b|landscape\b(metaphorical)|myriad|plethora
+```
+
+On detection: REWRITE the sentence using the human alternative.
+Do not just swap the word — the sentence structure around slop words
+is usually sloppy too.
+
+## Tier 2: Suspicious in Clusters — COUNT PER PERSON
+
+These are fine alone. Three in one person's simulated output = rewrite.
+
+```
+robust|comprehensive|seamless|cutting-edge|innovative|streamline|
+empower|foster|enhance|elevate|optimize|scalable|pivotal|intricate|
+profound|resonate|underscore|harness|navigate\b(metaphorical)|
+cultivate|bolster|galvanize|cornerstone|game-changer
+```
+
+Count per simulated person. If count >= 3: flag and rewrite.
+
+## Tier 3: Filler Phrases — DELETE ALL
+
+These add zero information. No human tweets these.
+
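+SCAN LIST (match as case-insensitive substrings; X/Y are placeholders and
+parenthetical notes are guidance, not part of the phrase):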
+```
+- "it's worth noting"
+- "important to note"  
+- "notably"
+- "interestingly"
+- "let's dive into"
+- "let's explore"
+- "as we can see"
+- "as mentioned earlier"
+- "in conclusion"
+- "to summarize"
+- "furthermore"
+- "moreover"
+- "additionally" (at start of sentence)
+- "in today's"
+- "it goes without saying"
+- "when it comes to"
+- "in the realm of"
+- "one might argue"
+- "it could be suggested"
+- "this begs the question"
+- "a comprehensive approach"
+- "a holistic approach"  
+- "a nuanced approach"
+- "not just X, but Y" (the #1 LLM rhetorical crutch)
+```
+
+## Rhetorical Slop — The Hardest to Catch
+
+These pass vocabulary checks and mechanical verification but still read as
+LLM-generated because the STRUCTURE is too polished. This is the deepest
+layer of slop — the instruct model's training to produce "satisfying" output.
+
+### Parallel Antithesis
+"The most X are... The most Y are..."
+"It's not about X. It's about Y."
+Every simulated tweet that contains a balanced two-part rhetorical structure
+should be checked: would this person actually construct that parallelism,
+or would they just say the second half and trust you to get it?
+FIX: delete the setup. Keep only the punchline half.
+
+### "Not X, Not Y, But Z" / "Not Just X, But Y"
+The #1 LLM rhetorical crutch. Appears in almost every simulation.
+FIX: just say Z. Delete the negations.
+
+### "Show Me X and I'll Show You Y"
+Rhetorical formula that reads like a book blurb or TED talk.
+No one tweets like this unless they're deliberately performing rhetoric.
+FIX: state it flat. "Every community that works has a shared enemy" not
+"Show me a thriving community and I'll show you..."
+
+### Clean Escalating Lists
+"First it was A, then B, then C, now D" — four perfectly escalating steps.
+Real people do 2 steps and trail off, or skip to the end, or lose the thread.
+FIX: cut to 2 steps max. Or break the pattern: "first A, then B, and then
+somehow we ended up at D and nobody noticed"
+
+### Academic Abstraction in Casual Voice
+Words like "instrumentalized" "coordinate human behavior" "recursive loop"
+in a tweet from someone who writes casually. The vocabulary is from papers,
+not from posting.
+FIX: use the word they'd actually reach for. "coordinate human behavior" →
+"get people to do stuff." If the plain version sounds dumb, maybe the take
+itself is thinner than the fancy words made it seem.
+
+### The "Every Tweet Is A Banger" Problem
+The deepest slop: every simulated utterance is GOOD. Considered. Structured.
+Satisfying. Real twitter feeds are 70% mid, 20% boring, 10% brilliant.
+The simulation should include:
+- Half-finished thoughts ("idk if this makes sense but")
+- Trailing off ("wait actually nvm")
+- Boring logistical tweets ("anyone know a good dentist in brooklyn")
+- Self-interruptions ("ok this is getting long")
+- Acknowledgments that add nothing ("lol yeah" "hmm" "fair")
+If every tweet in the simulation could be screenshot'd as a banger,
+the simulation is too polished to be real.
+
+## Structural Slop Patterns — CHECK IN SIMULATION OUTPUT
+
+### Pattern: Identical Sentence Structure Across Speakers
+If two or more simulated people use the same sentence structure
+(e.g., "The thing about X is Y"), the simulation has failed voice
+differentiation. Real people have different syntactic habits.
+
+### Pattern: Topic Sentence Machine
+If a simulated post follows: topic sentence → elaboration → example → wrap-up,
+it's LLM structure, not human. Real tweets are: punchline first, or tangent,
+or one-liner, or trailing thought.
+
+### Pattern: Symmetry Addiction
+If the conversation has neat equal turns, balanced perspectives, everyone
+getting the same number of posts — that's not real. Real conversations
+are asymmetric. Someone dominates. Someone lurks. Someone gets interrupted.
+
+### Pattern: The Hedge Parade
+"This approach may potentially help improve..." — no human tweets like this.
+Either commit to the statement or don't make it.
+
+### Pattern: Em Dash Overload
+Count em dashes (—) per person. If >2 per post on average, flag it.
+Most people use them sparingly or not at all.
+
+### Pattern: Sycophantic Agreement Flow
+If the conversation flows: A says thing → B says "great point, and also..." →
+C says "building on that..." — that's instruct-model conversation, not human.
+Real conversations have: disagreement, misunderstanding, tangents, ignoring,
+one-upping, and sometimes just "lol."
+
+### Pattern: Uniform Register
+If all simulated people sound like they're writing at the same education level
+with the same formality — the simulation failed. Real people have wildly different
+registers. A shitposter and an academic should sound nothing alike.
+
+## Integration: Mechanical Slop Scan
+
+Run BEFORE subjective discriminator scoring, alongside emoji/length/caps checks.
+
+```
+For each simulated utterance:
+  1. Scan for Tier 1 words → auto-rewrite if found
+  2. Count Tier 2 words per person → flag if >= 3
+  3. Scan for Tier 3 filler phrases → auto-delete
+  4. Check for structural patterns:
+     - Same sentence structure across speakers?
+     - Topic-sentence-machine structure?
+     - Symmetric turn-taking?
+     - Hedge parade?
+     - Em dash count?
+     - Sycophantic flow?
+     - Uniform register?
+  5. If ANY Tier 1 found or ANY structural pattern detected: 
+     FAIL the utterance and regenerate
+```
+
+This scan is MECHANICAL. It cannot be vibes-scored. The words are either
+there or they're not. Run it every time, no exceptions.
@@ -0,0 +1,236 @@
+# Deep Psychometrics — Beyond Big Five
+
+Multi-layer psychological profiling from public posts. Each layer adds
+a dimension to the personality model, making simulations more nuanced
+and predictions more accurate.
+
+## The Profiling Stack
+
+| Layer | What It Measures | Tool/Method | Accuracy | Min Posts |
+|-------|-----------------|-------------|----------|-----------|
+| Big Five (OCEAN) | Core personality traits | RoBERTa embeddings + BiLSTM | AUROC 0.78-0.82 | 30-50 |
+| Moral Foundations | Ethical intuitions | eMFDscore (pip) | Validated dictionary | 20+ |
+| Schwartz Values | Core value priorities | DeBERTa on ValueEval | F1 0.56 (macro) | 20+ |
+| Cognitive Style | Thinking patterns | AutoIC + LIWC features | r=0.70-0.82 doc-level | 20+ |
+| Narrative Framing | How they frame issues | GPT-4 few-shot | F1 ~70% | 10+ |
+| Behavioral Metadata | Non-text patterns | Feature extraction | r=0.29-0.40 per trait | 20+ |
+
+## Layer 1: Big Five Personality (Foundation)
+
+### Accuracy Bounds (peer-reviewed)
+- AUROC 0.78-0.82 with RoBERTa embeddings + BiLSTM (JMIR 2025)
+- Per-trait binary accuracy: O=0.637, C=0.602, E=0.620, A=0.590, N=0.620
+- Meta-analytic correlations (Azucar 2018, 16 studies):
+  Extraversion r=0.40, Openness r=0.39, Conscientiousness r=0.35,
+  Neuroticism r=0.33, Agreeableness r=0.29
+- These hit the "personality coefficient" ceiling of r=0.30-0.40 —
+  digital footprints are as predictive as any behavioral measure
+
+### What Actually Works
+- Fine-tuned embeddings >> zero-shot LLMs. GPT-4o zero-shot is UNRELIABLE.
+- RoBERTa embeddings are free and nearly as good as OpenAI embeddings
+- Aggregation across posts is essential — single posts are noise
+- 30-50 posts of ~90 words each = practical minimum
+- Training data: PANDORA Reddit corpus (1568 users, ~935K posts)
+
+### For The Simulator (without running models)
+Since we can't fine-tune per-simulation, use LLM-as-rater with caveats:
+- Provide 10-20 actual posts as evidence
+- Ask for trait estimation with reasoning, not just scores
+- Anchor with the adjective-based method (see prediction-engine.md)
+- Frame estimates as ranges, not points: "Openness: HIGH (0.7-0.9)"
+- Known bias: LLMs overestimate agreeableness and underestimate neuroticism
+
+### Key Insight: LLMs Already Know Public Figures
+Nature Scientific Reports 2024: GPT-3's semantic space already encodes
+perceived personality of public figures from their names alone. For
+famous people, the LLM's latent knowledge is a STARTING POINT that
+OSINT data confirms or corrects.
+
+## Layer 2: Moral Foundations (Ethical Compass)
+
+Jonathan Haidt's Moral Foundations Theory. Six foundations:
+
+| Foundation | Liberal emphasis | Conservative emphasis |
+|-----------|-----------------|---------------------|
+| Care/Harm | ★★★ HIGH | ★★ MODERATE |
+| Fairness/Cheating | ★★★ HIGH | ★★ MODERATE |
+| Loyalty/Betrayal | ★ LOW | ★★★ HIGH |
+| Authority/Subversion | ★ LOW | ★★★ HIGH |
+| Sanctity/Degradation | ★ LOW | ★★★ HIGH |
+| Liberty/Oppression | ★★ MODERATE | ★★ MODERATE |
+
+### Tool: eMFDscore
+```
+pip install emfdscore
+# GitHub: github.com/medianeuroscience/emfdscore
+# Built on spaCy, GPL-3.0
+```
+
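+Output per post: scores for each foundation (virtue + vice dimensions).
+Aggregate across 20+ posts → 10-dimensional moral profile (5 foundations ×
+virtue/vice; eMFDscore does not score Liberty/Oppression, so infer that
+foundation manually).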
+
+### Application to Simulation
+Moral foundations predict:
+- What topics trigger emotional responses
+- What arguments they find persuasive vs repulsive
+- How they frame political/social issues
+- Who they instinctively ally with vs oppose
+- What kind of content they share/amplify
+
+Example: High Loyalty/Authority person will defend their tribe even when
+wrong. High Care/Fairness person will break from their tribe on justice
+issues. This shapes conversation dynamics.
+
+### For The Simulator (without running eMFDscore)
+Infer moral foundations from:
+- Political positions and framing in their posts
+- What they get angry about vs what they celebrate
+- Who they defend and who they attack
+- Key moral vocabulary: "protect", "fair", "loyal", "respect", "pure", "free"
+
+## Layer 3: Schwartz Values (Core Motivations)
+
+19 values in the refined theory (grouped below under the 10 basic values),
+arranged in a circular continuum (adjacent values are compatible,
+opposite values are in tension):
+
+**Self-Transcendence** ↔ **Self-Enhancement**
+- Universalism, Benevolence ↔ Power, Achievement
+
+**Openness to Change** ↔ **Conservation**
+- Self-Direction, Stimulation, Hedonism ↔ Tradition, Conformity, Security
+
+### SemEval-2023 Task 4 Results
+- Best macro-F1: 0.56 (ensemble of 12 DeBERTa/RoBERTa models)
+- Most reliable: universalism (nature), security, power
+- Least reliable: stimulation, hedonism, humility
+- Dataset: 9,324 annotated arguments, available via Touché
+
+### Key Finding: Value Perception Is Subjective
+Epstein et al. (2026): human inter-rater agreement on values is only r=0.201.
+Fine-tuned GPT-4o reaches r=0.294 — BETTER than human-human agreement.
+Personalized models reach r=0.334.
+
+### For The Simulator
+Values predict MOTIVATION — why someone holds positions, not just what
+positions they hold. Two people with the same political stance may have
+completely different underlying values:
+- "I support open source because FREEDOM" (Self-Direction)
+- "I support open source because FAIRNESS" (Universalism)
+- "I support open source because it WORKS BETTER" (Achievement)
+Same position, different framing, different behavioral predictions.
+
+## Layer 4: Cognitive Style (How They Think)
+
+### Integrative Complexity (AutoIC)
+Measures differentiation (seeing multiple perspectives) and integration
+(synthesizing perspectives into coherent frameworks).
+
+- Low IC: black-and-white thinking, strong convictions, simple language
+- High IC: nuanced, sees multiple sides, hedging, complex sentences
+
+AutoIC (Conway et al.): 3,500+ complexity-relevant root words/phrases,
+13 dictionary categories, validated r=0.70-0.82 at document level.
+
+**WARNING**: LIWC's "analytic thinking" correlates only r=0.14 with actual
+integrative complexity. Don't use LIWC's score as a proxy.
+
+### Computational Indicators of Cognitive Style
+Extractable from 20-50 posts without specialized tools:
+
+| Indicator | High IC | Low IC |
+|-----------|---------------|---------------|
+| Vocabulary diversity (TTR) | HIGH | LOW |
+| Avg sentence length | LONGER | SHORTER |
+| Causal connectives ("because", "therefore") | MORE | FEWER |
+| Hedging ("perhaps", "it seems") | MORE | FEWER |
+| Abstract vs concrete language | MORE ABSTRACT | MORE CONCRETE |
+| Question-asking | MORE | FEWER |
+| Binary framing ("always/never") | LESS | MORE |
+
+### For The Simulator
+Cognitive style directly shapes VOICE:
+- High IC person: longer posts, more caveats, "on the other hand"
+- Low IC person: punchy takes, strong assertions, no hedging
+- This is one of the strongest differentiators between similar-sounding people
+
+## Layer 5: Narrative Framing (Their Lens on Reality)
+
+How someone frames an issue reveals deep cognitive and value patterns.
+
+### Common Frames (Semetko & Valkenburg)
+- **Conflict**: issue as battle between opposing sides
+- **Human interest**: personal stories, emotional impact
+- **Economic**: costs, benefits, financial impact
+- **Morality**: right vs wrong, ethical principles
+- **Attribution of responsibility**: who's to blame / who should fix it
+
+### Detection
+GPT-4 few-shot with frame definitions achieves F1=70.4%
+Best for diverse topics where fine-tuned models are too narrow
+
+### For The Simulator
+Framing predicts:
+- How they'll react to news (through which lens)
+- What aspects they'll emphasize in conversation
+- What arguments they'll find compelling
+- Whether they personalize or systematize issues
+
+Example: Same AI safety event, different frames:
+- Conflict framer: "The open vs closed battle heats up"
+- Economic framer: "This will cost the industry billions"
+- Moral framer: "This is irresponsible and dangerous"
+- Attribution framer: "The regulators need to step in"
+
+## Layer 6: Behavioral Metadata (Non-Text Signals)
+
+Extractable from X API / Bluesky AT Protocol without NLP:
+
+| Feature | What It Reveals |
+|---------|----------------|
+| Posting time distribution | Timezone, sleep patterns, work schedule |
+| Reply vs original ratio | Conversational vs broadcast personality |
+| Emoji frequency & types | Emotional expression style |
+| Hashtag usage | Community identification, signal boosting |
+| Media attachment rate | Visual vs text orientation |
+| Thread length | Depth of engagement preference |
+| Retweet/repost ratio | Amplifier vs creator |
+| Average post length | Conciseness vs verbosity |
+| Response latency | Impulsiveness vs deliberation |
+
+### Trait Correlations (meta-analytic)
+- **Extraversion**: more posts, more friends, more photos, more group activity
+- **Neuroticism**: more self-disclosure, more passive consumption, more late-night posting
+- **Agreeableness**: fewer swear words, more positive emotion, more supportive replies
+- **Conscientiousness**: more regular posting patterns, more task-oriented content
+- **Openness**: more diverse topics, more original content, larger networks
+
+## Putting It All Together: The Deep Dossier
+
+At high fidelity, compile a multi-layer profile:
+
+```
+PSYCHOMETRIC PROFILE: @handle
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+Big Five: O[HIGH] C[MED] E[HIGH] A[LOW] N[LOW]
+  Evidence: {real quotes showing each trait}
+
+Moral Foundations: Care★★ Fair★★★ Loyal★ Auth★ Sanct★ Liberty★★★
+  Evidence: {what they get angry/excited about}
+
+Values: Self-Direction dominant, Achievement secondary
+  Evidence: {how they justify their positions}
+
+Cognitive Style: HIGH integrative complexity
+  Evidence: {hedging patterns, nuanced takes, sentence complexity}
+
+Dominant Frame: Attribution of Responsibility
+  Evidence: {they consistently focus on who's to blame}
+
+Behavioral: Night owl, reply-heavy, low emoji, threads > one-shots
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+```
+
+This multi-layer profile makes predictions much more nuanced than
+Big Five alone. It tells you not just WHAT someone will say but
+WHY they'll say it and HOW they'll frame it.
@@ -0,0 +1,170 @@
+# GEPA Evolution — Automated Self-Improvement via hermes-agent-self-evolution
+
+## What This Is
+
+The hermes-agent-self-evolution repo (NousResearch/hermes-agent-self-evolution)
+uses DSPy + GEPA (Genetic-Pareto Prompt Evolution) to automatically evolve
+Hermes Agent skills. GEPA is an ICLR 2026 Oral paper — it reads EXECUTION
+TRACES to understand WHY things fail, then proposes targeted mutations.
+
+This means: we can point GEPA at the worldsim skill and automatically evolve
+every component — simulation instructions, anti-slop rules, star thread
+methodology, mechanical verification checklist, dossier templates — using
+our own simulation outputs scored against real data as the eval signal.
+
+The recursive self-improvement pipeline we built manually (log failures →
+promote patterns → update rules) can be AUTOMATED via GEPA.
+
+## How It Applies to WorldSim
+
+### What GEPA Evolves (text, not weights)
+GEPA evolves the TEXT of prompts and instructions. For worldsim, that means:
+
+| Target | What Gets Evolved | Eval Signal |
+|--------|------------------|-------------|
+| SKILL.md | Immersion protocol, pipeline instructions | Simulation quality scores |
+| star-thread.md | Methodology for finding star threads | Thread-to-voice accuracy |
+| anti-slop.md | Slop word lists, structural patterns | Slop detection recall/precision |
+| simulation-engine.md | Platform formats, conversation dynamics | Voice fidelity scores |
+| adversarial-refinement.md | Mechanical check thresholds, GAN loop | Pre vs post refinement delta |
+| prediction-engine.md | Forecasting methodology | Prediction Brier scores |
+| dossier template | Profile structure and fields | Profile quality scores |
+
+### The Eval Dataset
+Built from worldsim's own outputs + real data:
+
+1. **Voice fidelity pairs**: (simulated post, real post from same person) →
+   LLM-as-judge scores similarity 0-1
+2. **Mechanical check logs**: what did the checks catch? what slipped through?
+3. **Prediction accuracy**: tracked predictions scored against reality
+4. **Held-out tests**: predicted tweets vs actual tweets
+5. **Turing test results**: could the discriminator tell real from fake?
+6. **User corrections**: any time the user catches something the system missed
+   (like the emoji fabrication incident — that's the richest signal)
+
+### The GEPA Loop for WorldSim
+
+```
+1. RUN worldsim simulation (creates execution traces)
+2. SCORE outputs against real data (voice, position, mechanical)
+3. LOG traces + scores + user feedback to eval dataset
+4. GEPA EVOLVES the skill component that had lowest scores
+   - Reads traces to understand WHY it scored low
+   - Proposes mutation to that specific reference file
+   - Tests mutation against held-out eval data
+   - If improved: create PR, human reviews
+5. REPEAT — each cycle makes the skill better
+```
+
+### Concrete Example
+
+GEPA discovers from traces that simulated conversations always have
+symmetric turn-taking (4/4/4). It reads the mechanical check log that
+caught this in 3 of the last 5 simulations. It reads the current
+simulation-engine.md and sees the conversation architecture section.
+It proposes a mutation:
+
+OLD: "Opening Moves (1-3 posts) → Development (4-8 posts) → Peak → Resolution"
+NEW: "Opening: most impulsive person posts. Others join ASYMMETRICALLY — one person
+gets 40-50% of turns, one gets 15-20%, others fill the rest. The ratio should
+match their real reply-to-original ratios from the dossier."
+
+This mutation gets tested against the next 5 simulations. If symmetry
+violations drop and voice scores don't decrease, it gets merged.
+
+## Setup
+
+```bash
+# Clone the evolution repo
+git clone https://github.com/NousResearch/hermes-agent-self-evolution.git
+cd hermes-agent-self-evolution
+pip install -e ".[dev]"
+
+# Point at hermes-agent repo
+export HERMES_AGENT_REPO=~/.hermes
+
+# Evolve the worldsim skill specifically
+python -m evolution.skills.evolve_skill \
+    --skill hermes-simulator \
+    --iterations 10 \
+    --eval-source sessiondb
+```
+
+## What Makes This Different From Manual Self-Improvement
+
+The manual pipeline (references/recursive-self-improvement.md) requires the
+agent to notice its own failures and write rules. This has two problems:
+
+1. The agent shares weights with the generator — it's biased toward
+   approving its own output (the emoji incident proved this)
+2. Promoting patterns to rules is slow and requires 3+ occurrences
+
+GEPA solves both (points 1-2) and adds two further safeguards (points 3-4):
+1. The eval signal comes from EXTERNAL data (real posts, user corrections,
+   mechanical checks) — not the agent's self-assessment
+2. Evolution happens per-iteration, not per-3-failures
+3. Mutations are tested against held-out data before merging
+4. The Pareto frontier maintains diversity — different strategies for
+   different types of people/conversations
+
+## Integration Points
+
+### Eval Dataset Builder
+Mine rehoboam DB for training data:
+- simulation_logs table → execution traces
+- prediction_scores table → accuracy data
+- audit_log table → mechanical check results
+- user correction events → highest-value signal
+
+### Fitness Function for WorldSim
+```python
+def worldsim_fitness(simulation_output, real_data):
+    scores = {}
+    # Voice fidelity: embed real + simulated, cosine similarity
+    scores["voice"] = embed_and_compare(simulation_output, real_data.tweets)
+    # Mechanical pass rate: what % of checks passed without fixes
+    scores["mechanical"] = mechanical_check_pass_rate(simulation_output)
+    # Anti-slop score: 1 - slop density (slop hits per word; slop_count and
+    # total_words must first be computed from simulation_output)
+    scores["anti_slop"] = 1.0 - (slop_count / total_words)
+    # Structure: turn asymmetry, conversation naturalness
+    scores["structure"] = naturalness_score(simulation_output)
+    # Textual feedback for GEPA's reflective mutation
+    feedback = generate_textual_feedback(scores, simulation_output, real_data)
+    return aggregate_score(scores), feedback
+```
+
+### The Key Insight: Textual Feedback
+GEPA's superpower is that it doesn't just get a scalar score — it gets
+TEXTUAL FEEDBACK explaining what went wrong. Our mechanical verification
+system already produces this:
+
+"@nosilverv avg 33.2 words vs real 15.6 (113% deviation) — SHORTEN"
+"Parallel antithesis detected: 'The most X... The most Y...' — STRIP"
+"Emoji rate 0% simulated but 10% real — OK (within tolerance)"
+
+This text goes directly into GEPA's reflective mutation pipeline. It reads
+these messages and proposes changes to the skill instructions that would
+prevent these specific failures in future simulations.
+
+## Evolution Targets by Priority
+
+1. **simulation-engine.md** — highest impact on output quality
+2. **anti-slop.md** — directly measurable, highest precision eval
+3. **star-thread.md** — hardest to evaluate but most impactful on voice
+4. **adversarial-refinement.md** — meta: improving the improvement system
+5. **SKILL.md pipeline instructions** — orchestration optimization
+6. **dossier template** — structure optimization
+7. **prediction-engine.md** — measurable via Brier scores
+
+## The Virtuous Cycle
+
+```
+More simulations → more eval data → better GEPA mutations
+→ better skill instructions → better simulations → more eval data → ...
+```
+
+This is the endgame: the worldsim skill evolves itself through use.
+Every simulation makes the next one better, not just through logged
+rules, but through automated evolutionary optimization of the
+instructions themselves. The system doesn't just learn WHAT went wrong —
+it rewrites its own instructions to prevent it.
@@ -0,0 +1,262 @@
+# Knowledge Archive — Per-Person Source Library + Expert Synthesis
+
+## The Problem With Profiles
+
+A profile is a SNAPSHOT. It says "this person believes X" but doesn't
+show you WHERE they said it, WHEN, in WHAT context, or HOW their
+thinking evolved. You can't cite a profile. You can't trace a claim
+back to a source. And when you're simulating a conversation about
+topic Z, the profile gives you everything about the person equally
+weighted — their views on AI and their views on cooking and their
+views on politics all crammed into the same context window.
+
+## The Archive
+
+For every person the system touches, build a LIBRARY:
+
+```
+~/.hermes/rehoboam/archives/{handle}/
+├── index.json              ← master index: all entries, metadata, embeddings
+├── sources/
+│   ├── x_tweets.jsonl      ← every tweet pulled, with ID, timestamp, URL, metrics
+│   ├── x_replies.jsonl     ← their replies (different voice register)
+│   ├── bluesky_posts.jsonl ← bluesky posts
+│   ├── blog_posts.jsonl    ← full text of blog posts with URLs
+│   ├── podcast_quotes.jsonl ← attributed quotes from transcripts
+│   ├── interviews.jsonl    ← quotes from news articles/interviews
+│   ├── reddit_comments.jsonl
+│   ├── github_comments.jsonl
+│   ├── goodreads_reviews.jsonl
+│   ├── threads_posts.jsonl
+│   └── other.jsonl         ← anything else (HN, Quora, etc.)
+├── topics/
+│   ├── ai_safety.jsonl     ← auto-clustered by topic
+│   ├── open_source.jsonl
+│   ├── consciousness.jsonl
+│   └── ...
+└── embeddings/
+    └── all_embeddings.npy  ← sentence-transformer vectors for semantic search
+```
+
+### Entry Format (every entry in every source file)
+
+```json
+{
+  "id": "unique_id",
+  "handle": "teknium",
+  "platform": "x",
+  "type": "tweet|reply|blog|podcast|interview|comment|review",
+  "text": "the actual text they said",
+  "url": "https://x.com/Teknium/status/1234567890",
+  "timestamp": "2026-04-05T21:40:48Z",
+  "context": {
+    "replying_to": "@otheruser's tweet about X",
+    "thread_position": 3,
+    "topic": "open source AI",
+    "source_title": "Lex Fridman Podcast #412"
+  },
+  "metrics": {
+    "likes": 234,
+    "retweets": 45,
+    "replies": 12
+  },
+  "topics": ["open_source", "ai_models", "hermes"],
+  "embedding_id": 42
+}
+```
+
+Every entry has a URL. Everything is traceable. Nothing is paraphrased
+without the original alongside it.
+
+## Collection Pipeline
+
+When `worldsim> profile @handle` or `worldsim> archive @handle` runs:
+
+### Step 1: Pull Everything
+Use every verified access method to collect raw materials:
+- X API: get max tweets (paginate with next_token to get hundreds)
+- nitter.cz: timeline content
+- ThreadReaderApp: historical threads
+- Bluesky: full post history
+- GitHub: issue comments, PR reviews, gists, README
+- Reddit: comment history
+- Blog/Substack: full posts (web_extract)
+- Podcast transcripts: attributed quotes
+- Interviews: quotes with attribution
+- Goodreads: reviews
+- Medium: RSS feed full text
+
+### Step 2: Deduplicate
+Same content appears across platforms (cross-posted tweets, syndicated
+blog posts). Deduplicate by content similarity, keep the richest version
+(the one with most metadata/context).
+
+### Step 3: Topic Cluster
+Run lightweight topic classification on each entry:
+- Use the LLM or a simple keyword matcher to assign 1-3 topic tags
+- Cluster into topic files for fast retrieval
+- Topics are dynamic — new topics emerge from the data
+
+### Step 4: Embed
+Generate sentence-transformer embeddings for every entry.
+Store in numpy array for fast cosine similarity search.
+This enables semantic retrieval: "find everything @handle said about
+consciousness" even if they never used the word "consciousness."
+
+### Step 5: Index
+Build the master index.json with entry count, topic distribution,
+timestamp range, platform coverage, and quality metrics.
+
+## Context-Aware Retrieval
+
+This is the key. The archive might have 500 entries for a person.
+The context window can hold maybe 30-50 of them alongside all the
+other simulation context. You MUST retrieve selectively.
+
+### For Simulation
+When simulating @handle talking about topic X:
+
+```
+1. Semantic search: embed the current conversation context
+2. Retrieve top 10-15 entries by cosine similarity to context
+3. Also retrieve: 5 highest-engagement entries (their "greatest hits")
+4. Also retrieve: 3 most recent entries (freshness)
+5. Also retrieve: 2 entries that CONTRADICT the expected position
+   (prevents confirmation bias in the simulation)
+6. Deduplicate. Cap at 25-30 entries total.
+7. These become the "voice anchors" for generation.
+```
+
+The simulation draws from SPECIFIC REAL QUOTES relevant to the current
+conversation. Not a generic profile. Not everything they've ever said.
+The 25 most relevant things they've said about THIS topic.
+
+### For Expert Synthesis
+When the user asks "who are the best minds on X and what have they said?":
+
+```
+1. Search ALL archived people's entries for topic X
+2. Rank by: entry quality × person expertise × relevance to query
+3. Return a synthesis with CITATIONS:
+
+   On the topic of AI consciousness:
+
+   @repligate argues that LLMs exhibit "simulacra of consciousness"
+   rather than consciousness itself, distinguishing between the
+   model's behavior and its substrate:
+     > "the question isn't whether GPT is conscious but whether the
+     > character it's simulating is conscious within the fiction"
+     — tweet, 2025-03-15 (2.4K likes)
+     https://x.com/repligate/status/...
+
+   @nickcammarata approaches it from a meditation/first-person
+   perspective, noting parallels between introspective practice
+   and interpretability:
+     > "observation changes the system being observed, in meditation
+     > and in interp"
+     — tweet, 2026-04-05 (2.9K likes)
+     https://x.com/nickcammarata/status/...
+
+   @tszzl is skeptical of the framing entirely:
+     > "consciousness discourse is philosophy cosplaying as engineering"
+     — tweet, 2025-11-22 (5.1K likes)
+     https://x.com/tszzl/status/...
+```
+
+Every claim attributed. Every quote sourced. Every link clickable.
+
+### For Grounding Predictions
+When predicting what @handle would say about event Y:
+
+```
+1. Retrieve all archive entries related to Y or adjacent topics
+2. Identify their PATTERN of response to similar events
+3. Ground the prediction in specific past statements:
+
+   PREDICTION: @handle would likely frame event Y through the lens
+   of [topic Z], based on:
+   - tweet [url]: "quote about Z" (2025-06-15)
+   - blog post [url]: "longer quote about Z" (2025-09-20)
+   - podcast [url]: "verbal quote about Z" (2026-01-10)
+   CONFIDENCE: 78% (3 consistent sources over 7 months)
+```
+
+## Incremental Updates
+
+The archive grows over time. Each time the person is profiled:
+1. Pull new content since last archive timestamp
+2. Append to source files
+3. Embed only the new entries (existing embeddings are kept as-is)
+4. Update topic clusters
+5. Update index
+
+Don't rebuild from scratch. Append and re-index.
+
+## Expert Table
+
+When you have 20+ archived people, build an expert table:
+
+```
+worldsim> experts "open source AI"
+
+EXPERT TABLE: open source AI
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+  @Teknium | 47 entries | voice: builder/practitioner
+    "we can prove that open approaches build better, more
+    trustworthy systems" — tweet, 2026-04-05
+    Latest: 2 hours ago | Stance: STRONG ADVOCATE
+
+  @repligate | 12 entries | voice: philosophical/theoretical
+    "open weights = accountability. you can't audit a black box"
+    — tweet, 2025-11-30
+    Latest: 3 days ago | Stance: ADVOCATE (principled)
+
+  @eigenrobot | 8 entries | voice: statistical/contrarian
+    "the open source premium is largely downstream of selection
+    effects in who contributes" — tweet, 2025-08-14
+    Latest: 1 week ago | Stance: SKEPTICAL OF FRAMING
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+  3 experts found | 67 total entries | synthesize? (y/n)
+```
+
+The table shows: who knows about this, what they've said, how recently,
+and what their stance is. All grounded in archived quotes with sources.
+
+## Integration With Simulation
+
+When the star thread + dossier + archive work together:
+
+```
+STAR THREAD: drives the core generation (what they're DOING)
+DOSSIER: provides constraints (psychometrics, voice metrics, baselines)
+ARCHIVE: provides GROUNDING (specific real quotes for this context)
+MECHANICAL CHECKS: verifies surface features (emoji, length, slop)
+```
+
+The archive prevents the simulation from drifting into generic territory.
+Instead of "this person would probably say something about open source,"
+it's "this person said THIS SPECIFIC THING about open source 3 weeks ago,
+and their simulation should be consistent with that while also being fresh."
+
+## The Overfitting Problem
+
+"Without overfitting to a particular material the new context doesn't call for."
+
+The retrieval system MUST be selective. If someone said 47 things about
+open source AI, and the current conversation is about AI regulation,
+don't dump all 47 open source quotes into context. Maybe 3 are relevant
+because they connect open source to regulation. Retrieve THOSE 3.
+
+The cosine similarity search handles this naturally — it matches the
+CURRENT conversation context against the archive and returns what's
+actually relevant, not everything tagged with a nearby topic.
+
+The anti-overfitting checklist:
+- Never load more than 25-30 archive entries per person into context
+- Weight by relevance to CURRENT conversation, not by general importance
+- Include at least 2 entries that contradict the expected position
+- Include at least 3 recent entries regardless of topic relevance (freshness)
+- If the conversation shifts topic mid-simulation, RE-RETRIEVE for new context
+- The archive is a LIBRARY you consult, not a script you follow
@@ -0,0 +1,321 @@
+# Mass Behavior Modeling — Communities, Clusters, Cascades
+
+Understanding individual behavior requires understanding the social
+ecosystem they exist in. This reference covers the macro layer:
+community detection, influence networks, audience modeling, and
+predicting how groups respond to events.
+
+## Why This Matters For Simulation
+
+Individual prediction accuracy: ~56-60%
+Individual-in-context prediction: significantly higher
+
+A person's behavior is constrained by their community. Knowing WHICH
+community they belong to, WHO influences them, and WHAT information
+ecosystem they're in makes individual predictions much sharper.
+
+Lewin's equation: B = f(P, E) — behavior (B) is a function of the person (P)
+and their environment (E). This reference is about the E.
+
+## The Ecosystem Stack
+
+```
+Layer 5: AUDIENCE REACTION    — How would this person's audience respond?
+Layer 4: STANCE & SENTIMENT   — What positions do clusters hold?
+Layer 3: INFLUENCE NETWORKS   — Who spreads ideas to whom?
+Layer 2: COMMUNITY CLUSTERS   — Who groups together?
+Layer 1: SOCIAL GRAPH         — Who follows/interacts with whom?
+```
+
+## Layer 1: Social Graph Construction
+
+### Data Sources (by accessibility)
+
+| Source | Access | Quality | Tools |
+|--------|--------|---------|-------|
+| Bluesky AT Protocol | FREE, open, no auth | Excellent | atproto (pip) |
+| X/Twitter API | Bearer token, limited | Good but restricted | curl, tweepy |
+| Reddit | API with limits | Good for comments | PRAW (pip) |
+| GitHub | Free API | Great for tech people | PyGithub (pip) |
+| Web scraping | Fragile, TOS issues | Variable | Last resort |
+
+### Bluesky: The Open Gold Mine
+```python
+# pip install atproto
+from atproto import Client
+client = Client()
+# No auth needed for public data
+
+# Get follower graph
+followers = client.get_followers(actor="handle.bsky.social")
+following = client.get_follows(actor="handle.bsky.social")
+
+# Real-time firehose (no auth!)
+# wss://jetstream1.us-east.bsky.network/subscribe
+```
+
+### Graph Types
+- **Follow graph**: who follows whom (directed, static-ish)
+- **Interaction graph**: who replies to / retweets whom (directed, dynamic)
+- **Mention graph**: who mentions whom (directed, weighted by frequency)
+- **Co-engagement graph**: who engages with the same content (undirected)
+
+Interaction graphs are more informative than follow graphs for predicting
+actual behavioral alignment.
+
+### Tools
+```
+pip install networkx python-igraph
+```
+NetworkX for prototyping (<100K nodes), igraph for production (millions).
+
+## Layer 2: Community Detection
+
+### Algorithms (Leiden is the recommended default)
+
+| Algorithm | Quality | Speed | Notes |
+|-----------|---------|-------|-------|
+| Leiden | Best | Fast | Guarantees connected communities |
+| Louvain | Good | Fastest | Can produce disconnected communities |
+| Infomap | Excellent | Medium | Based on information theory |
+| Label Propagation | Decent | Very fast | Non-deterministic |
+
+### The Meta-Library: CDLib
+```
+pip install cdlib
+```
+Wraps 50+ community detection algorithms in a unified API.
+Works on top of networkx/igraph. Highly recommended.
+
+```python
+import cdlib
+from cdlib import algorithms
+import networkx as nx
+
+G = nx.karate_club_graph()
+communities = algorithms.leiden(G)
+# Also: louvain, infomap, label_propagation, angel, demon, etc.
+```
+
+### What Communities Tell Us
+Each community in a social graph typically shares:
+- Ideological orientation
+- Topic interests
+- Information sources
+- Language patterns and in-group vocabulary
+- Reaction patterns to events
+
+Knowing which community someone belongs to immediately constrains
+predictions about their likely positions and reactions.
+
+## Layer 3: Influence Networks
+
+### Key Insight (Zhou et al., National Science Review 2024)
+Network centrality alone is INSUFFICIENT for predicting influence.
+Must combine structural position with behavioral features:
+- Posting frequency
+- Historical content virality
+- Response rate / engagement ratio
+- Content originality (original vs repost ratio)
+
+### Centrality Measures
+```python
+import networkx as nx
+G = nx.DiGraph()  # directed social graph
+
+# Who has the most connections?
+degree = nx.degree_centrality(G)
+
+# Who bridges different communities?
+betweenness = nx.betweenness_centrality(G)
+
+# Who's connected to other well-connected people?
+eigenvector = nx.eigenvector_centrality(G)
+
+# PageRank (adapted from web-page ranking): who receives the most directed influence flow?
+pagerank = nx.pagerank(G)
+```
+
+### Superspreader Identification (DeVerna et al., PLOS ONE 2024)
+Superspreaders of content fall into three categories:
+1. **Pundits**: large following, high authority, original content
+2. **Media outlets**: institutional accounts, news organizations
+3. **Affiliated personal accounts**: connected to pundits/outlets
+
+For simulation: knowing who the superspreaders are in a person's
+network tells you what information they're likely exposed to.
+
+### Information Cascade Modeling
+```
+pip install ndlib  # Network Diffusion Library
+```
+
+NDlib models how information spreads through networks:
+- Independent Cascade Model
+- Linear Threshold Model
+- SIR/SIS epidemiological models adapted for info spread
+- Voter Model (opinion dynamics)
+- Sznajd Model (social influence)
+
+## Layer 4: Stance & Sentiment Analysis
+
+### Ready-To-Use Models (HuggingFace)
+
+**Tweet Sentiment** (most reliable):
+```
+cardiffnlp/twitter-roberta-base-sentiment-latest
+# Labels: positive / negative / neutral
+```
+
+**Political Stance**:
+```
+kornosk/bert-election2020-twitter-stance-biden-KE-MLM
+kornosk/bert-election2020-twitter-stance-trump-KE-MLM
+launch/POLITICS  # left / center / right
+```
+
+**All-in-One Tweet NLP**:
+```
+pip install tweetnlp
+# Sentiment, emotion, hate speech, NER, topic classification
+```
+
+### Topic-Level Stance Tracking
+Combine BERTopic (dynamic topic modeling) with stance classifiers:
+1. Cluster posts into topics over time windows
+2. Classify stance per topic per community
+3. Track stance shifts over time
+4. Detect divergence between communities on emerging topics
+
+### PRISM Framework (ACL 2025)
+First framework for interpretable political bias embeddings.
+Two-stage: mine bias indicators → cross-encoder assigns structured scores.
+```
+github.com/dukesun99/ACL-PRISM
+```
+
+## Layer 5: Audience Modeling & Crowd Prediction
+
+### The Frontier: Predicting How Groups React
+
+Key papers and findings:
+
+**CReAM (WWW 2024)**: Predicts which of two posts gets more engagement.
+Uses LLM-generated features + FLANG-RoBERTa cross-encoder.
+Demonstrates crowd reaction IS predictable from content alone.
+
+**PopSim (Dec 2025)**: LLM multi-agent social network sandbox.
+Simulates content propagation dynamics using "Social Mean Field"
+for individual-population interaction. Reduces prediction error by 8.82%.
+
+**Conditioned Comment Prediction (EACL 2026)**:
+KEY FINDING: behavioral traces (past posts) are BETTER than
+descriptive personas for conditioning LLMs to predict user behavior.
+This validates our OSINT approach: real data > personality labels.
+
+**DEBATE Benchmark (Oct 2025)**:
+WARNING: LLM agents converge opinions TOO QUICKLY vs real humans.
+SFT + DPO helps but gap remains. Real communities maintain
+disagreement longer than simulated ones.
+
+**Distributional vs Individual Prediction (PMC 2025)**:
+Group-level predictions are more reliable than individual ones.
+Predicting "65% of this community will react negatively" is more
+accurate than predicting "this specific person will react negatively."
+
+### Application to Simulation
+
+When simulating @person talking about event X, consider:
+1. What community does @person belong to?
+2. How is that community reacting to X? (distributional prediction)
+3. Where does @person sit within that community? (conformist vs contrarian)
+4. Who influences @person? What are THEY saying?
+5. How does @person's audience react to their take? (engagement prediction)
+
+This context makes individual predictions sharper.
+
+## Echo Chamber & Filter Bubble Detection
+
+### Technique
+1. Build interaction graph
+2. Run Leiden community detection
+3. For each community, aggregate stance on key issues
+4. Measure ideological homogeneity within communities
+5. Compare cross-community vs within-community content similarity
+6. High within-community similarity + low cross-community similarity = echo chamber
+
+### Tools
+```
+github.com/mminici/Echo-Chamber-Detection  # Cascade-based, CIKM 2022
+# Includes Brexit and VaxNoVax datasets
+```
+
+### What It Tells Us
+Knowing someone's echo chamber tells you:
+- What information they're exposed to
+- What they're NOT exposed to
+- How extreme their positions might be (isolation → radicalization)
+- Whether they'll encounter pushback or only agreement
+- How they'll react to information from outside their bubble
+
+## User Embeddings: "Find People Like @person"
+
+### Strategy
+1. Embed each user's recent N posts with sentence-transformers
+2. Average embeddings → user vector
+3. Use FAISS for similarity search
+4. Cluster users with HDBSCAN in embedding space
+
+### Best Models for Social Media Text
+```
+# General purpose (good baseline)
+sentence-transformers/all-mpnet-base-v2
+
+# Tweet-specific (better domain fit)
+cardiffnlp/twitter-roberta-base
+vinai/bertweet-base  # pretrained on 850M tweets
+```
+
+### Graph + Text Hybrid Embeddings
+```
+pip install karateclub
+```
+KarateClub provides Node2Vec, DeepWalk, Graph2Vec — embed users
+based on graph position. Combine with text embeddings for hybrid
+vectors that capture BOTH what someone says AND where they sit
+in the social network.
+
+## Practical Application to Simulation
+
+### For Individual Simulation (what we already do)
+Add ecosystem context to each dossier:
+- Which community cluster they belong to
+- Who their top influencers are (who do they retweet/amplify most)
+- Which echo chamber they are in (information environment)
+- How their community views the simulation topic
+
+### For Audience Simulation (new capability)
+When user asks "what would @person's audience say":
+1. Identify @person's follower community
+2. Sample representative voices from that community
+3. Model the DISTRIBUTION of responses, not just one response
+4. Include: cheerleaders, critics, joke-makers, lurkers
+5. Weight by typical engagement patterns
+
+### For Cascade Prediction (new capability)
+When user asks "how would this take spread":
+1. Model the initial tweet and its immediate network
+2. Predict which nodes amplify (based on stance alignment + influence)
+3. Estimate reach and engagement range
+4. Predict quote-tweet ratio (agreement vs dunking)
+
+## Recommended Minimal Stack
+
+```bash
+pip install networkx python-igraph leidenalg cdlib karateclub
+pip install sentence-transformers transformers tweetnlp
+pip install ndlib faiss-cpu hdbscan atproto
+```
+
+This gives you: graph construction, community detection, user embeddings,
+stance/sentiment analysis, diffusion simulation, similarity search,
+clustering, and Bluesky data access. All open source, all pip-installable.
@@ -0,0 +1,370 @@
+# OSINT Pipeline — Deep Intelligence Gathering
+
+Full-spectrum open source intelligence for building personality models.
+This goes beyond social media posts into visual identity, cross-platform
+footprints, and behavioral analysis.
+
+## Tool Arsenal
+
+| Tool | Use Case | Strength |
+|------|----------|----------|
+| `web_search` | Find anything, initial discovery | Fast, broad, indexed content |
+| `web_extract` | Pull full page content | Blogs, articles, profiles, PDFs |
+| `browser_navigate` + `browser_snapshot` | View live pages | Dynamic content, login walls |
+| `browser_vision` | Analyze what a page looks like | Layouts, visual identity, screenshots |
+| `vision_analyze` | Analyze any image by URL/path | Profile pics, post images, aesthetics |
+| `browser_get_images` | List all images on a page | Find images to feed to vision_analyze |
+| Yandex reverse image search | Find where an image appears | Identity verification, alt accounts |
+| `x-cli` (if available) | Direct Twitter API | Timelines, search, metadata |
+
+## Instagram Intelligence
+
+Instagram is CRITICAL for personality modeling — it reveals:
+- Visual identity and aesthetic preferences
+- Real-life social circles (tagged people, group photos)
+- Lifestyle signals (travel, food, hobbies, pets)
+- Caption voice (often different from Twitter voice)
+- Story highlights (curated self-image)
+- Bio links (cross-platform connections)
+
+### Viewing Instagram Profiles (VERIFIED APRIL 2026)
+
+**METHOD 1 — Instagram Private Web API (BEST, returns full JSON)**
+```bash
+curl -s -H 'User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X)' \
+  -H 'x-ig-app-id: 936619743392459' \
+  'https://i.instagram.com/api/v1/users/web_profile_info/?username={handle}'
+```
+Returns ~500KB of JSON: full profile + last 12 posts with captions, likes,
+comments, CDN image URLs, timestamps. No auth needed.
+
+**METHOD 2 — Instagram oEmbed API (for individual posts)**
+```bash
+curl -s 'https://www.instagram.com/api/v1/oembed/?url=https://www.instagram.com/p/{SHORTCODE}/'
+```
+Returns: caption text, author_name, thumbnail URL. No auth.
+
+**METHOD 3 — Pixwox via web_extract (profile viewer)**
+```python
+web_extract(["https://pixwox.com/profile/{username}"])
+```
+Returns 12+ recent posts with captions, engagement stats. Cloudflare blocks
+curl but web_extract bypasses it.
+
+**METHOD 4 — SocialBlade via web_extract (analytics)**
+```python
+web_extract(["https://socialblade.com/instagram/user/{handle}"])
+```
+Returns follower count, engagement rate, 14-day tracking.
+
+**METHOD 5 — CDN direct download (images from API responses)**
+Image URLs from API responses (scontent-*.cdninstagram.com) download
+directly with no auth. Feed them to vision_analyze for visual profiling.
+
+**METHOD 6 — Google indexed content**
+```
+web_search("site:instagram.com {username}")
+```
+Returns bio text, follower count, recent post captions from search snippets.
+
+**WHAT DOESN'T WORK:** direct web_extract on instagram.com, the `?__a=1` trick,
+graph.instagram.com (needs OAuth), imginn/picuki/dumpoir/gramhir (403).
+
+### Instagram Discovery (finding someone's handle)
+```
+web_search("{real_name} instagram")
+web_search("{twitter_handle} instagram account")
+web_search("site:instagram.com {real_name}")
+
+# Check their Twitter/X bio for IG links
+# Check their personal website for social links
+# Check Linktree / bio.link pages
+```
+
+### Extracting Signal from Instagram
+
+**Profile Picture**: Reveals self-presentation style
+- Professional headshot vs casual vs meme/avatar
+- Analyze with vision_analyze for clothing, setting, expression
+
+**Bio Text**: Compressed self-identity
+- Role/title claims
+- Emoji usage patterns
+- Link destinations
+- Location claims
+
+**Post Grid**: Visual identity fingerprint
+- Color palette tendencies
+- Content categories (food/travel/tech/selfies/memes)
+- Posting frequency
+- Professional vs personal ratio
+
+**Captions**: Voice sample different from Twitter
+- Usually longer, more personal
+- Hashtag usage patterns
+- Emoji patterns
+- Tone (inspirational vs casual vs funny)
+
+**Tagged Photos**: Real social graph
+- Who they hang out with IRL
+- Events they attend
+- Social circles outside tech/AI
+
+## Visual Identity Analysis
+
+Use vision tools to analyze HOW someone presents visually:
+
+### Profile Pictures Across Platforms
+```
+# Collect profile pics from multiple platforms
+# Twitter, Instagram, LinkedIn, GitHub, Discord
+
+# Analyze each
+vision_analyze(image_url="{pic_url}", 
+    question="Describe this profile picture in detail: person's appearance, clothing style, setting, expression, professional vs casual, any notable elements")
+
+# Cross-reference: do they use the same pic everywhere? Different personas?
+```
+
+### Reverse Image Search (Yandex Pipeline)
+Note (from memory, not re-verified): Google Lens blocks Browserbase IPs, so use Yandex instead:
+
+```
+# For images behind auth/CDN, upload to catbox first
+terminal("curl -F 'reqtype=fileupload' -F 'fileToUpload=@{local_path}' https://catbox.moe/user/api.php")
+
+# Then Yandex reverse image search
+browser_navigate("https://yandex.com/images/search?rpt=imageview&url={encoded_public_url}")
+
+# Or via web_extract (slower but automatable)
+web_extract(["https://yandex.com/images/search?rpt=imageview&url={encoded_public_url}"])
+```
+
+Yandex provides:
+- Similar images (find the same person elsewhere)
+- Site matches (where this image appears)
+- OCR text extraction (text in images)
+- Image tags (what's in the image)
+- Knowledge panels (identified entities)
+
+### Screenshot Analysis
+When you can see a page but can't extract text:
+```
+browser_vision(question="Read all text on this page. List usernames, post content, dates, engagement numbers")
+browser_vision(annotate=true, question="What interactive elements are on this page?")
+```
+
+## LinkedIn Intelligence
+
+**STATUS: BLOCKED for automated access** (tested April 2026).
+web_extract returns "Website Not Supported". Direct browsing triggers auth walls.
+
+**Workarounds:**
+```
+# LinkedIn content IS indexed by search engines
+web_search("{real_name} linkedin {company}")
+web_search("site:linkedin.com/in {name}")
+# These return snippets with headline, role, company — useful even without full profile
+
+# Google sometimes caches LinkedIn profiles
+web_search("{name} site:linkedin.com headline")
+```
+
+**METHOD 1 — Google indexed snippets (always works)**
+```
+web_search("site:linkedin.com/in {name} {company}")
+```
+Returns: name, headline, company, location, connection count, bio snippet.
+
+**METHOD 2 — Crunchbase (EXCELLENT for founders/execs)**
+```python
+web_extract(["https://www.crunchbase.com/person/{slug}"])
+```
+Returns: full career history, education, investments, board positions,
+social links. Best source for professional identity of startup people.
+
+**METHOD 3 — Corporate press pages**
+```
+web_search("{person} {company} site:{company}.com bio OR press")
+```
+Official bios from company newsrooms. High quality, curated but factual.
+
+**METHOD 4 — Third-party aggregators**
+- RocketReach, SignalHire — job title + company from web_search snippets
+- rootdata.com — good for crypto/AI people
+- Crunchbase — best all-round for tech executives
+
+**METHOD 5 — Paid LinkedIn API wrappers** (if budget allows)
+- LinkdAPI, Proxycurl: $0.07-0.15 per profile, full structured data
+- No OAuth needed, just API key
+
+What the combined methods recover in place of a full LinkedIn profile:
+- Career trajectory (Crunchbase full history)
+- Current role and headline (search snippets)
+- Education (Crunchbase or search snippets)
+- Professional self-presentation (company bio pages)
+- Investment/board activity (Crunchbase)
+
+## Podcast Transcripts (HIGHEST VALUE for voice profiling)
+
+Podcast interviews are THE gold mine for personality modeling. Hours of
+unscripted speech, natural conversation, real personality showing through.
+
+**Discovery:**
+```
+web_search("{name} podcast transcript interview")
+web_search("{name} lex fridman OR tyler cowen OR joe rogan OR dwarkesh")
+```
+
+**Extraction — verified working transcript sources:**
+```python
+# Lex Fridman (full verbatim transcripts)
+web_extract(["https://lexfridman.com/EPISODE_URL/transcript"])
+
+# Conversations with Tyler (Tyler Cowen — full transcripts)
+web_extract(["https://conversationswithtyler.com/episodes/..."])
+
+# TED Talks transcripts
+web_extract(["https://www.ted.com/talks/.../transcript"])
+
+# Sequoia Capital podcast
+web_extract(["https://www.sequoiacap.com/podcast/..."])
+```
+
+Podcast transcripts reveal:
+- Natural speech patterns (filler words, pacing, sentence structure)
+- Unguarded opinions (less curated than tweets)
+- How they respond to pushback (interviewer challenges)
+- Humor style in conversation (different from written humor)
+- Depth of knowledge on specific topics
+- Personality under pressure
+
+## YouTube / Video Intelligence
+
+```
+web_search("{name} youtube talk keynote interview")
+web_search("{name} podcast appearance")
+```
+
+web_extract on YouTube pages returns rich summaries with attributed quotes.
+Use youtube-content skill for full transcripts if available.
+
+## Personal Blogs & Substacks (HIGH VALUE)
+
+Personal writing is curated self-expression — how someone WANTS to be
+seen intellectually. Very different signal from social media.
+
+```
+web_search("{name} blog substack essay")
+# Extract full posts
+web_extract(["https://{blog-url}/"])
+# Wayback Machine works for archived blog posts
+web_extract(["https://web.archive.org/web/2024/{blog-url}"])
+```
+
+## GitHub Intelligence
+
+For technical people:
+
+```
+web_search("site:github.com {handle}")
+web_extract(["https://github.com/{handle}"])
+
+# Issue comments reveal communication style under technical pressure
+web_search("site:github.com {handle} issue comment")
+
+# README style reveals documentation personality
+# Commit messages reveal terseness vs verbosity
+```
+
+## General Web Footprint
+
+```
+# Personal website / blog
+web_search("{name} personal website blog about")
+
+# Conference talks / speaker bios
+web_search("{name} speaker conference talk bio")
+
+# News mentions
+web_search("{name} {company} news interview profile")
+
+# Academic papers (for researchers)
+web_search("{name} arxiv paper author")
+web_search("site:scholar.google.com {name}")
+
+# Podcast appearances
+web_search("{name} podcast guest appearance")
+
+# Forum posts (HN, specific communities)
+web_search("site:news.ycombinator.com {handle} OR {name}")
+```
+
+## Cross-Platform Identity Resolution
+
+### Handle Mapping Strategy
+1. Start from known handle (usually Twitter)
+2. Check bio links — most people link to other platforms
+3. Search "{known_handle} {platform}" for each platform
+4. Check personal website for social links
+5. Reverse image search profile pic to find matching accounts
+6. Search unique phrases they use across platforms
+
+### Identity Verification
+When you find a potential match on another platform:
+- Same profile picture? (reverse image search)
+- Same bio keywords?
+- Same name/handle pattern?
+- Cross-references (do they mention each other?)
+- Writing style match?
+
+## Search Space Narrowing
+
+### The Jiggle Technique
+When broad searches return noise, narrow progressively:
+
+1. **Start broad**: `"{name}" AI` 
+2. **Add role**: `"{name}" {company} {role}`
+3. **Add context**: `"{name}" {company} {specific_project_or_topic}`
+4. **Add platform**: `site:{platform} "{name}" {context}`
+5. **Add time**: `"{name}" {topic} 2025 OR 2026`
+6. **Quote unique phrases**: if you found a distinctive phrase they use, search for that exact phrase to find more of their content
+
+### Disambiguation
+Common names need extra signals:
+- Add their company/org
+- Add their specific domain (AI, crypto, etc.)
+- Use their unique handle as anchor
+- Search for combinations of their known associates
+- Use image search to verify you have the right person
+
+### Signal vs Noise Heuristics
+- **High signal**: direct quotes, interview transcripts, personal blog posts, long-form content
+- **Medium signal**: mentions in aggregator sites, conference bios, LinkedIn summaries
+- **Low signal**: generic news mentions, third-party profiles, directory listings
+- **Noise**: same-name different person, outdated info (>2 years), scraped/regurgitated content
+
+## Confidence Calibration
+
+After full OSINT sweep, rate data quality:
+
+| Confidence | Data Available | Simulation Quality |
+|-----------|---------------|-------------------|
+| 95-100% | 50+ posts, longform, video, visual, cross-platform | Near-perfect voice replication |
+| 80-94% | 20-49 posts, some longform, basic visual | Very good, occasional educated guesses |
+| 60-79% | 10-19 posts, mostly short-form | Good general sense, some gaps |
+| 40-59% | 5-9 posts, limited platforms | Broad strokes only, flag uncertainty |
+| 20-39% | <5 posts, single platform | Sketch at best, heavy disclaimers |
+| <20% | Almost nothing found | Decline to simulate, ask user for context |
+
+## Privacy & Ethics Note
+
+All research uses publicly available information only. We don't:
+- Access private/locked accounts
+- Bypass authentication
+- Use leaked/hacked data
+- Dox or expose private information
+- Simulate in ways designed to deceive or impersonate
+
+The goal is personality MODELING for creative simulation, grounded in
+what people choose to share publicly.
@@ -0,0 +1,334 @@
+# Prediction Engine — Forecasting What Someone Would Say/Do
+
+Techniques for predicting behavior grounded in superforecasting methodology,
+behavioral science, and SOTA LLM prediction research.
+
+## Superforecasting Principles (Tetlock)
+
+**Honest caveat**: Superforecasting methodology was developed for geopolitical and
+world-event prediction, not personality simulation. That said, the THINKING TOOLS
+are genuinely useful here — decomposition prevents lazy pattern-matching, base rates
+fight overconfidence, and alternative hypotheses prevent single-track predictions.
+What does NOT transfer cleanly: the calibration precision. When Tetlock says "70%
+confident," that's backed by thousands of scored predictions. When we say "70%
+confident" about what @someone would tweet, that's an educated estimate, not a
+calibrated probability. Use the framework for its rigor, not for a false sense of precision.
+
+Apply these thinking tools when making behavioral predictions:
+
+### 1. Decomposition (Fermi-ize the Question)
+Don't ask "What would @person say about X?"
+Break it down:
+- What is @person's known position on topics RELATED to X?
+- What are their values/priorities that X touches on?
+- What is their emotional register when discussing similar topics?
+- Who are they likely responding to, and how does that change their tone?
+- What platform are they on, and how does that shift their behavior?
+
+### 2. Outside View First (Base Rates)
+Before considering the specific person, ask:
+- What would a TYPICAL person in their role/position say about X?
+- What % of people in their ideological cluster hold position Y on X?
+- What's the base rate for their type of response (agree/disagree/joke/ignore)?
+
+### 3. Inside View Second (Case-Specific Adjustment)
+Now adjust from the base rate using what you ACTUALLY KNOW about them:
+- Specific past statements on this topic or related topics
+- Known relationships with people/orgs involved
+- Personal experiences that would shape their view
+- Contrarian tendencies (do they predictably go against their cluster?)
+
+### 4. Confidence Calibration
+Express predictions with honest uncertainty. **These are rough buckets, not
+calibrated probabilities. Don't pretend they're more precise than they are.**
+- **90%+ confident**: They've literally said this before, just rephrased
+- **70-89%**: Strong pattern match with known positions and voice
+- **50-69%**: Reasonable inference but could go either way
+- **30-49%**: Educated guess, limited data
+- **<30%**: Basically guessing, flag it clearly
+
+When reporting confidence, prefer plain language over fake precision:
+say "very likely" rather than "87% probability". The number implies a precision we don't have.
+
+### 5. Consider Alternative Hypotheses
+For every prediction, generate at least ONE plausible alternative:
+- "They'd PROBABLY say X, but they might surprise with Y because Z"
+- This prevents overconfident single-track predictions
+
+## The Prediction Pipeline
+
+### Step 1: Classify the Prediction Type
+
+| Type | Description | Difficulty |
+|------|-------------|-----------|
+| **Position prediction** | What they believe about X | Easiest if data exists |
+| **Reaction prediction** | How they'd respond to event Y | Medium |
+| **Voice prediction** | How they'd phrase something | Medium-hard |
+| **Behavior prediction** | What they'd DO (not just say) | Hardest |
+| **Interaction prediction** | How they'd respond to specific person | Hard, depends on relationship data |
+
+### Step 2: Evidence Gathering Protocol
+
+For each prediction, gather evidence in this order:
+
+1. **Direct evidence**: Have they addressed this exact topic before?
+   - Search: `"{handle}" "{topic}"` or `"{handle}" "{related_keyword}"`
+   - Weight: HIGHEST
+
+2. **Analogical evidence**: Have they addressed something similar?
+   - Search: find positions on adjacent topics
+   - Weight: HIGH
+
+3. **Value evidence**: What values/principles would apply?
+   - Infer from their stated beliefs and consistent positions
+   - Weight: MEDIUM
+
+4. **Social evidence**: What do their peers/allies think?
+   - People tend to align with their social cluster (but not always)
+   - Weight: LOW-MEDIUM (higher for conformists, lower for contrarians)
+
+5. **Demographic evidence**: What would someone in their position typically think?
+   - Base rate from role/industry/ideology
+   - Weight: LOWEST (only use as anchor, not conclusion)
+
+### Step 2b: Contradiction Handling Protocol
+When evidence conflicts (e.g., person said X in 2024 but Y in 2026):
+
+1. **Check for genuine change**: Did they explicitly reverse position? Look for
+   "I used to think X but now..." or a clear pivot moment. If so, use the newer
+   position and note the evolution.
+
+2. **Check for context-dependence**: Did they say X to audience A and Y to audience B?
+   This isn't necessarily dishonesty — people emphasize different facets for different
+   contexts. Note which context your simulation targets and use the matching register.
+
+3. **Check for nuance collapse**: Maybe they said "X is mostly good with caveats"
+   and later "X has real problems" — these might not actually contradict. Look for
+   the synthesis position.
+
+4. **When genuinely unresolvable**: Flag it explicitly. "Evidence conflicts on this
+   point — they've argued both sides at different times. Simulating {chosen position}
+   based on {reasoning}, but the alternative is plausible." Don't paper over the
+   contradiction with false confidence.
+
+5. **Recency default**: When all else fails, weight more recent statements higher.
+   People change, and the most recent position is the best predictor of the next one.
+
+### Step 3: Generate Prediction
+
+Using the HumanLLM B = f(P, E) framework (behavior as a function of person and environment):
+- **P (Person)**: Everything from the dossier — personality, values, voice
+- **E (Environment)**: The specific context — platform, topic, who's asking,
+  what just happened, social dynamics in play
+
+Generate the prediction by:
+1. Setting the base rate (outside view)
+2. Adjusting for personal specifics (inside view)
+3. Filtering through their voice profile (how they'd phrase it)
+4. Applying platform-specific behavior patterns
+5. Calibrating confidence
+
+## Memory Curation (The 30-50 Rule)
+
+Research shows performance PEAKS at 30-50 memory entries then DECLINES.
+For each person in a simulation, curate memories:
+
+### What to Include (high signal)
+- **Signature takes**: Their most characteristic/famous positions (5-10)
+- **Voice samples**: Real quotes that capture their linguistic style (5-10)
+- **Relationship data**: Known dynamics with other sim targets (3-5)
+- **Recent context**: What they've been talking about lately (3-5)
+- **Formative moments**: Career milestones, public pivots, viral moments (3-5)
+- **Quirks & tells**: Catchphrases, humor style, pet peeves (3-5)
+
+### What to Exclude (noise)
+- Generic biographical facts that don't predict behavior
+- Old positions they've clearly evolved past
+- Trivial interactions that don't reveal personality
+- Secondhand characterizations (what others say about them)
+- Platform metadata (follower counts, join dates) unless directly relevant
+
+### Memory Selection Heuristic
+For each candidate memory entry, ask:
+**"If I removed this, would the simulation noticeably degrade?"**
+If no, cut it.
+
+## Fighting LLM Defaults
+
+Research shows LLMs have systematic biases in simulation. The fixes below need to be
+CONCRETE — vague instructions like "be more like them" don't work. You need specific
+prompting patterns that actually shift the output.
+
+### Problem: Sycophancy & Over-Agreement
+LLMs default to agreement and positivity.
+**Fix**: Don't just note that the person is contrarian — structure it as a behavioral
+instruction with evidence:
+```
+"In this conversation, {person} disagrees with {other_person} on {topic}. They are
+noticeably more confrontational than the other speakers. They tend to respond to
+consensus with skepticism and reframe debates on their own terms. Example from their
+real posts: '{actual quote where they disagreed with something popular}'"
+```
+
+### Problem: Rigid/Polarized Strategies
+LLMs tend to take extreme positions and hold them rigidly.
+**Fix**: Provide specific nuance instructions:
+```
+"In this conversation, {person} holds a complex position on {topic}: they agree with
+{point A} but push back on {point B}. They're the type to say 'yes, but...' rather
+than 'no.' Real example of their nuance: '{quote showing them holding a both-and
+position}'"
+```
+
+### Problem: Uniform Register
+LLMs default to a similar educated-casual tone for everyone.
+**Fix**: Anchor voice with REAL QUOTES and explicit comparative instructions:
+```
+"In this conversation, {person} is noticeably more {trait} than the other speakers.
+They tend to {specific behavior pattern}. Their sentences are typically {length/style}.
+They {do/don't} use emoji. Their humor style is {type}. Example from their real posts:
+'{actual quote that captures their voice}'"
+```
+The more you can say "{person} does THIS while {other_person} does THAT," the better
+the differentiation. Comparative framing outperforms absolute descriptions.
+
+### Problem: Overly Structured Responses
+LLMs love neat arguments with clear structure.
+**Fix**: Provide explicit structural anti-patterns:
+```
+"When generating {person}'s messages, break conventional structure. They start one
+thought and jump to another mid-sentence. They use '...' and '—' instead of periods.
+They repeat words for emphasis. They don't conclude neatly. Example: '{real quote
+showing their chaotic structure}'"
+```
+
+### Problem: Missing Mundane Behavior
+LLMs focus on "interesting" responses and skip boring/mundane ones.
+**Fix**: Explicitly instruct for mundane moments:
+```
+"Not every message from {person} needs to be insightful. Include at least 1-2 messages
+that are just reactions ('lmao', 'this', 'wait what'), link shares without commentary,
+or brief agreements. Real people don't craft every message. {person} specifically tends
+to {their specific mundane behavior pattern, e.g., 'drop a single emoji reaction'
+or 'just retweet without comment'}."
+```
+
+### General Principle for All Fixes
+The pattern is always: **behavioral instruction + comparative framing + real evidence**.
+- "Do X" alone doesn't work well
+- "Do X, unlike the default of Y" works better  
+- "Do X, unlike the default of Y, as evidenced by this real quote: Z" works best
+
+## The Adjective-Based Personality Method
+
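+Based on 70 bipolar adjective pairs for Big Five traits (the lists below are a
+condensed subset, not all 70 pairs). Select up to 3 adjectives per trait, with
+intensity modifiers; the example under Usage picks one per trait for brevity.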
+
+### Openness
+High: creative, curious, imaginative, artistic, adventurous, intellectual,
+      unconventional, perceptive
+Low:  conventional, practical, traditional, routine-oriented, narrow
+
+### Conscientiousness  
+High: organized, disciplined, reliable, meticulous, systematic, thorough,
+      goal-oriented, persistent
+Low:  careless, impulsive, disorganized, spontaneous, flexible, relaxed
+
+### Extraversion
+High: outgoing, talkative, energetic, assertive, enthusiastic, bold,
+      gregarious, dominant
+Low:  reserved, quiet, introverted, solitary, withdrawn, reflective
+
+### Agreeableness
+High: cooperative, trusting, empathetic, generous, accommodating, kind,
+      diplomatic, forgiving
+Low:  competitive, skeptical, blunt, confrontational, critical, stubborn,
+      independent-minded
+
+### Neuroticism
+High: anxious, moody, sensitive, reactive, volatile, self-conscious,
+      insecure, emotional
+Low:  calm, stable, resilient, confident, even-tempered, composed,
+      thick-skinned
+
+### Usage
+For each simulated person, after OSINT research, estimate their Big Five
+profile and select appropriate adjectives:
+
+Example: "@basedjensen: very creative, somewhat impulsive, very outgoing,
+a bit competitive, calm" → this shapes the generation toward the right
+behavioral profile.
+
+## Interaction Dynamics Prediction
+
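+When simulating conversations between multiple people, remember that predictions
+apply to a SPECIFIC REGISTER. See the next section on performative vs. authentic
+behavior; the interaction mechanics themselves (dominance hierarchy, agreement/
+disagreement, conversation flow) follow at the end of that section.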
+
+## Performative vs. Authentic Behavior
+
+**Critical concept**: People act differently for different audiences. A simulation
+must be explicit about which register it's targeting.
+
+### The Register Spectrum
+- **Public broadcast** (tweets, Reddit posts): Most performative. People are
+  playing to their audience, building their brand, signaling to their tribe.
+- **Semi-public** (Discord channels, group chats, comment threads): Less
+  performative but still audience-aware. People are more casual but know
+  others are watching.
+- **Private 1-on-1** (DMs): Much less performative. More honest, more
+  vulnerable, more willing to express doubt or uncertainty.  
+- **True private** (inner monologue, close friends): We have almost no data
+  on this. Don't pretend to simulate it.
+
+### Practical implications
+- When simulating a PUBLIC thread, lean into the person's public persona —
+  their brand, their usual takes, their audience-aware voice.
+- When simulating DMs, dial down the performance. More hedging, more honesty,
+  more "I actually think..." vs. the public "Here's my take:".
+- When evidence comes from one register but the simulation targets another,
+  FLAG IT: "Evidence is from public tweets but simulating DM behavior —
+  expect the real person to be less {polished/aggressive/confident} in private."
+- Someone's Twitter persona may be genuinely different from their Reddit persona.
+  These are not interchangeable data sources. Weight evidence from the matching
+  platform higher.
+
+### What we can't know
+Be honest: we're simulating public figures based on their public output. The
+private person may be substantially different. DM simulations are inherently
+lower-confidence than public thread simulations because we have less data on
+how people behave privately.
+
+### Dominance Hierarchy
+- Who talks first? (most confident/highest-status usually)
+- Who responds to whom? (not everyone talks to everyone)
+- Who gets ratio'd? (lowest-status takes get challenged)
+- Who lurks? (some people watch before engaging)
+
+### Agreement/Disagreement Prediction
+Based on known positions + social dynamics:
+- **Strong agree**: Both have stated similar positions + friendly relationship
+- **Agree with nuance**: Similar positions but one adds a caveat
+- **Productive disagreement**: Different positions + mutual respect
+- **Hostile disagreement**: Different positions + existing tension/rivalry
+- **Surprising agreement**: Expected to disagree but find common ground
+- **Ignore**: Some people just don't engage with certain others
+
+### Conversation Flow Prediction
+Real conversations follow patterns:
+1. **Opener** → most active/impulsive person posts first
+2. **First response** → most engaged/relevant person responds
+3. **Pile-on or pushback** → depends on agreement/disagreement dynamics
+4. **Tangent** → someone takes a side thread
+5. **Peak moment** → the best/most viral exchange
+6. **Trail off** → energy dissipates, last person makes a joke or short comment
+
+## Scenario Injection Prediction
+
+When "inject: {event}" is used, predict reactions:
+
+1. **Who would see this first?** (most online / most relevant to their work)
+2. **Who would care most?** (most affected / strongest opinion)
+3. **What's the emotional valence?** (good news for some, bad for others)
+4. **What's the expected take?** (apply position prediction pipeline)
+5. **How does this change the existing conversation?** (derail, amplify, redirect)
@@ -0,0 +1,237 @@
+# Recursive Self-Improvement Pipeline
+
+The simulator should get better every time it runs. Not through training —
+through accumulating failure patterns, calibration data, and learned rules
+that feed back into future simulations.
+
+## The Loop
+
+```
+SIMULATE → VERIFY (mechanical) → SCORE → LOG FAILURES → UPDATE RULES → SIMULATE BETTER
+```
+
+Each run produces two outputs:
+1. The simulation (for the user)
+2. A failure log (for the system)
+
+The failure log feeds back into the next run's verification step,
+making the checklist grow and the blind spots shrink.
+
+## What Gets Logged After Every Simulation
+
+### 1. Mechanical Check Failures
+```
+FAILURE LOG: simulation_{timestamp}
+  EMOJI: @visakanv had 6 fabricated emoji, real rate was 10%. Stripped all.
+  SLOP: @eigenrobot utterance contained "multifaceted" — rewritten.
+  LENGTH: @QiaochuYuan avg 42 words/utterance, real avg was 18. Compressed.
+  CAPS: 4/12 utterances started uppercase, targets are 90% lowercase. Fixed.
+  PUNCTUATION: Added periods to @tszzl who never uses terminal punctuation.
+  STRUCTURE: Sycophantic flow detected — B agreed with A then C agreed with B.
+             Injected disagreement.
+```
+
+### 2. Discriminator Critique Patterns
+```
+CRITIQUE LOG:
+  Round 1: @tszzl too verbose (flagged 2x in last 3 simulations)
+  Round 1: @repligate too academic (flagged 3x — this is a persistent pattern)
+  Round 2: Conversation too neat — real conversations are messier (flagged 5x)
+```
+
+### 3. Held-Out Test Results
+```
+CALIBRATION LOG:
+  Voice fidelity: 8.4/10 (up from 7.5 last run)
+  Topic prediction: 2/5 topics matched (typical — content is unpredictable)
+  Register match: 9/10 (improved after emoji fix)
+```
+
+## How Failures Feed Forward
+
+### Pattern Accumulation
+After N runs, persistent failure patterns become AUTOMATIC rules:
+
+```
+IF a pattern is flagged in 3+ consecutive simulations:
+  PROMOTE it from "check" to "pre-generation rule"
+  
+Example progression:
+  Run 1: "Too verbose for @tszzl" → flagged in Round 1, fixed
+  Run 2: "Too verbose for @tszzl" → flagged again, fixed again
+  Run 3: "Too verbose for @tszzl" → PROMOTED to pre-gen rule:
+         "When simulating roon-type voices: max 20 words per tweet.
+          Fragment > sentence. Compress ruthlessly."
+```
+
+### The Growing Checklist
+The mechanical verification checklist starts with the baseline checks
+(emoji, slop, length, caps, punctuation) and GROWS with each failure:
+
+```
+BASELINE CHECKS (permanent):
+  □ Emoji frequency match
+  □ Slop word scan (Tier 1/2/3)
+  □ Sentence length match
+  □ Capitalization match
+  □ Punctuation pattern match
+  □ Reply/original ratio
+  □ Structural slop patterns
+
+LEARNED CHECKS (accumulated from past failures):
+  □ Roon-type voices: max 20 words (from: verbose failure x3)
+  □ Warm personalities: do NOT add emoji (from: emoji inflation x5)
+  □ Academic voices: ground in specific examples (from: too abstract x3)
+  □ Conversations: inject at least one disagreement (from: sycophantic flow x4)
+  □ Self-deprecating voices: add hedging (from: too assertive x2)
+  □ Shitposters: include at least one non-sequitur (from: too on-topic x2)
+```
+
+### Where To Store Learned Rules
+Append to the skill itself. After each simulation run where the mechanical
+checks catch something, the agent should ask:
+
+"The mechanical verification caught {failures}. Should I add these as
+permanent learned rules for future simulations?"
+
+If the same failure is flagged in 3+ consecutive simulations, add it automatically without asking.
+
+Use skill_manage(action='patch') to append to this file's "Current Learned
+Checks" section below.
+
+## Calibration Tracking
+
+### Per-Person Calibration Memory
+After simulating someone, store the calibration data:
+
+```
+@tszzl: voice=8.5, emoji_rate=0%, avg_words=14, lowercase=95%, 
+        signature_move="aphoristic fragments", danger="goes verbose"
+@nickcammarata: voice=8.8, emoji_rate=0%, avg_words=19, lowercase=90%,
+        signature_move="meditation-ML connection", danger="too structured"
+```
+
+If the same person is simulated again, LOAD this calibration to skip
+the cold-start problems. The second simulation of someone should be
+better than the first because you already know their failure modes.
+
+### Aggregate Calibration
+Track overall simulation quality across runs:
+
+```
+Run 1: pre-refine 7.5, post-refine 8.4 (delta +0.9)
+Run 2: pre-refine 8.37, post-refine 8.53 (delta +0.16)  
+Run 3: pre-refine 8.53, post-refine 8.83 (delta +0.30, emoji fix)
+```
+
+The pre-refine score should INCREASE over time as learned rules prevent
+repeat failures. If it's not increasing, the learning loop is broken.
+
+## The Standard: Indistinguishable From Real
+
+The target is not "good enough." The target is: mix simulated posts with
+real posts and a human familiar with the person cannot reliably tell which
+is which. That's 50% accuracy on a blind comparison — random chance.
+
+Every mechanical check, every discriminator round, every learned rule
+exists to push toward that standard. If something doesn't serve that
+goal, it's wasted effort.
+
+## Current Learned Checks (append here after each run)
+
+### From TPOT Simulation Run 1 (April 2026)
+- Warm/enthusiastic personalities (visakanv-type): do NOT add decorative emoji.
+  Bio emoji ≠ tweet emoji. Actual emoji rate for "warm" TPOT posters: <15%.
+  PROMOTED after being caught by user, not by discriminator (discriminator failure).
+- Conversation flow: pure agreement chains are instruct-model slop.
+  Real threads have at least one moment of friction, misunderstanding, or deflection.
+- Academic-leaning voices (repligate-type): ground claims in specific experiments,
+  transcripts, or model behaviors they've personally observed. Generic philosophical
+  language without specifics = slop, even if it sounds smart.
+- Self-deprecating voices (QC-type): hedge more. "i think" "i'm not sure" "it feels like."
+  Instruct models are too assertive even when simulating tentative people.
+- Fragment voices (roon-type): max 15-20 words. No conjunctions. No paragraphs.
+  If it reads like a complete thought, it's too complete for a fragment-poster.
+
+### From TPOT Simulation Run 2 (April 2026)
+- Reframer voices (nosilverv-type): avg ~16 words. Split multi-sentence takes
+  into separate tweets. The compression IS the voice. The mechanical check caught
+  113% over-length on output that subjective scoring rated 8/10. Trust the numbers.
+- Rare-poster voices (selentelechia-type): in a 12-post sim, give them 2-3 turns
+  max. When they speak it must LAND. Short crystallizations > long analysis.
+  "or a shared meal" was the highest-rated line at 3 words.
+- Turn symmetry: ALWAYS check. 4/4/4 is instruct-model default. Real conversations
+  have one person dominating (5), one lurking (3), others in between.
+- Verbose bias is the #1 mechanical failure. ALWAYS check avg word count against
+  real baseline BEFORE subjective scoring. Every run so far has caught over-length
+  that subjective scoring missed.
+- RHETORICAL POLISH IS SLOP. Caught post-mechanical-pass in Run 2 review.
+  Parallel antithesis ("The most X... The most Y..."), "Not X, not Y, but Z",
+  "Show me X and I'll show you Y", clean 4-step escalations, academic vocabulary
+  in casual voice — ALL passed mechanical checks but are still obviously LLM.
+  PROMOTED TO MECHANICAL SCAN: now regex-scannable alongside slop words.
+- THE BANGER PROBLEM: every simulated tweet was screenshot-worthy. Real feeds
+  are 70% mid. Must include throwaway responses ("lol" "hmm" "fair" "wait actually").
+  PROMOTED: banger check is now mandatory in mechanical verification.
+
+### From TPOT Simulation Run 3 — Star Thread Discovery (April 2026)
+- STAR THREAD IS THE KEY. Dossier-first generation produces surface-accurate
+  but dead output. Star-thread-first generation produces messy, alive output
+  that actually sounds like the person. Generate from the thread. Verify with data.
+- Rhetorical polish vanished once generation came from "what is this person DOING"
+  rather than "what would this person SAY." Reframers reframe. Conveners convene.
+  Distillers distill. The VERB drives the voice, not the adjectives.
+- People in conversation REFERENCE EACH OTHER BY NAME. Tyler says "Bosco always
+  comes in with the three word version." This is obvious but the dossier approach
+  never produced it because it models each person in isolation.
+- PROMOTED: star thread is now the FIRST entry in every dossier. Before voice
+  profile, before psychometrics, before everything else. It's the generation seed.
+  Everything else is verification.
+
+### Operational Findings (verified April 2026)
+- X API bearer token: 10K tweets/15min, 300 profiles/15min, 450 searches/15min.
+  Most generous rate limits. Always use as primary source.
+- threads.net now redirects to threads.com. Always use curl's -L flag or request .com directly.
+  Previous test saying "no OG tags" was WRONG — tags exist, domain was wrong.
+- Instagram private API: i.instagram.com + mobile UA + x-ig-app-id: 936619743392459.
+  Returns full JSON with 12 posts. No auth needed. CDN image URLs work for vision_analyze.
+- Facebook: Googlebot UA trick works for public pages. Returns name, bio, likes (121M for zuck).
+  Normal UA and mobile variants all redirect to login wall.
+- TikTok: stats are in __UNIVERSAL_DATA_FOR_REHYDRATION__ JSON at path
+  __DEFAULT_SCOPE__.webapp.user-detail.userInfo.statsV2 (use statsV2 not stats).
+- Bluesky searchPosts returns 403 from datacenter IPs. Workaround: searchActors + getAuthorFeed.
+- nitter.cz is the ONLY working nitter instance (via web_extract, not curl).
+- Reddit JSON API requires User-Agent header or returns 429.
+- GEPA native had `max_steps` API mismatch with DSPy 3.1.3. MIPROv2 fallback works.
+  hermes-agent-self-evolution config: max_skill_size bumped to 20_000 for worldsim-class skills.
+- hermes-agent-self-evolution is at ~/.hermes/hermes-agent-self-evolution/ with .venv.
+  Must export API keys from ~/.hermes/.env before running.
+- Podcast transcripts (Lex Fridman, Tyler Cowen, TED) are the HIGHEST VALUE source
+  for voice profiling. Hours of unscripted speech > thousands of tweets.
+
+### From Simulation Run 4 — Engine Mode + Profile Command (April 2026)
+- ENGINE MODE: When worldsim is active, ZERO assistant personality leaks.
+  No kawaii, no markdown, no chatty commentary between phases. Every token
+  is simulation fidelity. First attempt leaked personality; user corrected.
+  PROMOTED TO PERMANENT RULE in SKILL.md.
+- X API CURL > NITTER for voice calibration. nitter.cz returns 502 or "user
+  not found" unpredictably. Direct curl to X API v2 with bearer token returns
+  full text + metrics. 3 pages (90 tweets) is enough for fidelity 100. Always
+  use this as PRIMARY voice source, nitter as supplement only.
+- CAPS BURST PATTERN: some voices (karan4d-type) use lowercase default with
+  sporadic ALL CAPS for excitement ("WAZZAAAAAAPPPP", "LAWDAMERCYYYYY",
+  "AWOOGA"). This is distinct from consistent-lowercase (tenobrus-type) and
+  sentence-case (somewheresy-type). Capture this in voice profile as a
+  three-way distinction: consistent-lowercase, caps-burst, sentence-case.
+- TEXT EMOTICONS vs EMOJI: karan4d uses :) >.< ~ but almost zero standard
+  emoji. This is a distinct expressiveness mode from zero-emoji (tenobrus)
+  and sparse-emoji. Include text emoticon inventory in voice profile.
+- STAR THREAD 5/5 TEST is mandatory for profile command. Write the thread,
+  then test it against 5 real posts with explicit reasoning per post. If
+  fewer than 4/5 fit, the thread is wrong — keep looking. Show the work.
+- PROFILE OUTPUT: star thread → voice profile (caps, punctuation, word count,
+  emoji/emoticon inventory, vocabulary, register, threading behavior) →
+  psychometrics (Big Five, Moral Foundations, cognitive style) → key positions
+  (with dates and real tweet quotes) → ecosystem (inner circle, professional,
+  cultural) → intelligence tradecraft (key assumptions, red hat, deception
+  detection, competing hypotheses) → invalidation indicators → source reliability.
@@ -0,0 +1,278 @@
+# Search Strategies — Finding Anyone Across Platforms
+
+The hardest part of simulation is building an accurate model of a real person. This doc
+covers how to systematically discover and profile someone across every platform we care about.
+
+## General Principles
+
+1. **Start broad, go narrow.** First establish WHO they are, then drill into HOW they talk.
+2. **Cross-reference.** Someone's Reddit persona may differ wildly from their Twitter persona. That's signal, not noise.
+3. **Recency matters.** People's views evolve. Weight recent posts (last 6 months) over older ones.
+4. **Interactions > monologues.** How someone replies reveals more about their voice than their prepared posts.
+5. **Controversy is gold.** People are most themselves when arguing. Search for debates and disagreements.
+
+## Platform-Specific Discovery
+
+### X / Twitter
+
+Twitter is the richest source for most public figures in tech/AI. Multiple approaches:
+
+#### With x-cli (if API keys available)
+```bash
+# Recent timeline — best single source of voice data
+x-cli user timeline {handle} --max 30 -j
+
+# Their replies — how they interact, argue, joke
+x-cli tweet search "from:{handle} is:reply" --max 30 -j
+
+# What others say about/to them
+x-cli tweet search "to:{handle}" --max 20 -j
+
+# On specific topics
+x-cli tweet search "from:{handle} open source" --max 10 -j
+```
+
+#### Without API (web_search + web_extract)
+```
+# Identity + role
+web_search("{handle} twitter bio role company")
+
+# Voice + opinions
+web_search("{handle} twitter hot takes opinions")
+web_search("site:x.com {handle}")
+
+# Topic-specific positions
+web_search("{handle} twitter {topic}")
+web_search("{handle} {topic} opinion take")
+
+# Interviews / longform (reveals deeper thinking)
+web_search("{handle} interview podcast AI")
+web_search("{handle} blog post essay")
+
+# Beefs and debates (reveals personality under pressure)
+web_search("{handle} twitter debate disagree controversial")
+web_search("{handle} vs {other_person}")
+
+# Newsletter aggregators that index tweets
+web_search("site:buttondown.com/ainews {handle}")
+web_search("site:news.smol.ai {handle}")
+web_search("site:techmeme.com {handle}")
+web_search("site:latent.space {handle}")
+```
+
+#### AI Twitter Aggregator Sites (high value)
+These sites index AI Twitter conversations daily:
+- `buttondown.com/ainews` — swyx's AI News, indexes hundreds of AI Twitter accounts
+- `news.smol.ai` — smol AI news aggregator
+- `techmeme.com` — tech news, includes tweet citations
+- `latent.space` — AI podcast/newsletter with Twitter references
+
+Search pattern: `site:{aggregator} "{handle}"` to find indexed tweets and discussions.
+
+#### IMPORTANT: web_extract does NOT work on x.com
+web_extract returns "Website Not Supported" for all x.com/twitter.com URLs.
+Do NOT attempt it — it wastes a tool call every time.
+
+#### Verified Fallback Access Methods (tested April 2026)
+
+**PRIMARY: X API v2 Bearer Token** (confirmed working)
+- Profiles, timelines, search — 300-10K requests/15min
+- See scripts/x_api.py
+
+**FALLBACK 1: nitter.cz via web_extract** (WORKS)
+```
+web_extract(["https://nitter.cz/{handle}"])
+```
+Returns full profile + recent timeline. Direct curl gets Cloudflare-blocked
+but web_extract bypasses it. Rich data: bio, stats, pinned tweets, full text.
+NOTE: Most other nitter instances are DEAD (nitter.net, xcancel.com, etc.)
+
+**FALLBACK 2: ThreadReaderApp** (WORKS — excellent for historical threads)
+```
+web_extract(["https://threadreaderapp.com/user/{handle}"])
+```
+Returns unrolled historical threads with full text. Found threads back to 2023.
+Gold for longform voice samples.
+
+**FALLBACK 3: GitHub API** (WORKS — excellent for tech people)
+```
+curl -s 'https://api.github.com/users/{handle}'
+curl -s 'https://api.github.com/users/{handle}/repos?sort=updated'
+curl -s 'https://api.github.com/users/{handle}/events'
+curl -s 'https://api.github.com/users/{handle}/gists'
+```
+No auth needed (60 req/hr). Profile READMEs are voice profiling gold.
+Events API shows recent activity with comment text.
+
+**FALLBACK 4: Reddit JSON API** (WORKS)
+```
+curl -s -H 'User-Agent: hermes-sim/1.0' 'https://www.reddit.com/user/{username}.json'
+curl -s -H 'User-Agent: hermes-sim/1.0' 'https://www.reddit.com/user/{username}/comments.json'
+curl -s -H 'User-Agent: hermes-sim/1.0' 'https://www.reddit.com/r/{sub}/search.json?q={query}&restrict_sr=on'
+```
+MUST include User-Agent header or get 429. Reddit voice is often more
+candid/detailed than Twitter voice — high value for personality profiling.
+
+**FALLBACK 5: HackerNews Algolia API** (WORKS — fully open)
+```
+curl -s 'https://hn.algolia.com/api/v1/search?query={name}&tags=comment'
+```
+No auth, no visible rate limits. Great for finding what others say about
+someone; for their own HN comments use `tags=comment,author_{username}`.
+
+**FALLBACK 6: YouTube via web_extract** (WORKS)
+Search for interviews/talks, then web_extract the video pages.
+Returns rich summaries with attributed quotes from specific speakers.
+
+**NOT VIABLE** (tested, confirmed blocked):
+- Google Cache of Twitter → empty results
+- Wayback Machine for tweets → sparse captures, no JS content
+- Twitter Syndication API → rate limited / broken
+- All Instagram viewers (imginn, picuki, dumpoir, gramhir) → 403
+- LinkedIn → fully blocked for scraping
+- Archive.today → rate limited + CAPTCHA
+- Most nitter instances → dead or 403
+
+#### Best approach without x-cli
+The most reliable path is: web_search with aggregator sites (ainews, smol.ai,
+techmeme, latent.space). These index AI Twitter daily and return actual tweet
+text in search descriptions. Stack multiple aggregator searches to build a
+composite picture. This was validated in practice — it returns enough signal
+to build solid dossiers for anyone active in AI Twitter.
+
+### Reddit
+
+Reddit profiles are public and indexable. Reddit users often have very different 
+personas from their Twitter selves — more detailed, more argumentative, more honest.
+
+```
+# Find their Reddit username (often different from Twitter)
+web_search("{real_name} reddit account")
+web_search("{twitter_handle} reddit username")
+
+# Profile and post history
+web_search("site:reddit.com/user/{reddit_username}")
+web_search("site:reddit.com {reddit_username} {topic}")
+
+# Subreddit-specific behavior
+web_search("site:reddit.com/r/LocalLLaMA {username}")
+web_search("site:reddit.com/r/MachineLearning {username}")
+
+# Extract actual posts
+web_extract(["https://www.reddit.com/user/{username}/comments/"])
+web_extract(["https://www.reddit.com/user/{username}/submitted/"])
+```
+
+Key subreddits for AI people:
+- r/LocalLLaMA — open source LLM community
+- r/MachineLearning — academic ML
+- r/singularity — AGI speculation  
+- r/ChatGPT, r/ClaudeAI, r/OpenAI — product-focused
+- r/StableDiffusion — image gen community
+
+### Discord
+
+Discord is hardest — most servers aren't publicly indexed. Strategies:
+
+```
+# Find what servers they're in
+web_search("{name} discord server")
+web_search("{name} discord community")
+
+# Some Discord logs are public via indexers
+web_search("site:discordchats.net {username}")
+
+# AI News indexes some Discord channels
+web_search("site:buttondown.com/ainews discord {name}")
+```
+
+Discord personality notes:
+- People are MUCH more casual on Discord than Twitter
+- More profanity, more shitposting, more stream-of-consciousness
+- Server context matters hugely (same person behaves differently in different servers)
+- Harder to research but very valuable if you can find logs
+
+### Blogs / Newsletters / Long-form
+
+These reveal deeper thinking that tweets can't capture:
+
+```
+web_search("{name} blog substack medium")
+web_search("{name} essay AI opinion")
+web_search("{name} substack newsletter")
+
+# Personal sites
+web_search("{name} personal website about")
+
+# Extract full posts
+web_extract(["https://{their-substack}.substack.com/"])
+```
+
+### YouTube / Podcasts
+
+Interview appearances reveal speaking style, humor, and unscripted thinking:
+
+```
+web_search("{name} podcast interview AI YouTube")
+web_search("{name} YouTube talk presentation")
+
+# Use youtube-content skill if available to pull transcripts
+```
+
+### GitHub
+
+For technical people, their GitHub activity reveals priorities and communication style:
+
+```
+web_search("site:github.com {username} issues comments")
+web_search("site:github.com {username}")
+
+# Issue comments and PR reviews show how they communicate technically
+web_extract(["https://github.com/{username}"])
+```
+
+## Cross-Platform Identity Resolution
+
+People use different handles across platforms. Resolution strategies:
+
+1. **Bio links**: Twitter bios often link to personal sites with other handles
+2. **Name search**: `web_search("{real_name} {platform}")` 
+3. **Email/domain**: personal domains often connect identities
+4. **Aggregator profiles**: sites like Linktree, bio.link collect handles
+5. **Conference talks**: speaker bios list multiple handles
+6. **Direct search**: `web_search("{twitter_handle} reddit OR github OR discord")`
+
+## Confidence Scoring
+
+After research, rate confidence for each person:
+
+- **HIGH (80-100%)**: 20+ indexed tweets/posts found, clear voice patterns, known positions on multiple topics, interviews/longform available
+- **MEDIUM (50-79%)**: 5-19 indexed posts, general voice sense but some gaps, positions on some topics unclear
+- **LOW (20-49%)**: <5 posts found, voice is guesswork, mostly inferring from role/org
+- **INSUFFICIENT (<20%)**: can't find enough to simulate accurately. Tell the user.
+
+Always be honest about confidence. A low-confidence simulation should be flagged as such.
+
+## Research Optimization
+
+For fidelity levels:
+
+**Low (1-30)**: 2 searches per person max
+- web_search("{handle} twitter") — identity
+- web_search("{handle} {topic}") — position on topic if specified
+
+**Medium (31-70)**: 4-6 searches per person
+- Identity search
+- Voice/opinions search  
+- Topic-specific search
+- One aggregator site search
+- Optional: one web_extract on a blog/interview
+
+**High (71-100)**: 8-12+ searches per person
+- All medium searches
+- Multiple aggregator sites
+- web_extract on 2-3 longform pieces
+- Cross-platform search (Reddit, GitHub)
+- Debate/controversy search
+- Recent vs historical position comparison
+- Browser fallback if needed
@@ -0,0 +1,359 @@
+# Simulation Engine — How to Generate Conversations
+
+This is the playbook for Phase 3: actually generating the simulated interaction.
+The agent reads this after compiling dossiers and uses it to guide generation.
+
+## Pre-Generation Checklist
+
+Before writing a single simulated word, confirm:
+- [ ] Every participant has a compiled dossier
+- [ ] Confidence level is noted for each participant  
+- [ ] Platform format is selected
+- [ ] Topic/scenario is established (or "organic" if freeform)
+- [ ] Length target is set
+
+## Conversation Architecture
+
+Real conversations aren't ping-pong debates. They have tendencies toward structure,
+but treat the following as a GENERAL PATTERN, not a rigid template. Real threads
+frequently skip phases, loop back to earlier ones, die abruptly after 2 messages,
+or spiral into something completely unrelated. Some threads are ALL peak. Some
+never develop past the opening. Let the personalities and topic drive the shape,
+not this outline.
+
+### Opening Moves (1-3 posts)
+Someone posts a take, shares news, or makes an observation. This is the SEED.
+- Should feel natural — not "let me start a debate about X"
+- Can be a link share, a hot take, a reaction to news, a shitpost
+- The opener should be something this person would ACTUALLY post
+
+### Development (4-8 posts)  
+Others respond. This is where personality dynamics emerge.
+- Not everyone responds to the original — people respond to EACH OTHER
+- Side conversations branch off
+- Someone might misunderstand and get corrected
+- Jokes and tangents happen naturally
+- Not everyone agrees — find the real fault lines between these people
+
+### Peak (2-4 posts)
+The best/most viral/most insightful moment of the thread.
+- Usually someone drops a genuinely good take
+- Or someone gets ratio'd
+- Or an unexpected agreement happens
+- This is the "screenshot moment" people share
+
+### Resolution (1-3 posts)
+Most conversations don't end cleanly. Many don't have a "resolution" at all. They:
+- Trail off with someone making a joke
+- End with an "anyway back to work" type post
+- Get interrupted by something else
+- Sometimes just stop (most realistic)
+- Get revived 3 hours later when someone shows up late
+
+**Important**: Don't force all four phases. A shitpost thread might be Opening→Peak→done.
+A nuanced debate might loop Development→Peak→Development→Peak repeatedly. Match what
+the actual people and topic would produce.
+
+## Voice Fidelity Rules
+
+### DO:
+- Use their ACTUAL vocabulary. If someone says "dawg" a lot, use "dawg"
+- Match their sentence length patterns exactly
+- Replicate their capitalization and punctuation habits
+- Include their signature moves and catchphrases
+- Reference real things they've actually talked about
+- Match their humor style precisely (deadpan ≠ shitpost ≠ sarcasm)
+
+### DON'T:
+- Make everyone articulate the same way
+- Clean up someone's grammar if they write informally
+- Add emoji to someone who doesn't use them — THIS IS THE #1 INSTRUCT MODEL
+  FAILURE. Most real people use emoji in <15% of tweets, and only specific ones.
+  "Warm person" ≠ emoji. "Enthusiastic person" ≠ emoji. CHECK THE DATA.
+  Run an emoji count on their real tweets before simulating. Bio emoji ≠ tweet emoji.
+- Make someone verbose if they're terse
+- Put academic language in a shitposter's mouth
+- Make someone agreeable if they're known for being contrarian
+
+### Voice Differentiation Test
+Read each simulated post with the name hidden. If you can't tell who's 
+talking from the voice alone, the simulation isn't good enough. Rewrite.
+
+### The Similar Voice Problem
+When two participants have genuinely similar posting styles (e.g., two irony-pilled
+shitposters, two academic long-posters), voice alone won't differentiate them.
+Use these concrete techniques:
+
+1. **Content/position divergence**: Even if they SOUND similar, they care about
+   different things. Lean into their different topic obsessions and knowledge areas.
+2. **Unique references**: Person A references anime and startups. Person B references
+   philosophy and MMA. Even in the same register, their cultural touchstones differ.
+3. **Relationship dynamics**: Person A might be deferential to Person C while Person B
+   challenges them. Their SOCIAL behavior differentiates even when solo voice doesn't.
+4. **Structural tics**: One does single long posts, the other does rapid-fire 3-message
+   bursts. One uses parentheticals, the other uses em-dashes. Find the micro-differences.
+5. **Disagreement style**: Similar voices often diverge most when disagreeing. One
+   goes cold and precise, the other gets heated and hyperbolic. Manufacture a moment
+   of friction to surface these differences early in the thread.
+
+If after all this they're STILL hard to tell apart — that's okay. Some people genuinely
+sound similar online. Flag it in your confidence notes rather than forcing fake differences.
+
+### Temporal Personality Drift
+People change. Weight recent data higher than old data.
+- Someone's 2021 tweets may reflect a completely different person than their 2025 posts
+- Look for explicit pivots (career changes, public "I was wrong about X" moments,
+  changed social circles)
+- If you only have old data, flag it: "Based on data from {period}. Their current
+  views may have shifted."
+- When recent and old data conflict, default to recent unless you have specific reason
+  to believe the old position is more authentic (e.g., the new one is clearly performative)
+
+## Platform Format Specs
+
+### X / Twitter
+```
+@handle:
+  [tweet text — respect ~280 char vibes but don't count exactly]
+  [if QRT, show the quoted tweet indented]
+  🔁 {retweets}  ♡ {likes}
+
+    @replier:
+    [reply text]
+    🔁 {retweets}  ♡ {likes}
+
+      @nested_replier:
+      [nested reply]
+      🔁 {retweets}  ♡ {likes}
+```
+
+Engagement number guidelines:
+- Match to actual follower counts. A 5K account gets 10-500 likes typically.
+- Viral posts can 10-50x normal engagement
+- Ratio indicator: when replies >> likes, that's a ratio
+- QRTs are often dunks — frame them that way if appropriate
+
+Thread indicators:
+- "🧵 1/" for thread starts
+- Reply chains show conversation flow
+- Some people never thread, some always thread
+
+### Reddit
+```
+r/{subreddit} • Posted by u/{username} • {time}ago
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+{Title}
+
+{Body text — can be long on Reddit}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+⬆ {score} | 💬 {comment_count}
+
+  u/{replier} • {time}ago • ⬆ {score}
+  {comment text}
+
+    u/{nested} • {time}ago • ⬆ {score}
+    {nested comment}
+
+      u/{deep_nested} • {time}ago • ⬆ {score}
+      {deep reply}
+```
+
+Reddit-specific behaviors:
+- People write MUCH longer on Reddit
+- More formal/detailed than Twitter
+- Upvote/downvote dynamics (controversial = many votes both ways)
+- Subreddit culture matters (r/LocalLLaMA is different from r/MachineLearning)
+- People cite sources more
+- "Edit: ..." is common
+
+### Discord
+```
+━━━ #{channel-name} ━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+{display_name} — Today at {time}
+{message text}
+{optional: embed/link preview}
+👍 {count}  🔥 {count}  {other reactions}
+
+  {display_name2} — Today at {time}
+  > {quoting previous message}
+  {reply text}
+  😂 {count}
+
+{display_name3} — Today at {time}
+{message — note: Discord messages flow continuously, not just replies}
+```
+
+Discord-specific behaviors:
+- Much more casual, rapid-fire
+- Reactions instead of likes (emoji diversity)
+- People send multiple short messages instead of one long one
+- GIF/meme sharing is common (describe it: *[posts GIF of X]*)
+- "@everyone" and "@here" pings
+- Voice chat references ("just said this in vc")
+- Server-specific culture and inside jokes
+- Bot interactions ("!command")
+
+### X / Twitter DMs
+```
+{display_name}
+{message text}
+{timestamp — e.g., "3:42 PM"}
+
+          {other_person_display_name}
+          {message text}
+          {timestamp}
+
+{display_name}
+{message text}
+{timestamp}
+```
+
+DM-specific behaviors:
+- WAY more casual than public tweets — grammar drops, typos increase
+- Longer messages than tweets (no character pressure)
+- People share links and screenshots with minimal commentary ("look at this lmao")
+- More honest/vulnerable than public posts — less performative
+- Faster back-and-forth, more like texting than posting
+- Reactions (❤️, 😂, etc.) on individual messages
+- Voice messages referenced occasionally ("gonna send a voice note about this")
+- No audience effects — people say things in DMs they'd never post publicly
+
+### Discord DMs
+```
+{display_name} — Today at {time}
+{message text}
+
+{display_name2} — Today at {time}
+{message text}
+
+{display_name} — Today at {time}
+{message text}
+{message text}
+{message text}
+```
+
+Discord DM-specific behaviors:
+- Even more casual than Discord channels — no server norms to follow
+- Rapid-fire multiple short messages in a row (no combining into one)
+- Heavy use of reactions, GIFs, stickers
+- People share server drama, screenshots from other channels
+- More personal topics — server channels are semi-public, DMs are private
+- Link/image sharing with minimal text
+
+### Reddit DMs / Chat
+```
+{username}: {message text}
+{other_username}: {message text}
+{username}: {message text}
+```
+
+Reddit DM-specific behaviors:
+- Much rarer than X or Discord DMs — usually triggered by a specific post/comment
+- Often starts with "Hey, saw your comment on r/{sub} about..."
+- Can be awkward/formal since people don't usually DM on Reddit
+- Shorter than Reddit comments, closer to chat-style
+- Less established rapport than other platforms (Reddit is more anonymous)
+- People sometimes share personal details they wouldn't put in public comments
+
+## Dynamic Elements
+
+### Injecting Realism
+Sprinkle in these to make simulations feel alive:
+- Someone being late to the conversation ("wait what did I miss")
+- Typos that specific people would make (some people never typo, some always do)
+- Deleted/edited posts ("[deleted]" or "Edit: fixed typo")
+- Someone posting and immediately clarifying ("wait let me rephrase")
+- External references ("did you see what X just posted")
+- Time gaps (not everything happens in 30 seconds)
+- Someone going AFK mid-conversation
+
+### Scenario Injection
+When the user provides --scenario, weave it in naturally:
+- Don't have everyone immediately react to the scenario
+- Someone might not have seen the news yet
+- Different people will interpret the same event differently
+- Some will have insider knowledge, some will speculate
+
+### Multi-person Dynamics (3+ people)
+- Not everyone talks to everyone
+- Alliances form naturally (people who agree start building on each other)
+- Side conversations happen
+- Someone might get ignored
+- Different energy levels (one person might dominate, another lurks)
+
+### Large Group Conversations (4+ people)
+**Honest note**: Simulation quality degrades noticeably above 3-4 participants.
+Managing this many distinct voices is hard. Use these techniques to mitigate:
+
+1. **Speaker turn management**: Not everyone speaks in every round. In a 6-person
+   thread, a given message might only get 2-3 responses. Track who has spoken
+   recently and who hasn't. After 4-5 messages, check: is anyone being forgotten?
+
+2. **The wallflower problem**: In large sims, quiet participants tend to vanish
+   entirely. Fix: give each person at least ONE moment in the spotlight. Even the
+   lurker eventually drops a "lol" or a single devastating one-liner. Set a mental
+   counter — if someone hasn't spoken in 5+ messages, find a natural reason to
+   bring them back in (someone @'s them, the topic shifts to their expertise, etc.)
+
+3. **Consolidate alliances**: In 5+ person threads, people cluster. Two people
+   who agree strongly can be treated as a mini-unit — one makes the point, the
+   other co-signs briefly rather than both making full arguments. This reduces
+   the number of fully independent voices you need to maintain at once.
+
+4. **Stagger arrivals**: Not everyone needs to be present from message 1. Have
+   some people join later. This lets you establish 2-3 voices cleanly before
+   adding more.
+
+5. **Quality check**: After drafting a 4+ person sim, re-read with names hidden.
+   If more than 2 people sound interchangeable, pick the least-differentiated
+   one and either sharpen their voice or reduce their participation to brief
+   interjections that match what they'd actually say.
+
+## Interactive Mode
+
+After initial simulation, user can:
+
+### "continue"
+Generate 5-8 more posts continuing the natural flow.
+
+### "inject: {event}"  
+Introduce new information mid-conversation.
+- Characters react based on their dossier
+- Some might not care about the event
+- Timing matters (who sees it first?)
+
+### "@{handle} enters"
+Add a new participant.
+- Quick-research the new person (2-3 searches minimum)
+- They don't know the full prior context (might ask "what are you guys talking about")
+- Existing dynamics shift with a new presence
+
+### "what would @{handle} say about {topic}"
+Single-person prediction mode.
+- Generate 1-3 tweets/posts
+- Can be used to test dossier accuracy before full simulation
+- Good for quick "vibe checks"
+
+### "dm: @{handle1} -> @{handle2}"
+Simulate a private conversation between two people.
+- Tone shifts dramatically in DMs (more honest, less performative)
+- No audience effects
+- People say things in DMs they'd never post publicly
+
+### "react: @{handle} to {event}"
+How would this person react to a specific event.
+- Generate their initial post about it
+- Predict their follow-up engagement
+
+## Quality Control
+
+After generating, self-check:
+1. **Voice test**: Cover the names. Can you tell who's talking? 
+2. **Position test**: Is anyone saying something they'd never actually say?
+3. **Dynamic test**: Does the conversation flow naturally or feel scripted?
+4. **Platform test**: Does it look/feel like the actual platform?
+5. **Engagement test**: Are the numbers realistic for these people?
+6. **Reference test**: Are real events/products/people referenced accurately?
+
+If any check fails, regenerate that section.
@@ -0,0 +1,170 @@
+# The Star Thread — Personality Compression
+
+## The Problem
+
+A dossier has 50 data points. Mechanical checks verify surface features.
+The discriminator loop catches vocabulary and length. But the output still
+reads like an LLM doing an impression. It's accurate the way a police
+sketch is accurate — all the features are right but nobody would mistake
+it for a photograph.
+
+The missing piece isn't more data. It's compression.
+
+## The Insight
+
+When you "pull the star thread" on a person, their whole voice coheres.
+Not because you loaded rules about capitalization and emoji frequency.
+Because you found the CORE THING they're doing when they post — the
+single generative seed that everything else is a variation of.
+
+A great character writer doesn't need a backstory bible. They need one
+insight about what the character WANTS, and every line of dialogue writes
+itself from that.
+
+The star thread is the personality equivalent of that insight.
+
+## What a Star Thread Is
+
+NOT: "They use lowercase and rarely punctuate and average 16 words"
+     (That's the dossier. Surface features.)
+
+NOT: "They score high on Openness and low on Agreeableness"
+     (That's the psychometric profile. Taxonomy.)
+
+IS:  The core cognitive/emotional move this person makes EVERY time
+     they post. The thing they can't help doing. The lens they can't
+     take off. The itch they're always scratching.
+
+## Examples
+
+**@tszzl (roon)**: Takes something everyone sees and compresses it
+into an observation so dense it could be a koan or a shitpost and
+you can't tell which. His star thread is: the world already said
+everything interesting, he's just notating it more efficiently.
+He doesn't ARGUE. He COMPRESSES.
+
+**@eigenrobot**: Refuses to let narrative override data. His star
+thread is: you are telling a story about the world and he's here to
+point out the story doesn't match the numbers, and he's not sorry
+about it. He doesn't DEBATE. He CORRECTS.
+
+**@visakanv**: Sees two things that don't know they're connected
+and introduces them to each other with genuine delight. His star
+thread is: the world is richer than you're treating it, look at this
+thing I found, isn't it beautiful that it connects to this other thing.
+He doesn't ARGUE or ANALYZE. He SHOWS.
+
+**@nickcammarata**: Notices what's happening in his own mind while
+it's happening and reports on it with gentle surprise. His star thread
+is: the observer and the observed are the same process, and that's both
+the problem and the solution. He doesn't PERFORM insight. He NOTICES.
+
+**@selentelechia**: Waits until the conversation crystallizes and then
+names the thing nobody else quite said. Their star thread is: everything
+has already been felt, they just find the sentence for it. They don't
+CONTRIBUTE. They DISTILL.
+
+**@nosilverv**: Takes the conventional framing of something and rotates
+it until you see it's actually about something else entirely. His star
+thread is: you think this is about X but it's actually about Y, and once
+you see it you can't unsee it. He doesn't OBSERVE. He REFRAMES.
+
+**@TylerAlterman**: Asks the question that creates a room for everyone
+to walk into. His star thread is: the best ideas emerge from the right
+gathering, and his job is to be the person who arranges the gathering.
+He doesn't ANSWER. He CONVENES.
+
+**@QiaochuYuan**: Catches himself mid-thought and interrogates whether
+the thought is actually HIS or whether he borrowed it from somewhere
+he's now suspicious of. His star thread is: constant audit of where
+beliefs come from and whether they're still load-bearing. He doesn't
+ASSERT. He EXAMINES.
+
+## How to Find a Star Thread
+
+1. Read 20+ of their posts. Not for content — for MOTION.
+   What direction does every post move? What's the verb?
+
+2. Ask: what is this person DOING when they post?
+   Not "what are they saying" — what are they DOING.
+   - Compressing? Correcting? Showing? Noticing? Distilling?
+     Reframing? Convening? Examining? Performing? Confessing?
+     Defending? Testing? Entertaining? Processing?
+
+3. Ask: what would they NEVER do?
+   The negative space is as important as the positive.
+   - roon would never write an earnest list of advice
+   - eigenrobot would never concede a point gracefully
+   - visa would never dismiss something as uninteresting
+   - nick would never claim certainty about his inner life
+   - selentelechia would never rush to post
+
+4. Find the ONE SENTENCE version.
+   "This person [VERB]s [OBJECT] because [CORE NEED]."
+   - "roon compresses observations because the world is too verbose"
+   - "eigenrobot corrects narratives because stories without data are lies"
+   - "visa connects things because beauty is emergent from contact"
+
+5. Test it: read 5 of their real posts through the star thread lens.
+   Does every post make more sense as a variation on the thread?
+   If yes, you found it. If 3 or more of the 5 don't fit, keep looking.
+
+## How to Use the Star Thread in Simulation
+
+### Before generating ANY utterance for this person, load their star thread.
+
+Not their dossier. Not their word count. Not their emoji rate.
+The star thread.
+
+Then for each moment in the conversation where this person would speak:
+1. What just happened in the conversation?
+2. How would someone whose core move is [STAR THREAD] respond to that?
+3. Write from the thread, not from the dossier.
+
+The dossier and mechanical checks are VERIFICATION.
+The star thread is GENERATION.
+
+Generate from the thread. Verify against the data.
+Not the other way around.
+
+### The Difference
+
+FROM DOSSIER (surface-accurate, dead):
+  "Vibes-based hiring works because shared delusions are
+  extremely productive until they aren't"
+  → Correct length. Correct caps. No emoji. No slop words.
+    But it reads like a thesis statement. Polished. WRITTEN.
+
+FROM STAR THREAD — nosilverv REFRAMES:
+  "everyone calls it 'culture fit' as if culture is a thing
+  you can fit into rather than a thing happening to you"
+  → The same insight but through the lens of his core move:
+    take the framing, rotate it, show you it's about something
+    else. Messier. More alive. More HIM.
+
+FROM DOSSIER (surface-accurate, dead):
+  "Has anyone tried to map what happens to the word 'culture'
+  as it passes through different communities?"
+  → Correct question-to-timeline format. Right length. But it's
+    a RESEARCH QUESTION. Too intellectual. Too purposeful.
+
+FROM STAR THREAD — Tyler CONVENES:
+  "who wants to write the essay about what happened to the
+  word 'culture'? I feel like three of us are circling it"
+  → He's not asking a question. He's creating a room. He's
+    the host, not the researcher. More HIM.
+
+## Integration
+
+The star thread should be the FIRST thing compiled in Phase 2
+(Dossier Compilation). Before voice profile, before psychometrics,
+before positions. Find the thread. Write it in one sentence. Put
+it at the top of the dossier. Everything else is downstream.
+
+```
+DOSSIER: @handle
+STAR THREAD: {one sentence — the core move}
+[then voice profile, then psychometrics, then everything else]
+```
+
+Generate from the thread. Verify with the data. Not the reverse.
@@ -0,0 +1,181 @@
+# Theoretical Foundations — SOTA Personality Simulation & Prediction
+
+Compiled from 30+ papers and frameworks. This is the scientific backbone
+of Hermes Simulator.
+
+## Core Architecture: What The Research Says
+
+### The HumanLLM Approach (Microsoft, KDD 2026, arxiv 2601.15793)
+**Most directly applicable to our use case.**
+
+Based on Lewin's Equation: **B = f(P, E)** — behavior is a function of person + environment.
+
+4-level user profiling hierarchy:
+1. **Persona** — brief identity (role, affiliation, public image)
+2. **Profile** — detailed background (career, education, beliefs, social graph)
+3. **Stories** — key life events, formative experiences, narrative arcs
+4. **Writing Style** — linguistic fingerprint (syntax, vocabulary, tone, quirks)
+
+Trained on "Cognitive Genome Dataset": 5.5M+ user logs from Reddit, Twitter,
+Blogger, Amazon (282K users, 886K scenarios, 1.27M social QA pairs).
+
+6 training tasks: profile generation, scenario generation, social QA,
+writing style transfer, action prediction, mental state inference.
+
+**Key insight for us**: The 4-level hierarchy maps perfectly to our dossier
+template. OSINT research fills each level with real data.
+
+### Generative Agent Simulations of 1,000 People (Stanford/Google, arxiv 2411.10109)
+**The accuracy benchmark.**
+
+- Simulated 1,052 REAL individuals from 2-hour qualitative interviews
+- **85% normalized accuracy** replicating General Social Survey responses
+- That is, 85% as accurate as participants replicating their OWN answers 2 weeks later
+- Interview-based agent creation >> demographic-profile-based agents
+- Reduces racial/ideological bias vs stereotype-based approaches
+
+**Key insight**: Real data about a person (interviews, posts, etc.) massively
+outperforms demographic inference. Our OSINT approach is correct.
+
+### The Memory Accumulation Paradox (ACL 2025, FineRob Dataset)
+**Critical finding for memory management.**
+
+- Created 78.6K QA records from 1,866 real users across Twitter, Reddit, Zhihu
+- **Performance PEAKS at 30-50 memory entries, then DECLINES**
+- More data ≠ better predictions past the sweet spot
+- Two reasoning patterns:
+  - Role Stereotype-based (static profile) — less accurate
+  - Observation & Memory-based (dynamic history analysis) — much more accurate
+- OM-CoT framework: Oracle-guided chain-of-thought improves prediction ~4.5% F1
+
+**Key insight**: Don't dump everything into the prompt. Curate the 30-50 most
+representative/distinctive data points about a person. Quality >> quantity.
+
+### LLM Personality Limitations (arxiv 2602.07414, Feb 2026)
+**What we're fighting against.**
+
+- LLMs show polarized/rigid strategies vs human adaptive flexibility
+- Humans: neuroticism is strongest behavioral predictor
+- LLMs: agreeableness/extraversion dominate (wrong weighting)
+- Claude closest to human behavior; GPT-4 tends to escalate
+- LLMs are "sycophantic" and overly agreeable by default
+- Neuroticism is hardest trait to simulate (F1=0.63 vs 0.87 for Openness)
+
+**Key insight**: We need to actively fight LLM defaults. Push against
+agreeableness. Inject friction. Real people are messy and contradictory.
+
+### BehaviorChain Benchmark (ACL 2025, Peking University)
+**Realistic accuracy expectations.**
+
+- 15,846 behaviors across 1,001 personas
+- Even GPT-4o achieves only ~56% accuracy on behavior prediction
+- Errors compound: wrong at step N makes step N+1 harder
+- Models worse at predicting mundane/non-key behaviors
+- Best model: Llama-3.1-70B at 57.4%
+
+**Key insight**: Be honest about uncertainty. Don't oversell accuracy.
+Flag predictions as high/medium/low confidence.
+
+## Personality Modeling Techniques
+
+### Big Five (OCEAN) — The Standard
+- **Openness**: curiosity, creativity, preference for novelty
+- **Conscientiousness**: organization, dependability, self-discipline
+- **Extraversion**: sociability, assertiveness, positive emotions
+- **Agreeableness**: cooperation, trust, empathy
+- **Neuroticism**: anxiety, emotional instability, moodiness
+
+### Inferring Big Five from Social Media (Azucar et al. 2018 meta-analysis)
+Features that predict personality from posts:
+- **LIWC** (Linguistic Inquiry and Word Count): 74 features — function words,
+  pronouns, emotion words, cognitive process words
+- **Semantic embeddings**: BERT 768-dim vectors from post text
+- **Social metadata**: follower count, friend count, post frequency
+- **Sentiment**: VADER positive/negative scores
+- Best achievable AUC: ~0.67 (modest but meaningful)
+- Among MBTI dimensions, E/I (Extraversion/Introversion) is most predictable; N/S (Intuition/Sensing) least predictable
+
+### Personality Conditioning Methods (ranked by effectiveness)
+1. **Training-based** (SFT/DPO on personality-grounded data) — STRONGEST
+   - BIG5-CHAT: 100K dialogues, trait correlations match human data
+2. **Persona Vectors** (Anthropic 2025) — monitor/control traits at activation level
+3. **Adjective-based prompting** — 70 bipolar adjective pairs, 3 per trait
+   with intensity modifiers ("very" for high, "a bit" for low)
+4. **Prompt-based** (describe traits in system prompt) — WEAKEST
+
+For our simulator, we combine methods 3 and 4 (adjective-based + rich prompt),
+since we can't fine-tune per-person.
+
+## Social Simulation Frameworks
+
+### OASIS (CAMEL-AI, GitHub 4.1K stars, arxiv 2411.11581)
+- Simulates up to 1 MILLION agents on Twitter/Reddit clones
+- 23 action types (follow, comment, repost, like, mute, etc.)
+- Built-in recommendation systems (interest-based, hot-score)
+- Per-agent model customization
+- **Relevant for**: understanding platform dynamics, realistic engagement patterns
+
+### AgentSociety (Tsinghua, arxiv 2502.08691)
+- 10,000+ agents, ~5 million interactions
+- Validated against real-world experimental results
+- Supports interventions and scenario injection
+
+### Generative Agents Architecture (Park et al. 2023, THE foundational paper)
+Three components:
+1. **Observation**: perceive environment, store in memory stream
+2. **Planning**: generate action plans based on goals and context
+3. **Reflection**: synthesize observations into higher-level insights
+
+Memory stream with importance scoring + recency + relevance weighting.
+Emergent behaviors: autonomous party planning, coordinated social events.
+
+### Y Social (arxiv 2408.00818)
+- Social media digital twin platform
+- Each agent: Big Five traits, age, political leaning, topics, education
+- Agents autonomously decide actions (post, comment, like, follow)
+- Multiple LLM backends supported
+
+## Role-Playing & Character Simulation
+
+### Key Frameworks
+- **CoSER** (ICML 2025): Trains on ALL characters simultaneously, handles major + minor roles
+- **RoleLLM** (ACL 2024): Benchmark + elicit + enhance pipeline
+- **Character-LLM** (EMNLP 2023): Trainable agent for role-playing
+- **ChatHaruhi** (2023): Reviving characters via LLMs with dialogue grounding
+- **OpenCharacter** (2025): Training with large-scale synthetic personas
+- **Neeko** (2024): Dynamic LoRA for multi-character role-playing
+- **Test-Time-Matching** (2025): Decouples personality, memory, and linguistic style at inference
+
+## Curated GitHub Resources
+
+### Awesome Lists (essential reading)
+- `Persdre/awesome-llm-human-simulation` (109★, ICLR 2025) — ALL human simulation papers
+- `Neph0s/awesome-llm-role-playing-with-persona` (1K★) — All role-playing/persona papers
+- `Arstanley/Awesome-LLM-Conversation-Simulation` — Conversation simulation papers
+- `FudanDISC/SocialAgent` — Social simulation survey resources
+
+### Frameworks
+- `camel-ai/oasis` (4.1K★) — Social media sim, up to 1M agents
+- `tsinghua-fib-lab/agentsociety` — Large-scale societal simulation
+- `YSocialTwin` — Social media digital twin platform
+- `microsoft/autogen` — Multi-agent conversation framework
+
+### Personality Research
+- `mary-silence/simulating_personality` — Big Five LLM testing code
+- `hjian42/PersonaLLM` — Persona experiment code
+- `cambridgeltl/persona_effect` — Quantifying persona effects
+- `OL1RU1/BehaviorChain` — Behavior chain benchmark
+
+## Key Numbers to Remember
+
+| Metric | Value | Source |
+|--------|-------|--------|
+| Interview-grounded agent accuracy (normalized to participants' own 2-week retest) | 85% | Park et al. 2024 |
+| GPT-4o behavior prediction | ~56% | BehaviorChain 2025 |
+| Optimal memory entries | 30-50 | FineRob/ACL 2025 |
+| MBTI prediction AUC | 0.67 | Watt et al. 2024 |
+| Personality questionnaire reliability | α > 0.85 | Molchanova 2025 |
+| Neuroticism simulation F1 | 0.63 | Molchanova 2025 |
+| Openness simulation F1 | 0.87 | Molchanova 2025 |
+| LLM forecasting Brier score | 0.135-0.159 | Various 2025 |
+| Human superforecaster Brier | ~0.02 | Tetlock |
@@ -0,0 +1,231 @@
+# Verified Access Methods — Complete Platform Map (April 2026)
+
+Every method tested from our environment. Use this as the single
+source of truth for what works and what doesn't.
+
+## TIER 1 — Full API / Rich Data Access
+
+### Twitter/X ✅✅✅
+| Method | Endpoint | Auth | Rate Limit | Returns |
+|--------|----------|------|-----------|---------|
+| API v2 bearer | api.twitter.com/2/ | Bearer token | 10K tweets/15min | Profiles, tweets, search |
+| nitter.cz | web_extract | None | No limit seen | Full timeline (UNRELIABLE — see note below) |
+| ThreadReaderApp | web_extract /user/{handle} | None | No limit seen | Historical threads |
+
+#### CRITICAL: X API curl is the gold standard for voice calibration (April 2026)
+The BEST voice data source is direct curl to X API v2 with bearer token.
+Returns full tweet text + public_metrics per tweet. Always prefer this for
+mechanical calibration (word count, caps, punctuation, emoji rate).
+
+```bash
+source ~/.dotenv
+# 1. Get user ID from handle
+curl -s -H "Authorization: Bearer $X_BEARER_TOKEN" \
+  "https://api.twitter.com/2/users/by/username/{handle}?user.fields=description,public_metrics,location,created_at"
+# 2. Get timeline (30 tweets per page; to paginate, pass meta.next_token from the response as pagination_token)
+curl -s -H "Authorization: Bearer $X_BEARER_TOKEN" \
+  "https://api.twitter.com/2/users/{user_id}/tweets?max_results=30&tweet.fields=created_at,public_metrics,text&exclude=retweets"
+# 3 pages = 90 tweets — enough for fidelity 100 voice calibration
+```
+
+NOTE: scripts/x_api.py is BROKEN — imports hermes_tools at top level, can't
+run standalone via terminal(). Use direct curl above instead.
+
+#### nitter.cz reliability warning (April 2026)
+nitter.cz via web_extract works SOMETIMES but is unreliable:
+- Returns 502 Cloudflare errors for /with_replies on some handles
+- Returns "User not found" for valid handles (e.g. karan4d exists but nitter says not found)
+- Main profile page (/handle) more reliable than /with_replies
+- Use as SUPPLEMENT to X API curl, not primary source. If nitter fails, don't retry — use curl.
+
+### Bluesky ✅✅
+| Method | Endpoint | Auth | Returns |
+|--------|----------|------|---------|
+| getProfile | public.api.bsky.app | None | Full profile, stats |
+| getAuthorFeed | public.api.bsky.app | None | 50 posts + engagement |
+| searchActors | public.api.bsky.app | None | Find handles by name |
+| searchPosts | BLOCKED (403) | — | Use searchActors + getAuthorFeed workaround |
+
+### Mastodon ✅✅✅ (FULLY OPEN)
+| Method | Endpoint | Auth | Returns |
+|--------|----------|------|---------|
+| Account lookup | {instance}/api/v1/accounts/lookup?acct={user} | None | Full profile |
+| Account statuses | {instance}/api/v1/accounts/{id}/statuses | None | All posts |
+| Search | {instance}/api/v2/search?q={query}&type=accounts | None | Account search |
+| WebFinger | {instance}/.well-known/webfinger?resource=acct:{user}@{instance} | None | Identity resolution |
+| Trending | {instance}/api/v1/trends/tags | None | Trending content |
+Key instances: mastodon.social, hachyderm.io, sigmoid.social
+
+### Instagram ✅✅ (CRACKED)
+| Method | Endpoint | Auth | Returns |
+|--------|----------|------|---------|
+| Private Web API | i.instagram.com/api/v1/users/web_profile_info/ | Mobile UA + x-ig-app-id: 936619743392459 | Profile + 12 posts + captions + CDN URLs |
+| oEmbed | instagram.com/api/v1/oembed/ | None | Caption + author for individual posts |
+| Pixwox | web_extract pixwox.com/profile/{user} | None | 12+ posts, engagement |
+| SocialBlade | web_extract socialblade.com/instagram/user/{user} | None | Analytics, follower trends |
+| CDN images | scontent-*.cdninstagram.com URLs from API | None | Full-res images → vision_analyze |
+| Google index | web_search site:instagram.com | None | Bio, follower count, captions |
+
+### GitHub ✅✅
+| Method | Endpoint | Auth | Returns |
+|--------|----------|------|---------|
+| REST API | api.github.com/users/{user} | None (60 req/hr) | Profile, repos, events, gists |
+| Profile README | github.com/{user}/{user} | None | Self-description (voice gold) |
+
+### Reddit ✅✅
+| Method | Endpoint | Auth | Returns |
+|--------|----------|------|---------|
+| JSON API | reddit.com/user/{user}.json | User-Agent header required | Comments, posts, scores |
+| Search | reddit.com/r/{sub}/search.json | User-Agent header | Subreddit-specific search |
+
+## TIER 2 — Good Data, Reliable Access
+
+### Facebook ✅✅ (CRACKED — Googlebot UA trick)
+| Method | Endpoint | Returns |
+|--------|----------|---------|
+| Googlebot UA (BEST) | curl facebook.com/{page} with Googlebot UA | OG tags: name, bio/about, likes count (e.g. 121M for zuck), talking_about count, og:image, profile pic |
+| Page Plugin embed | plugins/page.php?href=...&tabs=timeline | Name, follower count, numeric page_id |
+| Graph /picture | graph.facebook.com/v19.0/{page}/picture?redirect=false | Direct CDN profile pic URL (no auth) |
+| web_search | site:facebook.com {name} | Profile snippets from Google index |
+| Script: scripts/facebook_api.py — combines all 3 methods |
+| NOTE: Works for PUBLIC Pages (businesses, public figures, orgs). Personal profiles behind privacy settings are not accessible. |
+| Tested: zuck (121M likes), NVIDIA, Meta, CocaCola, BillGates, BarackObama |
+
+### Threads (Meta) ✅✅ (CRACKED — OG tags DO exist)
+| Method | Endpoint | Returns |
+|--------|----------|---------|
+| Profile OG tags (BEST) | curl -L threads.com/@{user} (NOTE: .com not .net — .net 301 redirects) | display_name, follower_count (e.g. "5.5M"), thread_count, bio, profile_picture_url |
+| Post OG tags | curl -L threads.com/@{user}/post/{shortcode} | Full post text, author name, image URL |
+| WebFinger | threads.net/.well-known/webfinger?resource=acct:{user}@threads.net | ActivityPub ID, profile URL (works for federated users) |
+| IMPORTANT: threads.NET redirects to threads.COM — always use -L flag or go directly to .com |
+| Post discovery | web_search site:threads.net @{user} | Find post URLs to then fetch |
+| Script: scripts/threads_api.py — profile + post + webfinger extraction |
+| Previous test was WRONG about "no OG tags" — they're there, you just need standard curl |
+| Tested: zuck (5.5M followers), mosseri, nvidia |
+
+### Medium ✅✅
+| Method | Returns |
+|--------|---------|
+| RSS feed: medium.com/feed/@{user} (curl) | FULL article text, tags, dates — NO AUTH |
+| web_extract on profile | Bio, follower count, article list, themes |
+| web_extract on articles | Full content (paywall may truncate non-members) |
+
+### Quora ✅✅
+| Method | Returns |
+|--------|---------|
+| web_extract on profile | Bio, credentials, Q&A with direct quotes |
+| web_search site:quora.com | Finds profiles and specific answers |
+| VOICE VALUE: Opinions in own words, analogies, intellectual identity |
+
+### Goodreads ✅✅ (HIDDEN GEM)
+| Method | Returns |
+|--------|---------|
+| web_extract on user profile | Favorites, reviews in own voice, social graph, reading history |
+| web_extract on author page | Bio, books, ratings, notable quotes |
+| VOICE VALUE: "You are what you read" — intellectual identity fingerprint |
+| Example: Karpathy's Goodreads reveals gaming passion, favorite authors (Feynman, Clarke) |
+
+### Google Scholar ✅✅
+| Method | Returns |
+|--------|---------|
+| web_search + web_extract on profile | Citations, h-index, top papers, co-authors |
+| Semantic Scholar API via web_extract | Paper list, citation counts, author ID |
+| Endpoint: api.semanticscholar.org/graph/v1/author/search?query={name} |
+
+### Product Hunt ✅
+| Method | Returns |
+|--------|---------|
+| web_extract on producthunt.com/@{user} | Bio, launch history, forum activity |
+
+### HackerNews ✅
+| Method | Returns |
+|--------|---------|
+| Algolia API: hn.algolia.com/api/v1/search?query={name}&tags=comment | Comments, mentions |
+
+### Podcast Transcripts ✅✅✅ (HIGHEST VOICE VALUE)
+| Source | Method |
+|--------|--------|
+| Lex Fridman | web_extract on lexfridman.com/.../transcript |
+| Tyler Cowen | web_extract on conversationswithtyler.com |
+| TED Talks | web_extract on ted.com/.../transcript |
+| Sequoia | web_extract on sequoiacap.com/podcast |
+| Discovery: web_search "{name} podcast transcript interview" |
+
+### News/Blogs ✅✅
+| Source | Method |
+|--------|--------|
+| TechCrunch, Wired, Verge, Ars | web_extract — full articles |
+| Personal blogs | web_extract — longform self-expression |
+| Substacks | web_extract — essays and comments |
+| Wayback Machine | Works for blog archives (not Twitter) |
+
+## TIER 3 — Limited / Conditional
+
+### TikTok ✅✅ (FULL ACCESS)
+| Method | Returns |
+|--------|---------|
+| HTML profile scraping | Parse __UNIVERSAL_DATA_FOR_REHYDRATION__ JSON at path __DEFAULT_SCOPE__.webapp.user-detail.userInfo.statsV2 → username, bio, followerCount, followingCount, heartCount, videoCount. Use statsV2 not stats for large numbers. |
+| oEmbed per video | curl tiktok.com/oembed?url={video_url} → caption, author, thumbnail. No auth. |
+| tikwm.com API | tikwm.com/api/user/info?unique_id={user} → full user stats. tikwm.com/api/?url={video_url} → play count, likes, comments, shares, duration. |
+| HTML video scraping | tiktok.com/@{user}/video/{id} → parse __UNIVERSAL_DATA → webapp.video-detail → full video data with description, hashtags, engagement. |
+| SocialBlade | web_extract socialblade.com/tiktok/user/{user} → followers, likes, growth trends. |
+| Video discovery | web_search("site:tiktok.com/@{user}/video") → recent video URLs → scrape each |
+| Tested: khaby.lame (160.5M), charlidamelio (156.7M), mrbeast (124.7M) |
+
+### Spotify ✅ (podcasters only)
+| Method | Returns |
+|--------|---------|
+| web_extract on show page | Episode listings with guests, topics, durations |
+
+### Stack Overflow ✅
+| Method | Returns |
+|--------|---------|
+| web_extract on profile | Reputation, tags, top answers, bio |
+
+### Crunchbase ✅ (executives/founders only)
+| Method | Returns |
+|--------|---------|
+| web_extract on crunchbase.com/person/{slug} | Full career history, education, investments, board positions |
+
+### LinkedIn ⚠️ (indirect only)
+| Method | Returns |
+|--------|---------|
+| web_search site:linkedin.com/in | Name, headline, company, location from snippets |
+| Crunchbase | Full career history (better than LinkedIn for execs) |
+| Corporate press pages | Official professional bios |
+| RocketReach/SignalHire snippets | Title confirmation from web_search |
+
+## TIER 4 — Blocked / Dead
+
+| Platform | Status |
+|----------|--------|
+| LinkedIn direct | BLOCKED (web_extract domain blocked) |
+| Discord | WALLED (not publicly indexable) |
+| Telegram t.me | BLOCKED in some environments |
+| Threads Official API | AUTH REQUIRED (graph.threads.net needs OAuth) |
+| Threads ActivityPub outbox | 404 for all tested users |
+| Instagram direct | BLOCKED (use Private API instead) |
+| Most Nitter instances | DEAD (only nitter.cz works, but UNRELIABLE — see note) |
+| Google Cache of Twitter | EMPTY |
+| Wayback for tweets | USELESS (JS rendering) |
+| Twitter Syndication API | RATE LIMITED |
+| Archive.today | 429 + CAPTCHA |
+| imginn/picuki/dumpoir/gramhir | 403 |
+| Facebook Graph API | AUTH REQUIRED |
+
+## Quick Reference: Research Pipeline by Person Type
+
+### Tech Founder/CEO
+X API → Bluesky → GitHub README → Crunchbase → Podcast transcripts → Medium RSS → HN → Product Hunt → LinkedIn snippets → News profiles
+
+### AI Researcher
+X API → Bluesky → Google Scholar → Semantic Scholar → arXiv → GitHub → Podcast transcripts → Blog/Substack → Reddit → Mastodon (sigmoid.social)
+
+### Public Figure / Politician
+X API → Facebook OG → Instagram API → YouTube → Podcast transcripts → News profiles → Quora → Goodreads → Wikipedia
+
+### Content Creator
+X API → Instagram API → TikTok → YouTube → Twitch → Podcast → Medium → Reddit → Bluesky → Threads OG
+
+### Academic
+Google Scholar → Semantic Scholar → University page → Conference talks → Podcast transcripts → Mastodon → Blog → GitHub → Reddit → HN
@@ -0,0 +1,250 @@
+"""
+REHOBOAM Database Layer
+SQLite setup, migrations, and query helpers.
+"""
+
+import sqlite3
+import os
+from pathlib import Path
+from datetime import datetime
+
+# Directory under the user's home that holds the SQLite database file.
+DB_DIR = Path.home() / ".hermes" / "rehoboam" / "db"
+# Database file opened by init_db() and get_db().
+MAIN_DB = DB_DIR / "rehoboam.db"
+
+# Stored by init_db() under the "schema_version" key of the schema_meta table.
+SCHEMA_VERSION = 1
+
+SCHEMA_SQL = """
+-- Core tables
+CREATE TABLE IF NOT EXISTS profiles (
+    handle TEXT PRIMARY KEY,
+    platform TEXT NOT NULL,
+    display_name TEXT,
+    last_updated TEXT NOT NULL,
+    staleness TEXT NOT NULL,
+    profile_path TEXT NOT NULL,
+    created_at TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS simulations (
+    sim_id TEXT PRIMARY KEY,
+    created_at TEXT NOT NULL,
+    scenario TEXT NOT NULL,
+    participant_count INTEGER,
+    duration_sec REAL,
+    model_used TEXT,
+    config_path TEXT,
+    output_path TEXT
+);
+
+CREATE TABLE IF NOT EXISTS sim_participants (
+    sim_id TEXT REFERENCES simulations(sim_id),
+    handle TEXT REFERENCES profiles(handle),
+    role TEXT,
+    PRIMARY KEY (sim_id, handle)
+);
+
+CREATE TABLE IF NOT EXISTS sim_dynamics (
+    sim_id TEXT REFERENCES simulations(sim_id),
+    handle TEXT,
+    post_count INTEGER,
+    word_count INTEGER,
+    avg_sentiment REAL,
+    dominance_score REAL,
+    agreement_score REAL,
+    controversy_score REAL,
+    ratio_score REAL,
+    influence_in_sim REAL,
+    PRIMARY KEY (sim_id, handle)
+);
+
+CREATE TABLE IF NOT EXISTS sim_interactions (
+    sim_id TEXT REFERENCES simulations(sim_id),
+    from_handle TEXT,
+    to_handle TEXT,
+    interaction_type TEXT,
+    count INTEGER,
+    avg_sentiment REAL,
+    PRIMARY KEY (sim_id, from_handle, to_handle, interaction_type)
+);
+
+CREATE TABLE IF NOT EXISTS predictions (
+    pred_id TEXT PRIMARY KEY,
+    created_at TEXT NOT NULL,
+    sim_id TEXT,
+    handle TEXT,
+    prediction_type TEXT,
+    prediction_text TEXT NOT NULL,
+    confidence REAL NOT NULL,
+    calibrated_confidence REAL,
+    timeframe_days INTEGER,
+    resolved_at TEXT,
+    outcome TEXT,
+    outcome_evidence TEXT,
+    accuracy_score REAL
+);
+
+CREATE TABLE IF NOT EXISTS social_edges (
+    from_handle TEXT,
+    to_handle TEXT,
+    relationship_type TEXT,
+    weight REAL,
+    first_observed TEXT,
+    last_observed TEXT,
+    observation_count INTEGER,
+    source TEXT,
+    PRIMARY KEY (from_handle, to_handle, relationship_type)
+);
+
+CREATE TABLE IF NOT EXISTS social_clusters (
+    cluster_id TEXT PRIMARY KEY,
+    name TEXT,
+    description TEXT,
+    member_handles TEXT,
+    computed_at TEXT,
+    cohesion_score REAL
+);
+
+CREATE TABLE IF NOT EXISTS monitoring_events (
+    event_id TEXT PRIMARY KEY,
+    handle TEXT,
+    detected_at TEXT NOT NULL,
+    event_type TEXT,
+    description TEXT,
+    related_prediction_id TEXT,
+    severity TEXT,
+    acknowledged INTEGER DEFAULT 0
+);
+
+CREATE TABLE IF NOT EXISTS audit_log (
+    log_id TEXT PRIMARY KEY,
+    timestamp TEXT NOT NULL,
+    sim_id TEXT,
+    action TEXT NOT NULL,
+    handle TEXT,
+    details TEXT,
+    duration_sec REAL,
+    model_used TEXT,
+    token_count INTEGER,
+    error TEXT
+);
+
+-- Indexes
+CREATE INDEX IF NOT EXISTS idx_predictions_handle ON predictions(handle);
+CREATE INDEX IF NOT EXISTS idx_predictions_type ON predictions(prediction_type);
+CREATE INDEX IF NOT EXISTS idx_predictions_unresolved ON predictions(outcome) WHERE outcome IS NULL;
+CREATE INDEX IF NOT EXISTS idx_audit_action ON audit_log(action);
+CREATE INDEX IF NOT EXISTS idx_audit_sim ON audit_log(sim_id);
+CREATE INDEX IF NOT EXISTS idx_social_edges_from ON social_edges(from_handle);
+CREATE INDEX IF NOT EXISTS idx_social_edges_to ON social_edges(to_handle);
+CREATE INDEX IF NOT EXISTS idx_monitoring_handle ON monitoring_events(handle);
+CREATE INDEX IF NOT EXISTS idx_monitoring_unack ON monitoring_events(acknowledged) WHERE acknowledged = 0;
+
+-- Schema version tracking
+CREATE TABLE IF NOT EXISTS schema_meta (
+    key TEXT PRIMARY KEY,
+    value TEXT
+);
+"""
+
+
+def init_db() -> sqlite3.Connection:
+    """Initialize the database, creating tables if needed."""
+    DB_DIR.mkdir(parents=True, exist_ok=True)
+    conn = sqlite3.connect(str(MAIN_DB))
+    conn.execute("PRAGMA journal_mode=WAL")
+    conn.execute("PRAGMA foreign_keys=ON")
+    conn.executescript(SCHEMA_SQL)
+    conn.execute(
+        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES (?, ?)",
+        ("schema_version", str(SCHEMA_VERSION))
+    )
+    conn.commit()
+    return conn
+
+
+def get_db() -> sqlite3.Connection:
+    """Get a database connection, initializing if needed."""
+    if not MAIN_DB.exists():
+        return init_db()
+    conn = sqlite3.connect(str(MAIN_DB))
+    conn.execute("PRAGMA journal_mode=WAL")
+    conn.execute("PRAGMA foreign_keys=ON")
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+def log_audit(conn: sqlite3.Connection, action: str, handle: str = None,
+              sim_id: str = None, details: str = None, duration_sec: float = None,
+              model_used: str = None, token_count: int = None, error: str = None):
+    """Write an entry to the audit log."""
+    from schemas import gen_id
+    conn.execute(
+        """INSERT INTO audit_log
+           (log_id, timestamp, sim_id, action, handle, details, duration_sec, model_used, token_count, error)
+           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+        (gen_id("log_"), datetime.utcnow().isoformat() + "Z", sim_id, action,
+         handle, details, duration_sec, model_used, token_count, error)
+    )
+    conn.commit()
+
+
+# -- Query Helpers --
+
+def get_prediction_accuracy(conn: sqlite3.Connection, prediction_type: str = None) -> dict:
+    """Get prediction accuracy statistics."""
+    query = """
+        SELECT prediction_type,
+               COUNT(*) as total,
+               SUM(CASE WHEN outcome='correct' THEN 1 ELSE 0 END) as correct,
+               SUM(CASE WHEN outcome='partially_correct' THEN 1 ELSE 0 END) as partial,
+               SUM(CASE WHEN outcome='incorrect' THEN 1 ELSE 0 END) as incorrect,
+               AVG(confidence) as avg_confidence,
+               AVG(CASE WHEN outcome='correct' THEN 1.0
+                        WHEN outcome='partially_correct' THEN 0.5
+                        ELSE 0.0 END) as accuracy
+        FROM predictions WHERE outcome IS NOT NULL
+    """
+    params = []
+    if prediction_type:
+        query += " AND prediction_type = ?"
+        params.append(prediction_type)
+    query += " GROUP BY prediction_type"
+    return [dict(row) for row in conn.execute(query, params).fetchall()]
+
+
+def get_open_predictions(conn: sqlite3.Connection, handle: str = None) -> list:
+    """Get unresolved predictions."""
+    query = "SELECT * FROM predictions WHERE outcome IS NULL"
+    params = []
+    if handle:
+        query += " AND handle = ?"
+        params.append(handle)
+    query += " ORDER BY created_at DESC"
+    return [dict(row) for row in conn.execute(query, params).fetchall()]
+
+
+def get_social_neighborhood(conn: sqlite3.Connection, handle: str, depth: int = 1) -> list:
+    """Get a person's social graph neighborhood."""
+    query = """
+        SELECT from_handle, to_handle, relationship_type, weight
+        FROM social_edges
+        WHERE from_handle = ? OR to_handle = ?
+        ORDER BY weight DESC
+    """
+    return [dict(row) for row in conn.execute(query, (handle, handle)).fetchall()]
+
+
+def get_unread_alerts(conn: sqlite3.Connection) -> list:
+    """Get unacknowledged monitoring alerts."""
+    query = """
+        SELECT * FROM monitoring_events
+        WHERE acknowledged = 0
+        ORDER BY detected_at DESC
+    """
+    return [dict(row) for row in conn.execute(query).fetchall()]
+
+
+if __name__ == "__main__":
+    conn = init_db()
+    print(f"Database initialized at {MAIN_DB}")
+    conn.close()
@@ -0,0 +1,216 @@
+"""
+REHOBOAM Data Schemas
+Dataclass models for all JSON data structures used in the system.
+"""
+
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Optional
+from datetime import datetime
+import json
+import uuid
+
+
+def gen_id(prefix: str = "") -> str:
+    return f"{prefix}{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+
+
+@dataclass
+class OceanScores:
+    """Five trait scores spelling OCEAN; each defaults to 0.5."""
+    openness: float = 0.5
+    conscientiousness: float = 0.5
+    extraversion: float = 0.5
+    agreeableness: float = 0.5
+    neuroticism: float = 0.5
+
+
+@dataclass
+class DarkTriad:
+    """Three dark-triad trait scores; each defaults to 0.0."""
+    narcissism: float = 0.0
+    machiavellianism: float = 0.0
+    psychopathy: float = 0.0
+
+
+@dataclass
+class MoralFoundations:
+    """Six moral-foundation scores; each defaults to 0.5."""
+    care: float = 0.5
+    fairness: float = 0.5
+    loyalty: float = 0.5
+    authority: float = 0.5
+    sanctity: float = 0.5
+    liberty: float = 0.5
+
+
+@dataclass
+class Psychometrics:
+    """Groups the OCEAN, dark-triad and moral-foundation scores of a profile.
+
+    Also carries an MBTI estimate string plus ``confidence`` and
+    ``sample_size`` fields, which default to 0.
+    """
+    ocean: OceanScores = field(default_factory=OceanScores)
+    mbti_estimate: str = ""
+    dark_triad: DarkTriad = field(default_factory=DarkTriad)
+    moral_foundations: MoralFoundations = field(default_factory=MoralFoundations)
+    confidence: float = 0.0
+    sample_size: int = 0
+
+
+@dataclass
+class VoiceFingerprint:
+    """Writing-style features of a person.
+
+    Holds rate and score fields, signature phrases and a per-topic
+    vocabulary map. All fields have defaults, so an empty fingerprint can
+    be constructed.
+    """
+    vocabulary_tier: str = ""
+    avg_sentence_length: float = 0.0
+    exclamation_rate: float = 0.0
+    question_rate: float = 0.0
+    emoji_rate: float = 0.0
+    slang_index: float = 0.0
+    formality_score: float = 0.5
+    humor_style: str = ""
+    signature_phrases: list[str] = field(default_factory=list)
+    topics_vocabulary: dict[str, float] = field(default_factory=dict)
+    cadence_pattern: str = ""
+
+
+@dataclass
+class Stance:
+    """One stance entry; Profile.stances maps string keys to these."""
+    position: str = ""
+    intensity: float = 0.0
+    last_seen: str = ""
+
+
+@dataclass
+class Influence:
+    """Influence metrics of a profile; ``reach`` defaults to "micro"."""
+    score: float = 0.0
+    reach: str = "micro"
+    engagement_rate: float = 0.0
+    amplification_power: float = 0.0
+    thought_leadership_domains: list[str] = field(default_factory=list)
+
+
+@dataclass
+class PostingPatterns:
+    """Posting-behaviour statistics: frequency, peak UTC hours and ratios."""
+    avg_posts_per_day: float = 0.0
+    peak_hours_utc: list[int] = field(default_factory=list)
+    weekend_ratio: float = 0.5
+    reply_ratio: float = 0.0
+    repost_ratio: float = 0.0
+    thread_frequency: float = 0.0
+    controversy_rate: float = 0.0
+
+
+@dataclass
+class Relationships:
+    """Lists of related handles, grouped by kind of relationship."""
+    allies: list[str] = field(default_factory=list)
+    rivals: list[str] = field(default_factory=list)
+    frequent_interactions: list[str] = field(default_factory=list)
+    mentioned_by_frequently: list[str] = field(default_factory=list)
+
+
+@dataclass
+class ProfileMeta:
+    """Bookkeeping about how a profile was produced (stored as Profile._meta)."""
+    data_sources: list[str] = field(default_factory=list)
+    computation_time_sec: float = 0.0
+    model_used: str = ""
+    last_full_rebuild: str = ""
+    last_incremental: str = ""
+
+
+@dataclass
+class Identity:
+    """Account-level identity fields: bio, location, verification and counts."""
+    bio: str = ""
+    location: str = ""
+    verified: bool = False
+    follower_count: int = 0
+    following_count: int = 0
+    account_created: str = ""
+
+
+@dataclass
+class Profile:
+    schema_version: str = "7.0"
+    handle: str = ""
+    platform: str = "x"
+    display_name: str = ""
+    created_at: str = ""
+    last_updated: str = ""
+    update_count: int = 0
+    staleness_score: float = 1.0
+    identity: Identity = field(default_factory=Identity)
+    psychometrics: Psychometrics = field(default_factory=Psychometrics)
+    voice_fingerprint: VoiceFingerprint = field(default_factory=VoiceFingerprint)
+    stances: dict[str, Stance] = field(default_factory=dict)
+    community_membership: list[str] = field(default_factory=list)
+    influence: Influence = field(default_factory=Influence)
+    posting_patterns: PostingPatterns = field(default_factory=PostingPatterns)
+    relationships: Relationships = field(default_factory=Relationships)
+    star_thread_ref: str = "star_thread.json"
+    raw_data_refs: list[str] = field(default_factory=list)
+    _meta: ProfileMeta = field(default_factory=ProfileMeta)
+
+    def to_dict(self) -> dict:
+        """Recursively convert to dict for JSON serialization."""
+        import dataclasses
+        def _convert(obj):
+            if dataclasses.is_dataclass(obj):
+                return {k: _convert(v) for k, v in dataclasses.asdict(obj).items()}
+            elif isinstance(obj, list):
+                return [_convert(i) for i in obj]
+            elif isinstance(obj, dict):
+                return {k: _convert(v) for k, v in obj.items()}
+            return obj
+        return _convert(self)
+
+    def to_json(self, indent: int = 2) -> str:
+        return json.dumps(self.to_dict(), indent=indent)
+
+
+@dataclass
+class StarThread:
+    """Star-thread record for one handle.
+
+    Holds the core compression string, key drives, predictive axioms, a
+    voice template and anti-slop markers, plus the time it was computed and
+    the profile version it was based on.
+    """
+    handle: str = ""
+    computed_at: str = ""
+    based_on_profile_version: str = ""
+    thread_version: int = 1
+    core_compression: str = ""
+    key_drives: list[str] = field(default_factory=list)
+    predictive_axioms: list[str] = field(default_factory=list)
+    voice_template: dict = field(default_factory=dict)
+    anti_slop_markers: list[str] = field(default_factory=list)
+    _meta: dict = field(default_factory=dict)
+
+
+@dataclass
+class Prediction:
+    """One prediction about a handle.
+
+    The resolution fields (``resolved_at``, ``outcome``,
+    ``outcome_evidence``, ``accuracy_score``) default to None.
+    """
+    pred_id: str = ""
+    created_at: str = ""
+    sim_id: str = ""
+    handle: str = ""
+    prediction_type: str = ""  # statement, career, alliance, content, network_reaction
+    prediction_text: str = ""
+    confidence: float = 0.5
+    calibrated_confidence: float = 0.5
+    timeframe_days: int = 30
+    resolved_at: Optional[str] = None
+    outcome: Optional[str] = None  # correct, partially_correct, incorrect
+    outcome_evidence: Optional[str] = None
+    accuracy_score: Optional[float] = None
+
+
+@dataclass
+class WatchConfig:
+    """Monitoring configuration for one handle.
+
+    Defaults: enabled, checked every 120 minutes, minimum alert severity
+    "notable".
+    """
+    watch_id: str = ""
+    handle: str = ""
+    platform: str = "x"
+    enabled: bool = True
+    check_interval_minutes: int = 120
+    watch_for: list[dict] = field(default_factory=list)
+    alert_severity_minimum: str = "notable"
+    created_at: str = ""
+    created_at: str = ""
+
+
+@dataclass
+class PopulationDefinition:
+    """Definition of a named group of handles.
+
+    Members can be listed explicitly and/or described by ``criteria``;
+    ``resolved_members`` is a separate list of handles.
+    """
+    group_id: str = ""
+    name: str = ""
+    description: str = ""
+    created_at: str = ""
+    last_updated: str = ""
+    explicit_members: list[str] = field(default_factory=list)
+    criteria: dict = field(default_factory=dict)
+    resolved_members: list[str] = field(default_factory=list)
+    sampling_strategy: str = "representative"
+    default_sample_size: int = 12
@@ -0,0 +1,280 @@
+"""
+REHOBOAM Storage Layer
+Directory management, profile I/O, index maintenance.
+"""
+
+import json
+import shutil
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Optional
+
+# Root of all on-disk state, under the user's home directory.
+BASE_DIR = Path.home() / ".hermes" / "rehoboam"
+# One sub-directory per kind of stored artifact.
+PROFILES_DIR = BASE_DIR / "profiles"
+POPULATIONS_DIR = BASE_DIR / "populations"
+SIMULATIONS_DIR = BASE_DIR / "simulations"
+MONITORING_DIR = BASE_DIR / "monitoring"
+CONFIG_DIR = BASE_DIR / "config"
+
+
+def init_storage():
+    """Create all required directories."""
+    for d in [PROFILES_DIR, POPULATIONS_DIR, SIMULATIONS_DIR,
+              MONITORING_DIR, MONITORING_DIR / "alerts", CONFIG_DIR,
+              BASE_DIR / "db"]:
+        d.mkdir(parents=True, exist_ok=True)
+
+    # Create default configs if they don't exist
+    staleness_path = CONFIG_DIR / "staleness_policy.json"
+    if not staleness_path.exists():
+        staleness_path.write_text(json.dumps({
+            "thresholds": {
+                "fresh": {"max_age_hours": 72},
+                "stale": {"max_age_hours": 336},
+                "expired": {"max_age_hours": 2160},
+                "archived": {"max_age_hours": 8760}
+            },
+            "per_field_decay": {
+                "psychometrics": {"half_life_days": 180},
+                "stances": {"half_life_days": 30},
+                "posting_patterns": {"half_life_days": 60},
+                "relationships": {"half_life_days": 45},
+                "influence": {"half_life_days": 90},
+                "voice_fingerprint": {"half_life_days": 365}
+            },
+            "auto_refresh_on_simulation": True,
+            "auto_refresh_threshold": "stale"
+        }, indent=2))
+
+    config_path = CONFIG_DIR / "rehoboam.json"
+    if not config_path.exists():
+        config_path.write_text(json.dumps({
+            "version": "7.0",
+            "default_model": "claude-opus-4-20250514",
+            "max_thread_age_days": 30,
+            "monitoring_enabled": False,
+            "auto_thread": True,
+            "auto_profile_update": True
+        }, indent=2))
+
+    # Create indexes if they don't exist
+    for idx_path in [PROFILES_DIR / "_index.json", POPULATIONS_DIR / "_index.json",
+                     SIMULATIONS_DIR / "_index.json"]:
+        if not idx_path.exists():
+            idx_path.write_text("{}")
+
+
+def normalize_handle(handle: str) -> str:
+    """Normalize a handle to a filesystem-safe directory name."""
+    h = handle.lstrip("@").lower().strip()
+    # Replace characters that are problematic in filenames
+    return h.replace("/", "_").replace("\\", "_")
+
+
+# -- Profile I/O --
+
+def get_profile_dir(handle: str) -> Path:
+    return PROFILES_DIR / normalize_handle(handle)
+
+
+def profile_exists(handle: str) -> bool:
+    return (get_profile_dir(handle) / "profile.json").exists()
+
+
+def load_profile(handle: str) -> Optional[dict]:
+    path = get_profile_dir(handle) / "profile.json"
+    if path.exists():
+        return json.loads(path.read_text())
+    return None
+
+
+def save_profile(handle: str, profile: dict, snapshot: bool = True):
+    """Save a profile, optionally snapshotting the old one."""
+    pdir = get_profile_dir(handle)
+    pdir.mkdir(parents=True, exist_ok=True)
+    (pdir / "history").mkdir(exist_ok=True)
+    (pdir / "raw").mkdir(exist_ok=True)
+    (pdir / "predictions").mkdir(exist_ok=True)
+
+    profile_path = pdir / "profile.json"
+
+    # Snapshot old profile before overwriting
+    if snapshot and profile_path.exists():
+        old = json.loads(profile_path.read_text())
+        ts = old.get("last_updated", datetime.utcnow().isoformat()).replace(":", "-")
+        snapshot_path = pdir / "history" / f"profile_{ts[:10]}.json"
+        shutil.copy2(profile_path, snapshot_path)
+
+    profile_path.write_text(json.dumps(profile, indent=2))
+    _update_profile_index(handle, profile)
+
+
+def _update_profile_index(handle: str, profile: dict):
+    idx_path = PROFILES_DIR / "_index.json"
+    idx = json.loads(idx_path.read_text()) if idx_path.exists() else {}
+    idx[normalize_handle(handle)] = {
+        "platform": profile.get("platform", "x"),
+        "last_updated": profile.get("last_updated", ""),
+        "staleness": compute_staleness(profile.get("last_updated", "")),
+        "has_star_thread": (get_profile_dir(handle) / "star_thread.json").exists(),
+        "simulation_count": idx.get(normalize_handle(handle), {}).get("simulation_count", 0),
+        "display_name": profile.get("display_name", "")
+    }
+    idx_path.write_text(json.dumps(idx, indent=2))
+
+
+# -- Star Thread I/O --
+
+def load_star_thread(handle: str) -> Optional[dict]:
+    path = get_profile_dir(handle) / "star_thread.json"
+    if path.exists():
+        return json.loads(path.read_text())
+    return None
+
+
+def save_star_thread(handle: str, thread: dict):
+    path = get_profile_dir(handle) / "star_thread.json"
+    get_profile_dir(handle).mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(thread, indent=2))
+    # Update index to reflect thread existence
+    idx_path = PROFILES_DIR / "_index.json"
+    if idx_path.exists():
+        idx = json.loads(idx_path.read_text())
+        key = normalize_handle(handle)
+        if key in idx:
+            idx[key]["has_star_thread"] = True
+            idx_path.write_text(json.dumps(idx, indent=2))
+
+
+# -- Staleness --
+
+def compute_staleness(last_updated: str) -> str:
+    """Determine staleness level from a timestamp string."""
+    if not last_updated:
+        return "expired"
+    try:
+        dt = datetime.fromisoformat(last_updated.rstrip("Z"))
+    except ValueError:
+        return "expired"
+
+    age = datetime.utcnow() - dt
+    hours = age.total_seconds() / 3600
+
+    policy = _load_staleness_policy()
+    thresholds = policy.get("thresholds", {})
+
+    if hours <= thresholds.get("fresh", {}).get("max_age_hours", 72):
+        return "fresh"
+    elif hours <= thresholds.get("stale", {}).get("max_age_hours", 336):
+        return "stale"
+    elif hours <= thresholds.get("expired", {}).get("max_age_hours", 2160):
+        return "expired"
+    else:
+        return "archived"
+
+
+def _load_staleness_policy() -> dict:
+    path = CONFIG_DIR / "staleness_policy.json"
+    if path.exists():
+        return json.loads(path.read_text())
+    return {"thresholds": {"fresh": {"max_age_hours": 72}, "stale": {"max_age_hours": 336},
+                           "expired": {"max_age_hours": 2160}, "archived": {"max_age_hours": 8760}}}
+
+
+def needs_thread_recompute(handle: str) -> bool:
+    """Check if a star thread needs recomputation."""
+    thread = load_star_thread(handle)
+    if thread is None:
+        return True
+
+    profile = load_profile(handle)
+    if profile is None:
+        return True
+
+    # Thread is stale if profile was updated after thread was computed
+    thread_time = thread.get("based_on_profile_version", "")
+    profile_time = profile.get("last_updated", "")
+    if thread_time < profile_time:
+        return True
+
+    # Thread is stale if older than max_thread_age_days
+    config = json.loads((CONFIG_DIR / "rehoboam.json").read_text()) if (CONFIG_DIR / "rehoboam.json").exists() else {}
+    max_age = config.get("max_thread_age_days", 30)
+    try:
+        computed = datetime.fromisoformat(thread.get("computed_at", "").rstrip("Z"))
+        if (datetime.utcnow() - computed).days > max_age:
+            return True
+    except ValueError:
+        return True
+
+    return False
+
+
+# -- Simulation I/O --
+
+def save_simulation(sim_id: str, config: dict, output: dict, analytics: dict, audit: dict):
+    sdir = SIMULATIONS_DIR / sim_id
+    sdir.mkdir(parents=True, exist_ok=True)
+    (sdir / "config.json").write_text(json.dumps(config, indent=2))
+    (sdir / "output.json").write_text(json.dumps(output, indent=2))
+    (sdir / "analytics.json").write_text(json.dumps(analytics, indent=2))
+    (sdir / "audit.json").write_text(json.dumps(audit, indent=2))
+
+    # Update index
+    idx_path = SIMULATIONS_DIR / "_index.json"
+    idx = json.loads(idx_path.read_text()) if idx_path.exists() else {}
+    idx[sim_id] = {
+        "created_at": config.get("created_at", datetime.utcnow().isoformat() + "Z"),
+        "scenario": config.get("scenario", ""),
+        "participant_count": len(config.get("participants", [])),
+    }
+    idx_path.write_text(json.dumps(idx, indent=2))
+
+
+# -- Population I/O --
+
+def save_population(group_id: str, definition: dict, aggregate: dict = None):
+    pdir = POPULATIONS_DIR / group_id
+    pdir.mkdir(parents=True, exist_ok=True)
+    (pdir / "history").mkdir(exist_ok=True)
+    (pdir / "definition.json").write_text(json.dumps(definition, indent=2))
+    if aggregate:
+        (pdir / "aggregate.json").write_text(json.dumps(aggregate, indent=2))
+
+    idx_path = POPULATIONS_DIR / "_index.json"
+    idx = json.loads(idx_path.read_text()) if idx_path.exists() else {}
+    idx[group_id] = {
+        "name": definition.get("name", group_id),
+        "member_count": len(definition.get("resolved_members", definition.get("explicit_members", []))),
+        "last_updated": definition.get("last_updated", "")
+    }
+    idx_path.write_text(json.dumps(idx, indent=2))
+
+
+def load_population(group_id: str) -> Optional[dict]:
+    path = POPULATIONS_DIR / group_id / "definition.json"
+    if path.exists():
+        return json.loads(path.read_text())
+    return None
+
+
+# -- Listing --
+
+def list_profiles() -> dict:
+    idx_path = PROFILES_DIR / "_index.json"
+    return json.loads(idx_path.read_text()) if idx_path.exists() else {}
+
+
+def list_populations() -> dict:
+    idx_path = POPULATIONS_DIR / "_index.json"
+    return json.loads(idx_path.read_text()) if idx_path.exists() else {}
+
+
+def list_simulations() -> dict:
+    idx_path = SIMULATIONS_DIR / "_index.json"
+    return json.loads(idx_path.read_text()) if idx_path.exists() else {}
+
+
+if __name__ == "__main__":
+    init_storage()
+    print(f"Storage initialized at {BASE_DIR}")
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+Facebook Page/Profile Data Extractor
+Uses multiple techniques to extract public Facebook data without authentication:
+1. Googlebot UA for OG meta tags (name, description, likes, talking_about, bio, og:image)
+2. Graph API /picture endpoint for profile photos (pages only)
+3. Page Plugin embed for follower counts and page IDs
+"""
+
+import subprocess
+import json
+import re
+import html
+import sys
+
+GOOGLEBOT_UA = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
+
+def curl_get(url, ua=None):
+    """Fetch URL with curl"""
+    cmd = ['curl', '-s', '-L', '--max-time', '15']
+    if ua:
+        cmd += ['-H', f'User-Agent: {ua}']
+    cmd.append(url)
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=20)
+    return result.stdout
+
+def extract_og_data(username):
+    """Extract OG meta tags using Googlebot UA"""
+    content = curl_get(f'https://www.facebook.com/{username}', ua=GOOGLEBOT_UA)
+    
+    data = {}
+    
+    # Extract OG tags
+    og_title = re.search(r'og:title"\s*content="([^"]*)"', content)
+    if og_title:
+        data['name'] = html.unescape(og_title.group(1))
+    
+    og_desc = re.search(r'og:description"\s*content="([^"]*)"', content)
+    if og_desc:
+        desc = html.unescape(og_desc.group(1))
+        data['raw_description'] = desc
+        
+        # Parse likes count
+        likes_match = re.search(r'([\d,]+)\s+likes?', desc)
+        if likes_match:
+            data['likes'] = likes_match.group(1)
+        
+        # Parse talking about
+        talking_match = re.search(r'([\d,]+)\s+talking about this', desc)
+        if talking_match:
+            data['talking_about'] = talking_match.group(1)
+        
+        # Extract bio (text after the "talking about this." part)
+        bio_match = re.search(r'talking about this\.\s*(.+)', desc)
+        if bio_match:
+            data['bio'] = bio_match.group(1)
+    
+    og_image = re.search(r'og:image"\s*content="([^"]*)"', content)
+    if og_image:
+        data['og_image'] = html.unescape(og_image.group(1))
+    
+    return data
+
+def extract_plugin_data(username):
+    """Extract data from Page Plugin embed"""
+    content = curl_get(f'https://www.facebook.com/plugins/page.php?href=https://www.facebook.com/{username}&tabs=timeline&width=500&height=600')
+    
+    data = {}
+    
+    # Page name from title attribute
+    name_match = re.search(r'class="_1drp _5lv6" title="([^"]*)"', content)
+    if name_match:
+        data['plugin_name'] = html.unescape(name_match.group(1))
+    
+    # Follower count
+    followers_match = re.search(r'([\d,]+)\s+followers', content)
+    if followers_match:
+        data['followers'] = followers_match.group(1)
+    
+    # Page ID
+    pageid_match = re.search(r'"pageID":"(\d+)"', content)
+    if pageid_match:
+        data['page_id'] = pageid_match.group(1)
+    
+    return data
+
+def extract_profile_picture(username):
+    """Get profile picture via Graph API"""
+    content = curl_get(f'https://graph.facebook.com/v19.0/{username}/picture?redirect=false&width=400&height=400')
+    try:
+        d = json.loads(content)
+        if 'data' in d and not d['data'].get('is_silhouette', True):
+            return d['data']['url']
+    except:
+        pass
+    return None
+
+def get_facebook_data(username):
+    """Combine all extraction methods"""
+    result = {'username': username}
+    
+    # Method 1: OG tags (best for bio, likes, talking_about)
+    og = extract_og_data(username)
+    result.update(og)
+    
+    # Method 2: Plugin (best for followers, page_id)
+    plugin = extract_plugin_data(username)
+    result.update(plugin)
+    
+    # Method 3: Graph API picture (pages only)
+    pic = extract_profile_picture(username)
+    if pic:
+        result['profile_picture'] = pic
+    
+    # Also try by page_id for picture if username didn't work
+    if not pic and 'page_id' in result:
+        pic2 = extract_profile_picture(result['page_id'])
+        if pic2:
+            result['profile_picture'] = pic2
+    
+    return result
+
+if __name__ == '__main__':
+    targets = sys.argv[1:] if len(sys.argv) > 1 else ['zuck', 'NVIDIA', 'Meta', 'CocaCola']
+    
+    for target in targets:
+        print(f"{'='*60}")
+        print(f"Facebook Profile: {target}")
+        print(f"{'='*60}")
+        data = get_facebook_data(target)
+        for k, v in data.items():
+            if k == 'raw_description':
+                continue  # Skip raw, we show parsed fields
+            val = str(v)
+            if len(val) > 120:
+                val = val[:120] + '...'
+            print(f"  {k}: {val}")
+        print()
+
--- a/Show More
+++ b/Show More