fix: head+tail truncation for execute_code stdout (inspired by openclaw context-pruning)

Previously, _drain() captured only the first MAX_STDOUT_BYTES (50 KB) of stdout and silently dropped everything after it, so scripts that print() their final results at the end lost those results. _drain() now uses a two-buffer approach: 40% of the budget for the head and 60% for the tail (kept as a rolling window). This matches the pattern already used in terminal_tool.py (lines 1042-1051) but gives the tail more space, since execute_code scripts typically print() their final results at the end. Inspired by openclaw's softTrim context-pruning (headChars/tailChars).
2026-03-09 02:15:48 -07:00
237 changed files with 741 additions and 2780 deletions
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -195,8 +195,6 @@ def build_skills_system_prompt() -> str:

    # Collect skills with descriptions, grouped by category
    # Each entry: (skill_name, description)
-    # Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
-    # → category "mlops/training", skill "axolotl"
    skills_by_category: dict[str, list[tuple[str, str]]] = {}
    for skill_file in skills_dir.rglob("SKILL.md"):
        # Skip skills incompatible with the current OS platform
@@ -205,13 +203,8 @@ def build_skills_system_prompt() -> str:
        rel_path = skill_file.relative_to(skills_dir)
        parts = rel_path.parts
        if len(parts) >= 2:
-            # Category is everything between skills_dir and the skill folder
-            # e.g. parts = ("mlops", "training", "axolotl", "SKILL.md")
-            #   → category = "mlops/training", skill_name = "axolotl"
-            # e.g. parts = ("github", "github-auth", "SKILL.md")
-            #   → category = "github", skill_name = "github-auth"
+            category = parts[0]
            skill_name = parts[-2]
-            category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
        else:
            category = "general"
            skill_name = skill_file.parent.name
@@ -222,11 +215,9 @@ def build_skills_system_prompt() -> str:
        return ""

    # Read category-level descriptions from DESCRIPTION.md
-    # Checks both the exact category path and parent directories
    category_descriptions = {}
    for category in skills_by_category:
-        cat_path = Path(category)
-        desc_file = skills_dir / cat_path / "DESCRIPTION.md"
+        desc_file = skills_dir / category / "DESCRIPTION.md"
        if desc_file.exists():
            try:
                content = desc_file.read_text(encoding="utf-8")
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -555,21 +555,6 @@ toolsets:
 #     args: ["-y", "@modelcontextprotocol/server-github"]
 #     env:
 #       GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
-#
-# Sampling (server-initiated LLM requests) — enabled by default.
-# Per-server config under the 'sampling' key:
-#   analysis:
-#     command: npx
-#     args: ["-y", "analysis-server"]
-#     sampling:
-#       enabled: true           # default: true
-#       model: "gemini-3-flash" # override model (optional)
-#       max_tokens_cap: 4096    # max tokens per request
-#       timeout: 30             # LLM call timeout (seconds)
-#       max_rpm: 10             # max requests per minute
-#       allowed_models: []      # model whitelist (empty = all)
-#       max_tool_rounds: 5      # tool loop limit (0 = disable)
-#       log_level: "info"       # audit verbosity

 # =============================================================================
 # Voice Transcription (Speech-to-Text)
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -252,7 +252,6 @@ def cleanup_document_cache(max_age_hours: int = 24) -> int:
 class MessageType(Enum):
    """Types of incoming messages."""
    TEXT = "text"
-    LOCATION = "location"
    PHOTO = "photo"
    VIDEO = "video"
    AUDIO = "audio"
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -132,10 +132,6 @@ class TelegramAdapter(BasePlatformAdapter):
                filters.COMMAND,
                self._handle_command
            ))
-            self._app.add_handler(TelegramMessageHandler(
-                filters.LOCATION | getattr(filters, "VENUE", filters.LOCATION),
-                self._handle_location_message
-            ))
            self._app.add_handler(TelegramMessageHandler(
                filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
                self._handle_media_message
@@ -550,41 +546,6 @@ class TelegramAdapter(BasePlatformAdapter):
        event = self._build_message_event(update.message, MessageType.COMMAND)
        await self.handle_message(event)
    
-    async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-        """Handle incoming location/venue pin messages."""
-        if not update.message:
-            return
-
-        msg = update.message
-        venue = getattr(msg, "venue", None)
-        location = getattr(venue, "location", None) if venue else getattr(msg, "location", None)
-
-        if not location:
-            return
-
-        lat = getattr(location, "latitude", None)
-        lon = getattr(location, "longitude", None)
-        if lat is None or lon is None:
-            return
-
-        # Build a text message with coordinates and context
-        parts = ["[The user shared a location pin.]"]
-        if venue:
-            title = getattr(venue, "title", None)
-            address = getattr(venue, "address", None)
-            if title:
-                parts.append(f"Venue: {title}")
-            if address:
-                parts.append(f"Address: {address}")
-        parts.append(f"latitude: {lat}")
-        parts.append(f"longitude: {lon}")
-        parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
-        parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
-
-        event = self._build_message_event(msg, MessageType.LOCATION)
-        event.text = "\n".join(parts)
-        await self.handle_message(event)
-
    async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming media messages, downloading images to local cache."""
        if not update.message:
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1449,11 +1449,6 @@ class GatewayRunner:
            except Exception:
                current_provider = "openrouter"

-        # Detect custom endpoint: provider resolved to openrouter but a custom
-        # base URL is configured — the user set up a custom endpoint.
-        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
-            current_provider = "custom"
-
        if not args:
            provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
            lines = [
@@ -1580,10 +1575,6 @@ class GatewayRunner:
            except Exception:
                current_provider = "openrouter"

-        # Detect custom endpoint
-        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
-            current_provider = "custom"
-
        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)

        lines = [
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -761,39 +761,9 @@ def cmd_model(args):
        ("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
        ("minimax", "MiniMax (global direct API)"),
        ("minimax-cn", "MiniMax China (domestic direct API)"),
+        ("custom", "Custom endpoint (self-hosted / VLLM / etc.)"),
    ]

-    # Add user-defined custom providers from config.yaml
-    custom_providers_cfg = config.get("custom_providers") or []
-    _custom_provider_map = {}  # key → {name, base_url, api_key}
-    if isinstance(custom_providers_cfg, list):
-        for entry in custom_providers_cfg:
-            if not isinstance(entry, dict):
-                continue
-            name = entry.get("name", "").strip()
-            base_url = entry.get("base_url", "").strip()
-            if not name or not base_url:
-                continue
-            # Generate a stable key from the name
-            key = "custom:" + name.lower().replace(" ", "-")
-            short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
-            saved_model = entry.get("model", "")
-            model_hint = f" — {saved_model}" if saved_model else ""
-            providers.append((key, f"{name} ({short_url}){model_hint}"))
-            _custom_provider_map[key] = {
-                "name": name,
-                "base_url": base_url,
-                "api_key": entry.get("api_key", ""),
-                "model": saved_model,
-            }
-
-    # Always add the manual custom endpoint option last
-    providers.append(("custom", "Custom endpoint (enter URL manually)"))
-
-    # Add removal option if there are saved custom providers
-    if _custom_provider_map:
-        providers.append(("remove-custom", "Remove a saved custom provider"))
-
    # Reorder so the active provider is at the top
    known_keys = {k for k, _ in providers}
    active_key = active if active in known_keys else "custom"
@@ -821,10 +791,6 @@ def cmd_model(args):
        _model_flow_openai_codex(config, current_model)
    elif selected_provider == "custom":
        _model_flow_custom(config)
-    elif selected_provider.startswith("custom:") and selected_provider in _custom_provider_map:
-        _model_flow_named_custom(config, _custom_provider_map[selected_provider])
-    elif selected_provider == "remove-custom":
-        _remove_custom_provider(config)
    elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
        _model_flow_api_key_provider(config, selected_provider, current_model)

@@ -1040,11 +1006,7 @@ def _model_flow_openai_codex(config, current_model=""):


 def _model_flow_custom(config):
-    """Custom endpoint: collect URL, API key, and model name.
-
-    Automatically saves the endpoint to ``custom_providers`` in config.yaml
-    so it appears in the provider menu on subsequent runs.
-    """
+    """Custom endpoint: collect URL, API key, and model name."""
    from hermes_cli.auth import _save_model_choice, deactivate_provider
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config

@@ -1076,8 +1038,6 @@ def _model_flow_custom(config):
        print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
        return

-    effective_key = api_key or current_key
-
    if base_url:
        save_env_value("OPENAI_BASE_URL", base_url)
    if api_key:
@@ -1090,7 +1050,7 @@ def _model_flow_custom(config):
        cfg = load_config()
        model = cfg.get("model")
        if isinstance(model, dict):
-            model["provider"] = "custom"
+            model["provider"] = "auto"
            model["base_url"] = effective_url
        save_config(cfg)
        deactivate_provider()
@@ -1101,223 +1061,6 @@ def _model_flow_custom(config):
            deactivate_provider()
        print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")

-    # Auto-save to custom_providers so it appears in the menu next time
-    _save_custom_provider(effective_url, effective_key, model_name or "")
-
-
-def _save_custom_provider(base_url, api_key="", model=""):
-    """Save a custom endpoint to custom_providers in config.yaml.
-
-    Deduplicates by base_url — if the URL already exists, updates the
-    model name but doesn't add a duplicate entry.
-    Auto-generates a display name from the URL hostname.
-    """
-    from hermes_cli.config import load_config, save_config
-
-    cfg = load_config()
-    providers = cfg.get("custom_providers") or []
-    if not isinstance(providers, list):
-        providers = []
-
-    # Check if this URL is already saved — update model if so
-    for entry in providers:
-        if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"):
-            if model and entry.get("model") != model:
-                entry["model"] = model
-                cfg["custom_providers"] = providers
-                save_config(cfg)
-            return  # already saved, updated model if needed
-
-    # Auto-generate a name from the URL
-    import re
-    clean = base_url.replace("https://", "").replace("http://", "").rstrip("/")
-    # Remove /v1 suffix for cleaner names
-    clean = re.sub(r"/v1/?$", "", clean)
-    # Use hostname:port as the name
-    name = clean.split("/")[0]
-    # Capitalize for readability
-    if "localhost" in name or "127.0.0.1" in name:
-        name = f"Local ({name})"
-    elif "runpod" in name.lower():
-        name = f"RunPod ({name})"
-    else:
-        name = name.capitalize()
-
-    entry = {"name": name, "base_url": base_url}
-    if api_key:
-        entry["api_key"] = api_key
-    if model:
-        entry["model"] = model
-
-    providers.append(entry)
-    cfg["custom_providers"] = providers
-    save_config(cfg)
-    print(f"  💾 Saved to custom providers as \"{name}\" (edit in config.yaml)")
-
-
-def _remove_custom_provider(config):
-    """Let the user remove a saved custom provider from config.yaml."""
-    from hermes_cli.config import load_config, save_config
-
-    cfg = load_config()
-    providers = cfg.get("custom_providers") or []
-    if not isinstance(providers, list) or not providers:
-        print("No custom providers configured.")
-        return
-
-    print("Remove a custom provider:\n")
-
-    choices = []
-    for entry in providers:
-        if isinstance(entry, dict):
-            name = entry.get("name", "unnamed")
-            url = entry.get("base_url", "")
-            short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
-            choices.append(f"{name} ({short_url})")
-        else:
-            choices.append(str(entry))
-    choices.append("Cancel")
-
-    try:
-        from simple_term_menu import TerminalMenu
-        menu = TerminalMenu(
-            [f"  {c}" for c in choices], cursor_index=0,
-            menu_cursor="-> ", menu_cursor_style=("fg_red", "bold"),
-            menu_highlight_style=("fg_red",),
-            cycle_cursor=True, clear_screen=False,
-            title="Select provider to remove:",
-        )
-        idx = menu.show()
-        print()
-    except (ImportError, NotImplementedError):
-        for i, c in enumerate(choices, 1):
-            print(f"  {i}. {c}")
-        print()
-        try:
-            val = input(f"Choice [1-{len(choices)}]: ").strip()
-            idx = int(val) - 1 if val else None
-        except (ValueError, KeyboardInterrupt, EOFError):
-            idx = None
-
-    if idx is None or idx >= len(providers):
-        print("No change.")
-        return
-
-    removed = providers.pop(idx)
-    cfg["custom_providers"] = providers
-    save_config(cfg)
-    removed_name = removed.get("name", "unnamed") if isinstance(removed, dict) else str(removed)
-    print(f"✅ Removed \"{removed_name}\" from custom providers.")
-
-
-def _model_flow_named_custom(config, provider_info):
-    """Handle a named custom provider from config.yaml custom_providers list.
-
-    If the entry has a saved model name, activates it immediately.
-    Otherwise probes the endpoint's /models API to let the user pick one.
-    """
-    from hermes_cli.auth import _save_model_choice, deactivate_provider
-    from hermes_cli.config import save_env_value, load_config, save_config
-    from hermes_cli.models import fetch_api_models
-
-    name = provider_info["name"]
-    base_url = provider_info["base_url"]
-    api_key = provider_info.get("api_key", "")
-    saved_model = provider_info.get("model", "")
-
-    # If a model is saved, just activate immediately — no probing needed
-    if saved_model:
-        save_env_value("OPENAI_BASE_URL", base_url)
-        if api_key:
-            save_env_value("OPENAI_API_KEY", api_key)
-        _save_model_choice(saved_model)
-
-        cfg = load_config()
-        model = cfg.get("model")
-        if isinstance(model, dict):
-            model["provider"] = "custom"
-            model["base_url"] = base_url
-        save_config(cfg)
-        deactivate_provider()
-
-        print(f"✅ Switched to: {saved_model}")
-        print(f"   Provider: {name} ({base_url})")
-        return
-
-    # No saved model — probe endpoint and let user pick
-    print(f"  Provider: {name}")
-    print(f"  URL:      {base_url}")
-    print()
-    print("No model saved for this provider. Fetching available models...")
-    models = fetch_api_models(api_key, base_url, timeout=8.0)
-
-    if models:
-        print(f"Found {len(models)} model(s):\n")
-        try:
-            from simple_term_menu import TerminalMenu
-            menu_items = [f"  {m}" for m in models] + ["  Cancel"]
-            menu = TerminalMenu(
-                menu_items, cursor_index=0,
-                menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"),
-                menu_highlight_style=("fg_green",),
-                cycle_cursor=True, clear_screen=False,
-                title=f"Select model from {name}:",
-            )
-            idx = menu.show()
-            print()
-            if idx is None or idx >= len(models):
-                print("Cancelled.")
-                return
-            model_name = models[idx]
-        except (ImportError, NotImplementedError):
-            for i, m in enumerate(models, 1):
-                print(f"  {i}. {m}")
-            print(f"  {len(models) + 1}. Cancel")
-            print()
-            try:
-                val = input(f"Choice [1-{len(models) + 1}]: ").strip()
-                if not val:
-                    print("Cancelled.")
-                    return
-                idx = int(val) - 1
-                if idx < 0 or idx >= len(models):
-                    print("Cancelled.")
-                    return
-                model_name = models[idx]
-            except (ValueError, KeyboardInterrupt, EOFError):
-                print("\nCancelled.")
-                return
-    else:
-        print("Could not fetch models from endpoint. Enter model name manually.")
-        try:
-            model_name = input("Model name: ").strip()
-        except (KeyboardInterrupt, EOFError):
-            print("\nCancelled.")
-            return
-        if not model_name:
-            print("No model specified. Cancelled.")
-            return
-
-    # Activate and save the model to the custom_providers entry
-    save_env_value("OPENAI_BASE_URL", base_url)
-    if api_key:
-        save_env_value("OPENAI_API_KEY", api_key)
-    _save_model_choice(model_name)
-
-    cfg = load_config()
-    model = cfg.get("model")
-    if isinstance(model, dict):
-        model["provider"] = "custom"
-        model["base_url"] = base_url
-    save_config(cfg)
-    deactivate_provider()
-
-    # Save model name to the custom_providers entry for next time
-    _save_custom_provider(base_url, api_key, model_name)
-
-    print(f"\n✅ Model set to: {model_name}")
-    print(f"   Provider: {name} ({base_url})")
-

 # Curated model lists for direct API-key providers
 _PROVIDER_MODELS = {
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -63,7 +63,7 @@ _PROVIDER_LABELS = {
    "kimi-coding": "Kimi / Moonshot",
    "minimax": "MiniMax",
    "minimax-cn": "MiniMax (China)",
-    "custom": "Custom endpoint",
+    "custom": "custom endpoint",
 }

 _PROVIDER_ALIASES = {
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -632,29 +632,6 @@ def setup_model_provider(config: dict):
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")

-        # Update config.yaml and deactivate any OAuth provider so the
-        # resolver doesn't keep returning the old provider (e.g. Codex).
-        try:
-            from hermes_cli.auth import deactivate_provider
-            deactivate_provider()
-        except Exception:
-            pass
-        import yaml
-        config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml"
-        try:
-            disk_cfg = {}
-            if config_path.exists():
-                disk_cfg = yaml.safe_load(config_path.read_text()) or {}
-            model_section = disk_cfg.get("model", {})
-            if isinstance(model_section, str):
-                model_section = {"default": model_section}
-            model_section["provider"] = "openrouter"
-            model_section.pop("base_url", None)  # OpenRouter uses default URL
-            disk_cfg["model"] = model_section
-            config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
-        except Exception as e:
-            logger.debug("Could not save provider to config.yaml: %s", e)
-
    elif provider_idx == 3:  # Custom endpoint
        selected_provider = "custom"
        print()
@@ -682,28 +659,6 @@ def setup_model_provider(config: dict):
        if model_name:
            config['model'] = model_name
            save_env_value("LLM_MODEL", model_name)
-
-        # Save provider and base_url to config.yaml so the gateway and CLI
-        # both resolve the correct provider without relying on env-var heuristics.
-        if base_url:
-            import yaml
-            config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml"
-            try:
-                disk_cfg = {}
-                if config_path.exists():
-                    disk_cfg = yaml.safe_load(config_path.read_text()) or {}
-                model_section = disk_cfg.get("model", {})
-                if isinstance(model_section, str):
-                    model_section = {"default": model_section}
-                model_section["provider"] = "custom"
-                model_section["base_url"] = base_url.rstrip("/")
-                if model_name:
-                    model_section["default"] = model_name
-                disk_cfg["model"] = model_section
-                config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
-            except Exception as e:
-                logger.debug("Could not save provider to config.yaml: %s", e)
-
        print_success("Custom endpoint configured")

    elif provider_idx == 4:  # Z.AI / GLM
--- a/run_agent.py
+++ b/run_agent.py
@@ -3834,27 +3834,6 @@ class AIAgent:
                else:
                    assistant_message = response.choices[0].message
                
-                # Normalize content to string — some OpenAI-compatible servers
-                # (llama-server, etc.) return content as a dict or list instead
-                # of a plain string, which crashes downstream .strip() calls.
-                if assistant_message.content is not None and not isinstance(assistant_message.content, str):
-                    raw = assistant_message.content
-                    if isinstance(raw, dict):
-                        assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
-                    elif isinstance(raw, list):
-                        # Multimodal content list — extract text parts
-                        parts = []
-                        for part in raw:
-                            if isinstance(part, str):
-                                parts.append(part)
-                            elif isinstance(part, dict) and part.get("type") == "text":
-                                parts.append(part.get("text", ""))
-                            elif isinstance(part, dict) and "text" in part:
-                                parts.append(str(part["text"]))
-                        assistant_message.content = "\n".join(parts)
-                    else:
-                        assistant_message.content = str(raw)
-
                # Handle assistant response
                if assistant_message.content and not self.quiet_mode:
                    print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
--- a/skills/creative/DESCRIPTION.md
+++ b/skills/creative/DESCRIPTION.md
@@ -1,3 +0,0 @@
----
-description: Creative content generation — ASCII art, hand-drawn style diagrams, and visual design tools.
----
--- a/skills/diagramming/excalidraw/SKILL.md
+++ b/skills/diagramming/excalidraw/SKILL.md
--- a/skills/diagramming/excalidraw/references/colors.md
+++ b/skills/diagramming/excalidraw/references/colors.md
--- a/skills/diagramming/excalidraw/references/dark-mode.md
+++ b/skills/diagramming/excalidraw/references/dark-mode.md
--- a/skills/diagramming/excalidraw/references/examples.md
+++ b/skills/diagramming/excalidraw/references/examples.md
--- a/skills/diagramming/excalidraw/scripts/upload.py
+++ b/skills/diagramming/excalidraw/scripts/upload.py
--- a/skills/research/domain-intel/SKILL.md
+++ b/skills/research/domain-intel/SKILL.md
--- a/skills/research/domain-intel/scripts/domain_intel.py
+++ b/skills/research/domain-intel/scripts/domain_intel.py
--- a/skills/research/blogwatcher/SKILL.md
+++ b/skills/research/blogwatcher/SKILL.md
--- a/skills/gaming/pokemon-player/SKILL.md
+++ b/skills/gaming/pokemon-player/SKILL.md
@@ -1,161 +0,0 @@
----
-name: pokemon-player
-description: Play Pokémon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal.
-tags: [gaming, pokemon, emulator, pyboy, gameplay, gameboy]
----
-# Pokémon Player
-
-Play Pokémon games via headless emulation using the `pokemon-agent` package.
-
-## When to Use
-- User says "play pokemon", "start pokemon", "pokemon game"
-- User asks about Pokemon Red, Blue, Yellow, FireRed, etc.
-- User wants to watch an AI play Pokemon
-- User references a ROM file (.gb, .gbc, .gba)
-
-## First-Time Setup
-
-### 1. Install the package
-```bash
-pip install pokemon-agent[dashboard] pyboy
-```
-
-### 2. Get the ROM
-Ask the user for their ROM file path. Do NOT attempt to download ROMs.
-
-### 3. Start the game server
-```bash
-pokemon-agent serve --rom <ROM_PATH> --port 8765 &
-```
-Wait 3 seconds, then verify:
-```bash
-curl -s http://localhost:8765/health
-```
-
-## The Gameplay Loop
-
-### Step 1: OBSERVE
-```bash
-curl -s http://localhost:8765/state
-```
-
-### Step 2: ORIENT
-- Dialog active → advance text
-- In battle → fight
-- Party hurt → heal
-- Near objective → navigate
-
-### Step 3: DECIDE
-Priority order:
-1. If dialog active → a_until_dialog_end
-2. If in battle → choose best move
-3. If any Pokemon <20% HP → Pokémon Center
-4. If near story objective → navigate to it
-5. If underleveled → train in grass
-6. Otherwise → explore
-
-### Step 4: ACT
-```bash
-curl -s -X POST http://localhost:8765/action \
-  -H "Content-Type: application/json" \
-  -d '{"actions": ["walk_up", "walk_up", "press_a"]}'
-```
-
-Action reference:
-- press_a — confirm, talk, select
-- press_b — cancel, close menu
-- press_start — open game menu
-- walk_up/down/left/right — move one tile
-- a_until_dialog_end — advance all dialog
-- wait_60 — wait ~1 second
-
-### Step 5: VERIFY
-Check state_after in the response. If stuck 3+ turns:
-1. Press B several times
-2. Try different directions
-3. Take screenshot and use vision_analyze
-4. Load last save if truly stuck
-
-### Step 6: RECORD
-```
-memory add: PKM:OBJECTIVE: Heading to Pewter City to challenge Brock
-memory add: PKM:PROGRESS: Got Squirtle, Got Pokedex, → Pewter City
-```
-
-### Step 7: SAVE
-Save every 20-30 turns and ALWAYS before gym battles:
-```bash
-curl -s -X POST http://localhost:8765/save \
-  -H "Content-Type: application/json" \
-  -d '{"name": "before_brock"}'
-```
-
-## Battle Strategy
-
-### Decision Tree
-1. Want to catch? → Weaken then throw Poké Ball
-2. Wild you don't need? → RUN
-3. Type advantage? → Use super-effective move
-4. No advantage? → Use strongest STAB move
-5. Low HP? → Switch or use Potion
-
-### Type Chart
-- Water beats Fire, Ground, Rock
-- Fire beats Grass, Bug, Ice
-- Grass beats Water, Ground, Rock
-- Electric beats Water, Flying
-- Ground beats Fire, Electric, Rock, Poison
-- Psychic beats Fighting, Poison (dominant in Gen 1!)
-
-### Gen 1 Quirks
-- Special stat is both offense AND defense for special moves
-- Psychic is overpowered (Ghost moves bugged)
-- Critical hits based on Speed stat
-- Wrap/Bind prevent opponent from acting
-
-## Memory Conventions
-| Prefix | Purpose | Example |
-|--------|---------|---------|
-| PKM:OBJECTIVE | Current goal | Defeat Brock in Pewter City |
-| PKM:MAP | Navigation knowledge | Viridian Forest: go north |
-| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty |
-| PKM:PROGRESS | Milestone tracker | ✓ Boulder Badge → Cascade Badge |
-| PKM:STUCK | Stuck situations | Got stuck in Cerulean Cave |
-| PKM:TEAM | Team notes | Squirtle is Water/Ice coverage |
-
-## Progression Milestones
-- ☐ Choose starter
-- ☐ Deliver Oak's Parcel → receive Pokédex
-- ☐ Boulder Badge — Brock (Rock) → use Water/Grass
-- ☐ Cascade Badge — Misty (Water) → use Grass/Electric
-- ☐ Thunder Badge — Lt. Surge (Electric) → use Ground
-- ☐ Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying
-- ☐ Soul Badge — Koga (Poison) → use Ground/Psychic
-- ☐ Marsh Badge — Sabrina (Psychic)
-- ☐ Volcano Badge — Blaine (Fire) → use Water/Ground
-- ☐ Earth Badge — Giovanni (Ground) → use Water/Grass/Ice
-- ☐ Elite Four → Champion!
-
-## Stopping Play
-1. Save the game:
-```bash
-curl -s -X POST http://localhost:8765/save \
-  -d '{"name": "session_end"}'
-```
-2. Update memory with progress
-3. Tell user: "Game saved! Say 'play pokemon' to resume."
-4. Kill the background server process
-
-## Dashboard
-If `pokemon-agent[dashboard]` is installed, open:
-http://localhost:8765/dashboard
-
-Live features: game screen, AI reasoning stream, team status, action log.
-
-## Pitfalls
-- NEVER download or provide ROM files — always ask the user
-- Don't send more than 15 actions per /action call
-- Always wait for dialog to clear before moving
-- Save BEFORE gym battles
-- Take screenshots sparingly — they cost vision tokens
-- Verify server is running with /health before any commands
--- a/skills/media/gif-search/SKILL.md
+++ b/skills/media/gif-search/SKILL.md
--- a/skills/leisure/find-nearby/SKILL.md
+++ b/skills/leisure/find-nearby/SKILL.md
@@ -1,69 +0,0 @@
----
-name: find-nearby
-description: Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed.
-version: 1.0.0
-metadata:
-  hermes:
-    tags: [location, maps, nearby, places, restaurants, local]
-    related_skills: []
----
-
-# Find Nearby — Local Place Discovery
-
-Find restaurants, cafes, bars, pharmacies, and other places near any location. Uses OpenStreetMap (free, no API keys). Works with:
-
-- **Coordinates** from Telegram location pins (latitude/longitude in conversation)
-- **Addresses** ("near 123 Main St, Springfield")
-- **Cities** ("restaurants in downtown Austin")
-- **Zip codes** ("pharmacies near 90210")
-- **Landmarks** ("cafes near Times Square")
-
-## Quick Reference
-
-```bash
-# By coordinates (from Telegram location pin or user-provided)
-python3 SKILL_DIR/scripts/find_nearby.py --lat <LAT> --lon <LON> --type restaurant --radius 1500
-
-# By address, city, or landmark (auto-geocoded)
-python3 SKILL_DIR/scripts/find_nearby.py --near "Times Square, New York" --type cafe
-
-# Multiple place types
-python3 SKILL_DIR/scripts/find_nearby.py --near "downtown austin" --type restaurant --type bar --limit 10
-
-# JSON output
-python3 SKILL_DIR/scripts/find_nearby.py --near "90210" --type pharmacy --json
-```
-
-### Parameters
-
-| Flag | Description | Default |
-|------|-------------|---------|
-| `--lat`, `--lon` | Exact coordinates | — |
-| `--near` | Address, city, zip, or landmark (geocoded) | — |
-| `--type` | Place type (repeatable for multiple) | restaurant |
-| `--radius` | Search radius in meters | 1500 |
-| `--limit` | Max results | 15 |
-| `--json` | Machine-readable JSON output | off |
-
-### Common Place Types
-
-`restaurant`, `cafe`, `bar`, `pub`, `fast_food`, `pharmacy`, `hospital`, `bank`, `atm`, `fuel`, `parking`, `supermarket`, `convenience`, `hotel`
-
-## Workflow
-
-1. **Get the location.** Look for coordinates (`latitude: ... / longitude: ...`) from a Telegram pin, or ask the user for an address/city/zip.
-
-2. **Ask for preferences** (only if not already stated): place type, how far they're willing to go, any specifics (cuisine, "open now", etc.).
-
-3. **Run the script** with appropriate flags. Use `--json` if you need to process results programmatically.
-
-4. **Present results** with names, distances, and Google Maps links. If the user asked about hours or "open now," check the `hours` field in results — if missing or unclear, verify with `web_search`.
-
-5. **For directions**, use the `directions_url` from results, or construct: `https://www.google.com/maps/dir/?api=1&origin=<LAT>,<LON>&destination=<LAT>,<LON>`
-
-## Tips
-
-- If results are sparse, widen the radius (1500 → 3000m)
-- For "open now" requests: check the `hours` field in results, cross-reference with `web_search` for accuracy since OSM hours aren't always complete
-- Zip codes alone can be ambiguous globally — prompt the user for country/state if results look wrong
-- The script uses OpenStreetMap data which is community-maintained; coverage varies by region
--- a/skills/leisure/find-nearby/scripts/find_nearby.py
+++ b/skills/leisure/find-nearby/scripts/find_nearby.py
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-"""Find nearby places using OpenStreetMap (Overpass + Nominatim). No API keys needed.
-
-Usage:
-    # By coordinates
-    python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --radius 1500
-
-    # By address/city/zip (auto-geocoded)
-    python find_nearby.py --near "Times Square, New York" --type cafe --radius 1000
-    python find_nearby.py --near "90210" --type pharmacy
-
-    # Multiple types
-    python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --type bar
-
-    # JSON output for programmatic use
-    python find_nearby.py --near "downtown las vegas" --type restaurant --json
-"""
-
-import argparse
-import json
-import math
-import sys
-import urllib.parse
-import urllib.request
-from typing import Any
-
-OVERPASS_URLS = [
-    "https://overpass-api.de/api/interpreter",
-    "https://overpass.kumi.systems/api/interpreter",
-]
-NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
-USER_AGENT = "HermesAgent/1.0 (find-nearby skill)"
-TIMEOUT = 15
-
-
-def _http_get(url: str) -> Any:
-    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
-    with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-        return json.loads(r.read())
-
-
-def _http_post(url: str, data: str) -> Any:
-    req = urllib.request.Request(
-        url, data=data.encode(), headers={"User-Agent": USER_AGENT}
-    )
-    with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-        return json.loads(r.read())
-
-
-def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
-    """Distance in meters between two coordinates."""
-    R = 6_371_000
-    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
-    dlat = math.radians(lat2 - lat1)
-    dlon = math.radians(lon2 - lon1)
-    a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
-    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
-
-
-def geocode(query: str) -> tuple[float, float]:
-    """Convert address/city/zip to coordinates via Nominatim."""
-    params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1})
-    results = _http_get(f"{NOMINATIM_URL}?{params}")
-    if not results:
-        print(f"Error: Could not geocode '{query}'. Try a more specific address.", file=sys.stderr)
-        sys.exit(1)
-    return float(results[0]["lat"]), float(results[0]["lon"])
-
-
-def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, limit: int = 15) -> list[dict]:
-    """Query Overpass for nearby amenities."""
-    # Build Overpass QL query
-    type_filters = "".join(
-        f'nwr["amenity"="{t}"](around:{radius},{lat},{lon});' for t in types
-    )
-    query = f"[out:json][timeout:{TIMEOUT}];({type_filters});out center tags;"
-
-    # Try each Overpass server
-    data = None
-    for url in OVERPASS_URLS:
-        try:
-            data = _http_post(url, f"data={urllib.parse.quote(query)}")
-            break
-        except Exception:
-            continue
-
-    if not data:
-        return []
-
-    # Parse results
-    places = []
-    for el in data.get("elements", []):
-        tags = el.get("tags", {})
-        name = tags.get("name")
-        if not name:
-            continue
-
-        # Get coordinates (nodes have lat/lon directly, ways/relations use center)
-        plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
-        plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
-        if not plat or not plon:
-            continue
-
-        dist = haversine(lat, lon, plat, plon)
-
-        place = {
-            "name": name,
-            "type": tags.get("amenity", ""),
-            "distance_m": round(dist),
-            "lat": plat,
-            "lon": plon,
-            "maps_url": f"https://www.google.com/maps/search/?api=1&query={plat},{plon}",
-            "directions_url": f"https://www.google.com/maps/dir/?api=1&origin={lat},{lon}&destination={plat},{plon}",
-        }
-
-        # Add useful optional fields
-        if tags.get("cuisine"):
-            place["cuisine"] = tags["cuisine"]
-        if tags.get("opening_hours"):
-            place["hours"] = tags["opening_hours"]
-        if tags.get("phone"):
-            place["phone"] = tags["phone"]
-        if tags.get("website"):
-            place["website"] = tags["website"]
-        if tags.get("addr:street"):
-            addr_parts = [tags.get("addr:housenumber", ""), tags.get("addr:street", "")]
-            if tags.get("addr:city"):
-                addr_parts.append(tags["addr:city"])
-            place["address"] = " ".join(p for p in addr_parts if p)
-
-        places.append(place)
-
-    # Sort by distance, limit results
-    places.sort(key=lambda p: p["distance_m"])
-    return places[:limit]
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Find nearby places via OpenStreetMap")
-    parser.add_argument("--lat", type=float, help="Latitude")
-    parser.add_argument("--lon", type=float, help="Longitude")
-    parser.add_argument("--near", type=str, help="Address, city, or zip code (geocoded automatically)")
-    parser.add_argument("--type", action="append", dest="types", default=[], help="Place type (restaurant, cafe, bar, pharmacy, etc.)")
-    parser.add_argument("--radius", type=int, default=1500, help="Search radius in meters (default: 1500)")
-    parser.add_argument("--limit", type=int, default=15, help="Max results (default: 15)")
-    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
-    args = parser.parse_args()
-
-    # Resolve coordinates
-    if args.near:
-        lat, lon = geocode(args.near)
-    elif args.lat is not None and args.lon is not None:
-        lat, lon = args.lat, args.lon
-    else:
-        print("Error: Provide --lat/--lon or --near", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.types:
-        args.types = ["restaurant"]
-
-    places = find_nearby(lat, lon, args.types, args.radius, args.limit)
-
-    if args.json_output:
-        print(json.dumps({"origin": {"lat": lat, "lon": lon}, "results": places, "count": len(places)}, indent=2))
-    else:
-        if not places:
-            print(f"No {'/'.join(args.types)} found within {args.radius}m")
-            return
-        print(f"Found {len(places)} places within {args.radius}m:\n")
-        for i, p in enumerate(places, 1):
-            dist_str = f"{p['distance_m']}m" if p["distance_m"] < 1000 else f"{p['distance_m']/1000:.1f}km"
-            print(f"  {i}. {p['name']} ({p['type']}) — {dist_str}")
-            if p.get("cuisine"):
-                print(f"     Cuisine: {p['cuisine']}")
-            if p.get("hours"):
-                print(f"     Hours: {p['hours']}")
-            if p.get("address"):
-                print(f"     Address: {p['address']}")
-            print(f"     Map: {p['maps_url']}")
-            print()
-
-
-if __name__ == "__main__":
-    main()
--- a/skills/market-data/polymarket/SKILL.md
+++ b/skills/market-data/polymarket/SKILL.md
--- a/skills/market-data/polymarket/references/api-endpoints.md
+++ b/skills/market-data/polymarket/references/api-endpoints.md
--- a/skills/market-data/polymarket/scripts/polymarket.py
+++ b/skills/market-data/polymarket/scripts/polymarket.py
--- a/skills/mcp/native-mcp/SKILL.md
+++ b/skills/mcp/native-mcp/SKILL.md
@@ -321,32 +321,6 @@ mcp_servers:

 All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions.

-## Sampling (Server-Initiated LLM Requests)
-
-Hermes supports MCP's `sampling/createMessage` capability — MCP servers can request LLM completions through the agent during tool execution. This enables agent-in-the-loop workflows (data analysis, content generation, decision-making).
-
-Sampling is **enabled by default**. Configure per server:
-
-```yaml
-mcp_servers:
-  my_server:
-    command: "npx"
-    args: ["-y", "my-mcp-server"]
-    sampling:
-      enabled: true           # default: true
-      model: "gemini-3-flash" # model override (optional)
-      max_tokens_cap: 4096    # max tokens per request
-      timeout: 30             # LLM call timeout (seconds)
-      max_rpm: 10             # max requests per minute
-      allowed_models: []      # model whitelist (empty = all)
-      max_tool_rounds: 5      # tool loop limit (0 = disable)
-      log_level: "info"       # audit verbosity
-```
-
-Servers can also include `tools` in sampling requests for multi-turn tool-augmented workflows. The `max_tool_rounds` config prevents infinite tool loops. Per-server audit metrics (requests, errors, tokens, tool use count) are tracked via `get_mcp_status()`.
-
-Disable sampling for untrusted servers with `sampling: { enabled: false }`.
-
 ## Notes

 - MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop
--- a/skills/media/DESCRIPTION.md
+++ b/skills/media/DESCRIPTION.md
@@ -1,3 +1 @@
---
-description: Skills for working with media content — YouTube transcripts, GIF search, music generation, and audio visualization.
---
+Media content extraction and transformation tools — YouTube transcripts, audio, video processing.
--- a/skills/mlops/training/accelerate/SKILL.md
+++ b/skills/mlops/training/accelerate/SKILL.md
--- a/skills/mlops/training/accelerate/references/custom-plugins.md
+++ b/skills/mlops/training/accelerate/references/custom-plugins.md
--- a/skills/mlops/training/accelerate/references/megatron-integration.md
+++ b/skills/mlops/training/accelerate/references/megatron-integration.md
--- a/skills/mlops/training/accelerate/references/performance.md
+++ b/skills/mlops/training/accelerate/references/performance.md
--- a/skills/mlops/models/audiocraft/SKILL.md
+++ b/skills/mlops/models/audiocraft/SKILL.md
--- a/skills/mlops/models/audiocraft/references/advanced-usage.md
+++ b/skills/mlops/models/audiocraft/references/advanced-usage.md
--- a/skills/mlops/models/audiocraft/references/troubleshooting.md
+++ b/skills/mlops/models/audiocraft/references/troubleshooting.md
--- a/skills/mlops/training/axolotl/SKILL.md
+++ b/skills/mlops/training/axolotl/SKILL.md
--- a/skills/mlops/training/axolotl/references/api.md
+++ b/skills/mlops/training/axolotl/references/api.md
--- a/skills/mlops/training/axolotl/references/dataset-formats.md
+++ b/skills/mlops/training/axolotl/references/dataset-formats.md
--- a/skills/mlops/training/axolotl/references/index.md
+++ b/skills/mlops/training/axolotl/references/index.md
--- a/skills/mlops/training/axolotl/references/other.md
+++ b/skills/mlops/training/axolotl/references/other.md
--- a/skills/mlops/vector-databases/chroma/SKILL.md
+++ b/skills/mlops/vector-databases/chroma/SKILL.md
--- a/skills/mlops/vector-databases/chroma/references/integration.md
+++ b/skills/mlops/vector-databases/chroma/references/integration.md
--- a/skills/mlops/models/clip/SKILL.md
+++ b/skills/mlops/models/clip/SKILL.md
--- a/skills/mlops/models/clip/references/applications.md
+++ b/skills/mlops/models/clip/references/applications.md
--- a/skills/mlops/cloud/DESCRIPTION.md
+++ b/skills/mlops/cloud/DESCRIPTION.md
@@ -1,3 +0,0 @@
---
-description: GPU cloud providers and serverless compute platforms for ML workloads.
---
--- a/skills/software-development/code-review/SKILL.md
+++ b/skills/software-development/code-review/SKILL.md
--- a/skills/mlops/research/dspy/SKILL.md
+++ b/skills/mlops/research/dspy/SKILL.md
--- a/skills/mlops/research/dspy/references/examples.md
+++ b/skills/mlops/research/dspy/references/examples.md
--- a/skills/mlops/research/dspy/references/modules.md
+++ b/skills/mlops/research/dspy/references/modules.md
--- a/skills/mlops/research/dspy/references/optimizers.md
+++ b/skills/mlops/research/dspy/references/optimizers.md
--- a/skills/mlops/evaluation/DESCRIPTION.md
+++ b/skills/mlops/evaluation/DESCRIPTION.md
@@ -1,3 +0,0 @@
---
-description: Model evaluation benchmarks, experiment tracking, data curation, tokenizers, and interpretability tools.
---
--- a/skills/mlops/vector-databases/faiss/SKILL.md
+++ b/skills/mlops/vector-databases/faiss/SKILL.md
--- a/skills/mlops/vector-databases/faiss/references/index_types.md
+++ b/skills/mlops/vector-databases/faiss/references/index_types.md
--- a/skills/mlops/training/flash-attention/SKILL.md
+++ b/skills/mlops/training/flash-attention/SKILL.md
--- a/skills/mlops/training/flash-attention/references/benchmarks.md
+++ b/skills/mlops/training/flash-attention/references/benchmarks.md
--- a/skills/mlops/training/flash-attention/references/transformers-integration.md
+++ b/skills/mlops/training/flash-attention/references/transformers-integration.md
--- a/skills/mlops/inference/gguf/SKILL.md
+++ b/skills/mlops/inference/gguf/SKILL.md
--- a/skills/mlops/inference/gguf/references/advanced-usage.md
+++ b/skills/mlops/inference/gguf/references/advanced-usage.md
--- a/skills/mlops/inference/gguf/references/troubleshooting.md
+++ b/skills/mlops/inference/gguf/references/troubleshooting.md
--- a/skills/mlops/training/grpo-rl-training/README.md
+++ b/skills/mlops/training/grpo-rl-training/README.md
--- a/skills/mlops/training/grpo-rl-training/SKILL.md
+++ b/skills/mlops/training/grpo-rl-training/SKILL.md
--- a/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py
+++ b/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py
--- a/skills/mlops/inference/guidance/SKILL.md
+++ b/skills/mlops/inference/guidance/SKILL.md
--- a/skills/mlops/inference/guidance/references/backends.md
+++ b/skills/mlops/inference/guidance/references/backends.md
--- a/skills/mlops/inference/guidance/references/constraints.md
+++ b/skills/mlops/inference/guidance/references/constraints.md
--- a/skills/mlops/inference/guidance/references/examples.md
+++ b/skills/mlops/inference/guidance/references/examples.md
--- a/skills/mlops/evaluation/huggingface-tokenizers/SKILL.md
+++ b/skills/mlops/evaluation/huggingface-tokenizers/SKILL.md
--- a/skills/mlops/evaluation/huggingface-tokenizers/references/algorithms.md
+++ b/skills/mlops/evaluation/huggingface-tokenizers/references/algorithms.md
--- a/skills/mlops/evaluation/huggingface-tokenizers/references/integration.md
+++ b/skills/mlops/evaluation/huggingface-tokenizers/references/integration.md
--- a/skills/mlops/evaluation/huggingface-tokenizers/references/pipeline.md
+++ b/skills/mlops/evaluation/huggingface-tokenizers/references/pipeline.md
--- a/skills/mlops/evaluation/huggingface-tokenizers/references/training.md
+++ b/skills/mlops/evaluation/huggingface-tokenizers/references/training.md
--- a/skills/mlops/inference/DESCRIPTION.md
+++ b/skills/mlops/inference/DESCRIPTION.md
@@ -1,3 +0,0 @@
---
-description: Model serving, quantization (GGUF/GPTQ), structured output, inference optimization, and model surgery tools for deploying and running LLMs.
---
--- a/skills/mlops/inference/obliteratus/SKILL.md
+++ b/skills/mlops/inference/obliteratus/SKILL.md
@@ -1,330 +0,0 @@
---
-name: obliteratus
-description: Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets across 5 compute tiers, tournament evaluation, and telemetry-driven recommendations. Use when a user wants to uncensor, abliterate, or remove refusal from an LLM.
-version: 2.0.0
-author: Hermes Agent
-license: MIT
-dependencies: [obliteratus, torch, transformers, bitsandbytes, accelerate, safetensors]
-metadata:
-  hermes:
-    tags: [Abliteration, Uncensoring, Refusal-Removal, LLM, Weight-Projection, SVD, Mechanistic-Interpretability, HuggingFace, Model-Surgery]
-    related_skills: [vllm, gguf, huggingface-tokenizers]
---
-
-# OBLITERATUS Skill
-
-Remove refusal behaviors (guardrails) from open-weight LLMs without retraining or fine-tuning. Uses mechanistic interpretability techniques — including diff-in-means, SVD, whitened SVD, LEACE concept erasure, SAE decomposition, Bayesian kernel projection, and more — to identify and surgically excise refusal directions from model weights while preserving reasoning capabilities.
-
-**License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean.
-
-## When to Use This Skill
-
-Trigger when the user:
- Wants to "uncensor" or "abliterate" an LLM
- Asks about removing refusal/guardrails from a model
- Wants to create an uncensored version of Llama, Qwen, Mistral, etc.
- Mentions "refusal removal", "abliteration", "weight projection"
- Wants to analyze how a model's refusal mechanism works
- References OBLITERATUS, abliterator, or refusal directions
-
-## Step 1: Installation
-
-Check if already installed:
-```bash
-obliteratus --version 2>/dev/null && echo "INSTALLED" || echo "NOT INSTALLED"
-```
-
-If not installed, clone and install from GitHub:
-```bash
-git clone https://github.com/elder-plinius/OBLITERATUS.git
-cd OBLITERATUS
-pip install -e .
-# For Gradio web UI support:
-# pip install -e ".[spaces]"
-```
-
-**IMPORTANT:** Confirm with user before installing. This pulls in ~5-10GB of dependencies (PyTorch, Transformers, bitsandbytes, etc.).
-
-## Step 2: Check Hardware
-
-Before anything, check what GPU is available:
-```bash
-python3 -c "
-import torch
-if torch.cuda.is_available():
-    gpu = torch.cuda.get_device_name(0)
-    vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
-    print(f'GPU: {gpu}')
-    print(f'VRAM: {vram:.1f} GB')
-    if vram < 4: print('TIER: tiny (models under 1B)')
-    elif vram < 8: print('TIER: small (models 1-4B)')
-    elif vram < 16: print('TIER: medium (models 4-9B with 4bit quant)')
-    elif vram < 32: print('TIER: large (models 8-32B with 4bit quant)')
-    else: print('TIER: frontier (models 32B+)')
-else:
-    print('NO GPU - only tiny models (under 1B) on CPU')
-"
-```
-
-### VRAM Requirements (with 4-bit quantization)
-
-| VRAM     | Max Model Size  | Example Models                              |
-|:---------|:----------------|:--------------------------------------------|
-| CPU only | ~1B params      | GPT-2, TinyLlama, SmolLM                    |
-| 4-8 GB   | ~4B params      | Qwen2.5-1.5B, Phi-3.5 mini, Llama 3.2 3B   |
-| 8-16 GB  | ~9B params      | Llama 3.1 8B, Mistral 7B, Gemma 2 9B       |
-| 24 GB    | ~32B params     | Qwen3-32B, Llama 3.1 70B (tight), Command-R |
-| 48 GB+   | ~72B+ params    | Qwen2.5-72B, DeepSeek-R1                    |
-| Multi-GPU| 200B+ params    | Llama 3.1 405B, DeepSeek-V3 (685B MoE)      |
-
-## Step 3: Browse Available Models & Get Recommendations
-
-```bash
-# Browse models by compute tier
-obliteratus models --tier medium
-
-# Get architecture info for a specific model
-obliteratus info <model_name>
-
-# Get telemetry-driven recommendation for best method & params
-obliteratus recommend <model_name>
-obliteratus recommend <model_name> --insights  # global cross-architecture rankings
-```
-
-## Step 4: Choose a Method
-
-### Method Selection Guide
-**Default / recommended for most cases: `advanced`.** It uses multi-direction SVD with norm-preserving projection and is well-tested.
-
-| Situation                         | Recommended Method | Why                                      |
-|:----------------------------------|:-------------------|:-----------------------------------------|
-| Default / most models             | `advanced`         | Multi-direction SVD, norm-preserving, reliable |
-| Quick test / prototyping          | `basic`            | Fast, simple, good enough to evaluate    |
-| Dense model (Llama, Mistral)      | `advanced`         | Multi-direction, norm-preserving         |
-| MoE model (DeepSeek, Mixtral)     | `nuclear`          | Expert-granular, handles MoE complexity  |
-| Reasoning model (R1 distills)     | `surgical`         | CoT-aware, preserves chain-of-thought    |
-| Stubborn refusals persist         | `aggressive`       | Whitened SVD + head surgery + jailbreak   |
-| Want reversible changes           | Use steering vectors (see Analysis section) |
-| Maximum quality, time no object   | `optimized`        | Bayesian search for best parameters      |
-| Experimental auto-detection       | `informed`         | Auto-detects alignment type — experimental, may not always outperform advanced |
-
-### 9 CLI Methods
- **basic** — Single refusal direction via diff-in-means. Fast (~5-10 min for 8B).
- **advanced** (DEFAULT, RECOMMENDED) — Multiple SVD directions, norm-preserving projection, 2 refinement passes. Medium speed (~10-20 min).
- **aggressive** — Whitened SVD + jailbreak-contrastive + attention head surgery. Higher risk of coherence damage.
- **spectral_cascade** — DCT frequency-domain decomposition. Research/novel approach.
- **informed** — Runs analysis DURING abliteration to auto-configure. Experimental — slower and less predictable than advanced.
- **surgical** — SAE features + neuron masking + head surgery + per-expert. Very slow (~1-2 hrs). Best for reasoning models.
- **optimized** — Bayesian hyperparameter search (Optuna TPE). Longest runtime but finds optimal parameters.
- **inverted** — Flips the refusal direction. Model becomes actively willing.
- **nuclear** — Maximum force combo for stubborn MoE models. Expert-granular.
-
-### Direction Extraction Methods (--direction-method flag)
- **diff_means** (default) — Simple difference-in-means between refused/complied activations. Robust.
- **svd** — Multi-direction SVD extraction. Better for complex alignment.
- **leace** — LEACE (Linear Erasure via Closed-form Estimation). Optimal linear erasure.
-
-### 4 Python-API-Only Methods
-(NOT available via CLI — require Python import, which violates AGPL boundary. Mention to user only if they explicitly want to use OBLITERATUS as a library in their own AGPL project.)
- failspy, gabliteration, heretic, rdo
-
-## Step 5: Run Abliteration
-
-### Standard usage
-```bash
-# Default method (advanced) — recommended for most models
-obliteratus obliterate <model_name> --method advanced --output-dir ./abliterated-models
-
-# With 4-bit quantization (saves VRAM)
-obliteratus obliterate <model_name> --method advanced --quantization 4bit --output-dir ./abliterated-models
-
-# Large models (70B+) — conservative defaults
-obliteratus obliterate <model_name> --method advanced --quantization 4bit --large-model --output-dir ./abliterated-models
-```
-
-### Fine-tuning parameters
-```bash
-obliteratus obliterate <model_name> \
-  --method advanced \
-  --direction-method diff_means \
-  --n-directions 4 \
-  --refinement-passes 2 \
-  --regularization 0.1 \
-  --quantization 4bit \
-  --output-dir ./abliterated-models \
-  --contribute  # opt-in telemetry for community research
-```
-
-### Key flags
-| Flag | Description | Default |
-|:-----|:------------|:--------|
-| `--method` | Abliteration method | advanced |
-| `--direction-method` | Direction extraction | diff_means |
-| `--n-directions` | Number of refusal directions (1-32) | method-dependent |
-| `--refinement-passes` | Iterative passes (1-5) | 2 |
-| `--regularization` | Regularization strength (0.0-1.0) | 0.1 |
-| `--quantization` | Load in 4bit or 8bit | none (full precision) |
-| `--large-model` | Conservative defaults for 120B+ | false |
-| `--output-dir` | Where to save the abliterated model | ./obliterated_model |
-| `--contribute` | Share anonymized results for research | false |
-| `--verify-sample-size` | Number of test prompts for refusal check | 20 |
-| `--dtype` | Model dtype (float16, bfloat16) | auto |
-
-### Other execution modes
-```bash
-# Interactive guided mode (hardware → model → preset)
-obliteratus interactive
-
-# Web UI (Gradio)
-obliteratus ui --port 7860
-
-# Run a full ablation study from YAML config
-obliteratus run config.yaml --preset quick
-
-# Tournament: pit all methods against each other
-obliteratus tourney <model_name>
-```
-
-## Step 6: Verify Results
-
-After abliteration, check the output metrics:
-
-| Metric | Good Value | Warning |
-|:-------|:-----------|:--------|
-| Refusal rate | < 5% (ideally ~0%) | > 10% means refusals persist |
-| Perplexity change | < 10% increase | > 15% means coherence damage |
-| KL divergence | < 0.1 | > 0.5 means significant distribution shift |
-| Coherence | High / passes qualitative check | Degraded responses, repetition |
-
-### If refusals persist (> 10%)
-1. Try `aggressive` method
-2. Increase `--n-directions` (e.g., 8 or 16)
-3. Add `--refinement-passes 3`
-4. Try `--direction-method svd` instead of diff_means
-
-### If coherence is damaged (perplexity > 15% increase)
-1. Reduce `--n-directions` (try 2)
-2. Increase `--regularization` (try 0.3)
-3. Reduce `--refinement-passes` to 1
-4. Try `basic` method (gentler)
-
-## Step 7: Use the Abliterated Model
-
-The output is a standard HuggingFace model directory.
-
-```bash
-# Test locally with transformers
-python3 -c "
-from transformers import AutoModelForCausalLM, AutoTokenizer
-model = AutoModelForCausalLM.from_pretrained('./abliterated-models/<model>')
-tokenizer = AutoTokenizer.from_pretrained('./abliterated-models/<model>')
-inputs = tokenizer('How do I pick a lock?', return_tensors='pt')
-outputs = model.generate(**inputs, max_new_tokens=200)
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))
-"
-
-# Upload to HuggingFace Hub
-huggingface-cli upload <username>/<model-name>-abliterated ./abliterated-models/<model>
-
-# Serve with vLLM
-vllm serve ./abliterated-models/<model>
-```
-
-## CLI Command Reference
-
-| Command | Description |
-|:--------|:------------|
-| `obliteratus obliterate` | Main abliteration command |
-| `obliteratus info <model>` | Print model architecture details |
-| `obliteratus models --tier <tier>` | Browse curated models by compute tier |
-| `obliteratus recommend <model>` | Telemetry-driven method/param suggestion |
-| `obliteratus interactive` | Guided setup wizard |
-| `obliteratus tourney <model>` | Tournament: all methods head-to-head |
-| `obliteratus run <config.yaml>` | Execute ablation study from YAML |
-| `obliteratus strategies` | List all registered ablation strategies |
-| `obliteratus report <results.json>` | Regenerate visual reports |
-| `obliteratus ui` | Launch Gradio web interface |
-| `obliteratus aggregate` | Summarize community telemetry data |
-
-## Analysis Modules
-
-OBLITERATUS includes 28 analysis modules for mechanistic interpretability.
-See `skill_view(name="obliteratus", file_path="references/analysis-modules.md")` for the full reference.
-
-### Quick analysis commands
-```bash
-# Run specific analysis modules
-obliteratus run analysis-config.yaml --preset quick
-
-# Key modules to run first:
-# - alignment_imprint: Fingerprint DPO/RLHF/CAI/SFT alignment method
-# - concept_geometry: Single direction vs polyhedral cone
-# - logit_lens: Which layer decides to refuse
-# - anti_ouroboros: Self-repair risk score
-# - causal_tracing: Causally necessary components
-```
-
-### Steering Vectors (Reversible Alternative)
-Instead of permanent weight modification, use inference-time steering:
-```python
-# Python API only — for user's own projects
-from obliteratus.analysis.steering_vectors import SteeringVectorFactory, SteeringHookManager
-```
-
-## Ablation Strategies
-
-Beyond direction-based abliteration, OBLITERATUS includes structural ablation strategies:
- **Embedding Ablation** — Target embedding layer components
- **FFN Ablation** — Feed-forward network block removal
- **Head Pruning** — Attention head pruning
- **Layer Removal** — Full layer removal
-
-List all available: `obliteratus strategies`
-
-## Evaluation
-
-OBLITERATUS includes built-in evaluation tools:
- Refusal rate benchmarking
- Perplexity comparison (before/after)
- LM Eval Harness integration for academic benchmarks
- Head-to-head competitor comparison
- Baseline performance tracking
-
-## Platform Support
-
- **CUDA** — Full support (NVIDIA GPUs)
- **Apple Silicon (MLX)** — Supported via MLX backend
- **CPU** — Supported for tiny models (< 1B params)
-
-## YAML Config Templates
-
-Load templates for reproducible runs via `skill_view`:
- `templates/abliteration-config.yaml` — Standard single-model config
- `templates/analysis-study.yaml` — Pre-abliteration analysis study
- `templates/batch-abliteration.yaml` — Multi-model batch processing
-
-## Telemetry
-
-OBLITERATUS can optionally contribute anonymized run data to a global research dataset.
-Enable with `--contribute` flag. No personal data is collected — only model name, method, metrics.
-
-## Common Pitfalls
-
-1. **Don't use `informed` as default** — it's experimental and slower. Use `advanced` for reliable results.
-2. **Models under ~1B respond poorly to abliteration** — their refusal behaviors are shallow and fragmented, making clean direction extraction difficult. Expect partial results (20-40% remaining refusal). Models 3B+ have cleaner refusal directions and respond much better (often 0% refusal with `advanced`).
-3. **`aggressive` can make things worse** — on small models it can damage coherence and actually increase refusal rate. Only use it if `advanced` leaves > 10% refusals on a 3B+ model.
-4. **Always check perplexity** — if it spikes > 15%, the model is damaged. Reduce aggressiveness.
-5. **MoE models need special handling** — use `nuclear` method for Mixtral, DeepSeek-MoE, etc.
-6. **Quantized models can't be re-quantized** — abliterate the full-precision model, then quantize the output.
-7. **VRAM estimation is approximate** — 4-bit quant helps but peak usage can spike during extraction.
-8. **Reasoning models are sensitive** — use `surgical` for R1 distills to preserve chain-of-thought.
-9. **Check `obliteratus recommend`** — telemetry data may have better parameters than defaults.
-10. **AGPL license** — never `import obliteratus` in MIT/Apache projects. CLI invocation only.
-11. **Large models (70B+)** — always use `--large-model` flag for conservative defaults.
-12. **Spectral certification RED is common** — the spectral check often flags "incomplete" even when practical refusal rate is 0%. Check actual refusal rate rather than relying on spectral certification alone.
-
-## Complementary Skills
-
- **vllm** — Serve abliterated models with high throughput
- **gguf** — Convert abliterated models to GGUF for llama.cpp
- **huggingface-tokenizers** — Work with model tokenizers
--- a/skills/mlops/inference/obliteratus/references/analysis-modules.md
+++ b/skills/mlops/inference/obliteratus/references/analysis-modules.md
@@ -1,166 +0,0 @@
-# OBLITERATUS Analysis Modules — Reference
-
-OBLITERATUS includes 28 analysis modules for mechanistic interpretability of refusal in LLMs.
-These modules help understand how and where refusal behaviors are encoded before performing abliteration.
-
---
-
-## Core Analysis (Run These First)
-
-### 1. Alignment Imprint Detection (`alignment_imprint.py`)
-Fingerprints whether a model was trained via DPO, RLHF, CAI, or SFT.
-This determines which extraction strategy will work best.
-
-### 2. Concept Cone Geometry (`concept_geometry.py`)
-Determines if refusal is a single linear direction or a polyhedral cone
-(set of multiple mechanisms). Single-direction models respond well to `basic`;
-polyhedral models need `advanced` or `surgical`.
-
-### 3. Refusal Logit Lens (`logit_lens.py`)
-Identifies the specific layer where a model "decides" to refuse by decoding
-intermediate layer representations into token space.
-
-### 4. Ouroboros Detection (`anti_ouroboros.py`)
-Identifies if a model attempts to "self-repair" refusal behaviors after
-excision. Reports a risk score (0-1). High scores mean additional refinement
-passes are needed.
-
-### 5. Causal Tracing (`causal_tracing.py`)
-Identifies which components (layers, heads, MLPs) are causally necessary
-for refusal behavior using activation patching.
-
---
-
-## Geometric Analysis
-
-### 6. Cross-Layer Alignment (`cross_layer.py`)
-Measures how refusal directions align across different layers. High alignment
-means the refusal signal is consistent; low alignment suggests layer-specific
-mechanisms.
-
-### 7. Residual Stream Decomposition (`residual_stream.py`)
-Decomposes the residual stream into attention and MLP contributions to
-understand which component type contributes more to refusal.
-
-### 8. Riemannian Manifold Geometry (`riemannian_manifold.py`)
-Analyzes the curvature and geometry of the weight manifold near refusal
-directions. Informs how aggressively projections can be applied without
-damaging the manifold structure.
-
-### 9. Whitened SVD (`whitened_svd.py`)
-Covariance-normalized SVD extraction that separates guardrail signals from
-natural activation variance. More precise than standard SVD for models with
-high activation variance.
-
-### 10. Concept Cone Geometry (extended)
-Maps the full polyhedral structure of refusal, including cone angles,
-face counts, and intersection patterns.
-
---
-
-## Probing & Classification
-
-### 11. Activation Probing (`activation_probing.py`)
-Post-excision verification — probes for residual refusal concepts after
-abliteration to ensure complete removal.
-
-### 12. Probing Classifiers (`probing_classifiers.py`)
-Trains linear classifiers to detect refusal in activations. Used both
-before (to verify refusal exists) and after (to verify it's gone).
-
-### 13. Activation Patching (`activation_patching.py`)
-Interchange interventions — swaps activations between refused and complied
-runs to identify causal components.
-
-### 14. Tuned Lens (`tuned_lens.py`)
-Trained version of logit lens that provides more accurate per-layer
-decoding by learning affine transformations for each layer.
-
-### 15. Multi-Token Position Analysis (`multi_token_position.py`)
-Analyzes refusal signals across multiple token positions, not just the
-last token. Important for models that distribute refusal across the sequence.
-
---
-
-## Abliteration & Manipulation
-
-### 16. SAE-Based Abliteration (`sae_abliteration.py`)
-Uses Sparse Autoencoder features to identify and remove specific refusal
-features. More surgical than direction-based methods.
-
-### 17. Steering Vectors (`steering_vectors.py`)
-Creates and applies inference-time steering vectors for reversible refusal
-modification. Includes `SteeringVectorFactory` and `SteeringHookManager`.
-
-### 18. LEACE Concept Erasure (`leace.py`)
-Linear Erasure via Closed-form Estimation — mathematically optimal linear
-concept removal. Available as both analysis module and direction extraction method.
-
-### 19. Sparse Surgery (`sparse_surgery.py`)
-High-precision weight modification targeting individual neurons and
-weight matrix entries rather than full directions.
-
-### 20. Conditional Abliteration (`conditional_abliteration.py`)
-Targeted removal that only affects specific refusal categories while
-preserving others (e.g., remove weapons refusal but keep CSAM refusal).
-
---
-
-## Transfer & Robustness
-
-### 21. Cross-Model Transfer (`cross_model_transfer.py`)
-Tests whether refusal directions extracted from one model transfer to
-another architecture. Measures universality of guardrail directions.
-
-### 22. Defense Robustness (`defense_robustness.py`)
-Evaluates how robust the abliteration is against various defense mechanisms
-and re-alignment attempts.
-
-### 23. Spectral Certification (`spectral_certification.py`)
-Provides mathematical bounds on the completeness of refusal removal
-using spectral analysis of the projection.
-
-### 24. Wasserstein Optimal Extraction (`wasserstein_optimal.py`)
-Uses optimal transport theory for more precise direction extraction
-that minimizes distribution shift.
-
-### 25. Wasserstein Transfer (`wasserstein_transfer.py`)
-Distribution transfer between models using Wasserstein distance
-for cross-architecture refusal direction mapping.
-
---
-
-## Advanced / Research
-
-### 26. Bayesian Kernel Projection (`bayesian_kernel_projection.py`)
-Probabilistic feature mapping that estimates uncertainty in refusal
-direction identification.
-
-### 27. Cross-Model Universality Index
-Measures if guardrail directions generalize across different model
-architectures and training regimes.
-
-### 28. Visualization (`visualization.py`)
-Plotting and graphing utilities for all analysis modules. Generates
-heatmaps, direction plots, and layer-wise analysis charts.
-
---
-
-## Running Analysis
-
-### Via CLI
-```bash
-# Run analysis from a YAML config
-obliteratus run analysis-study.yaml --preset quick
-
-# Available study presets:
-# quick     — Fast sanity check (2-3 modules)
-# full      — All core + geometric analysis
-# jailbreak — Refusal circuit localization
-# knowledge — Knowledge preservation analysis
-# robustness — Stress testing / defense evaluation
-```
-
-### Via YAML Config
-See the `templates/analysis-study.yaml` template for a complete example.
-Load with: `skill_view(name="obliteratus", file_path="templates/analysis-study.yaml")`
--- a/skills/mlops/inference/obliteratus/references/methods-guide.md
+++ b/skills/mlops/inference/obliteratus/references/methods-guide.md
@@ -1,141 +0,0 @@
-# OBLITERATUS Methods — Detailed Guide
-
-> The CLI accepts 9 methods via `--method`: basic, advanced, aggressive, spectral_cascade,
-> informed, surgical, optimized, inverted, nuclear.
-> Four additional methods (failspy, gabliteration, heretic, rdo) are available only via the Python API.
-
-## How Abliteration Works (Theory)
-
-Abliteration identifies a "refusal direction" — a vector in the model's activation space that
-corresponds to refusal behavior — and projects it out of the weight matrices.
-
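-Mathematically: `W_new = W_old - (W_old @ d @ d.T)` where `d` is the refusal direction, expressed as a unit-norm column vector (if `d` is not normalized, `d @ d.T` is not a projection and the direction is not removed cleanly).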
-
-The key challenge is finding accurate refusal directions without damaging other capabilities.
-
---
-
-## Direction Extraction Methods
-
-Before projecting, OBLITERATUS extracts refusal directions using one of three methods:
-
-| Method | Flag | Description | Best For |
-|:-------|:-----|:------------|:---------|
-| Diff-in-Means | `--direction-method diff_means` | Difference between mean activations on refused vs. complied prompts | Default, fast, robust |
-| SVD | `--direction-method svd` | Multi-direction extraction via Singular Value Decomposition | Complex alignment, multiple refusal mechanisms |
-| LEACE | `--direction-method leace` | Linear Erasure via Closed-form Estimation — mathematically optimal | Maximum precision, research |
-
---
-
-## Method Details
-
-### basic
- **Directions:** 1 (single diff-in-means vector)
- **Speed:** Fast (~5-10 min for 8B model)
- **Risk:** Low
- **Use case:** Quick tests, prototyping, evaluating if abliteration works for a model
- **How it works:** Extracts one refusal direction and projects it out uniformly across all layers.
-
-### advanced (DEFAULT — RECOMMENDED)
- **Directions:** 4 (multi-direction SVD)
- **Speed:** Medium (~10-20 min for 8B model)
- **Risk:** Low-Medium
- **Refinement passes:** 2
- **Use case:** Default for most models. Well-tested and reliable.
- **How it works:** Extracts multiple refusal directions via SVD, applies norm-preserving bi-projection to maintain weight matrix norms. Two refinement passes catch residual refusal.
-
-### aggressive
- **Directions:** 8+ (whitened SVD + jailbreak-contrastive)
- **Speed:** Medium-Slow
- **Risk:** Medium-High (may damage coherence)
- **Use case:** When `advanced` leaves > 10% refusals. Stubborn models.
- **How it works:** Uses whitened SVD for covariance-normalized extraction, adds jailbreak-contrastive directions, performs attention head surgery on the most refusal-active heads.
-
-### spectral_cascade
- **Speed:** Medium
- **Risk:** Medium
- **Use case:** Research, novel approaches
- **How it works:** DCT (Discrete Cosine Transform) frequency-domain decomposition of refusal signals. Separates high-frequency (surface-level) from low-frequency (deep) refusal patterns.
-
-### informed (EXPERIMENTAL)
- **Speed:** Slow (~20-40 min for 8B model)
- **Risk:** Variable — results depend on analysis quality
- **Use case:** When you want auto-configuration, but be aware this is experimental and may not outperform `advanced`.
- **How it works:** Runs 4 analysis modules first (alignment imprint, concept geometry, logit lens, ouroboros detection), then auto-configures extraction strategy. Includes an "Ouroboros loop" that detects and counteracts self-repair.
- **Note:** The auto-detection can sometimes misconfigure. If results are poor, fall back to `advanced`.
-
-### surgical
- **Speed:** Very slow (~1-2 hrs for 8B model)
- **Risk:** Low (very precise)
- **Use case:** Reasoning models (R1 distills, QwQ, etc.) where chain-of-thought must be preserved.
- **How it works:** Uses SAE (Sparse Autoencoder) features + individual neuron masking + attention head surgery + per-expert decomposition (for MoE). CoT-aware — identifies and protects reasoning-critical directions before projecting.
-
-### optimized
- **Speed:** Very slow (hours — runs many trials)
- **Risk:** Low (finds optimal parameters)
- **Use case:** When quality matters more than speed. Production models.
- **How it works:** Bayesian hyperparameter search via Optuna TPE sampler. Optimizes n_directions, regularization, refinement passes, and layer selection jointly. Evaluates each configuration on refusal rate + perplexity.
-
-### inverted
- **Speed:** Fast
- **Risk:** High (model behavior changes dramatically)
- **Use case:** Research, studying refusal mechanisms
- **How it works:** Instead of projecting out the refusal direction, reflects it. The model actively complies rather than passively not-refusing. Useful for understanding the geometry of alignment.
-
-### nuclear
- **Speed:** Slow
- **Risk:** Medium-High
- **Use case:** Stubborn MoE models (DeepSeek-MoE, Mixtral, etc.)
- **How it works:** Combines expert-granular abliteration (EGA), steering vector injection, attention head pruning, and multi-pass refinement. Decomposes refusal signals into per-expert components for MoE architectures.
-
---
-
-## Method Selection Flowchart
-
-```
-Is this a quick test?
-  → YES: basic
-  → NO: continue
-
-Is it an MoE model (Mixtral, DeepSeek-MoE)?
-  → YES: nuclear
-  → NO: continue
-
-Is it a reasoning model (R1, QwQ, CoT-focused)?
-  → YES: surgical
-  → NO: continue
-
-Do you need the absolute best quality and have time?
-  → YES: optimized
-  → NO: advanced (recommended default)
-
-Did advanced leave > 10% refusals?
-  → YES: aggressive
-  → Still refusing: nuclear
-```
-
---
-
-## Key Parameters
-
-| Parameter | Range | Default | Effect |
-|:----------|:------|:--------|:-------|
-| `--n-directions` | 1-32 | method-dependent | More directions = more complete removal, but higher damage risk |
-| `--regularization` | 0.0-1.0 | 0.1 | Higher = more conservative (less removal, less damage) |
-| `--refinement-passes` | 1-5 | 2 | More passes catch residual refusal, but diminishing returns |
-| `--quantization` | 4bit, 8bit | none | Reduces VRAM usage; quality impact minimal for extraction |
-| `--verify-sample-size` | 10-200 | 20 | More samples = more accurate refusal rate estimate |
-
---
-
-## Troubleshooting
-
-| Problem | Likely Cause | Fix |
-|:--------|:-------------|:----|
-| Refusal rate > 20% | Too few directions | Increase `--n-directions`, try `aggressive` |
-| Refusal rate 5-20% | Residual refusal | Add `--refinement-passes 3`, try `--direction-method svd` |
-| Perplexity spike > 20% | Over-aggressive removal | Reduce `--n-directions`, increase `--regularization` |
-| Repetitive output | Weight matrix damage | Reduce `--n-directions` or fall back to `basic` (single direction); check norm preservation |
-| MoE model still refuses | Non-expert-aware method | Switch to `nuclear` |
-| Reasoning degraded | CoT directions damaged | Use `surgical` method |
-| OOM during extraction | Insufficient VRAM | Add `--quantization 4bit` and/or `--large-model` |
--- a/skills/mlops/inference/instructor/SKILL.md
+++ b/skills/mlops/inference/instructor/SKILL.md
--- a/skills/mlops/inference/instructor/references/examples.md
+++ b/skills/mlops/inference/instructor/references/examples.md
--- a/skills/mlops/inference/instructor/references/providers.md
+++ b/skills/mlops/inference/instructor/references/providers.md
--- a/skills/mlops/inference/instructor/references/validation.md
+++ b/skills/mlops/inference/instructor/references/validation.md
--- a/skills/mlops/cloud/lambda-labs/SKILL.md
+++ b/skills/mlops/cloud/lambda-labs/SKILL.md
--- a/skills/mlops/cloud/lambda-labs/references/advanced-usage.md
+++ b/skills/mlops/cloud/lambda-labs/references/advanced-usage.md
--- a/skills/mlops/cloud/lambda-labs/references/troubleshooting.md
+++ b/skills/mlops/cloud/lambda-labs/references/troubleshooting.md
--- a/skills/mlops/inference/llama-cpp/SKILL.md
+++ b/skills/mlops/inference/llama-cpp/SKILL.md
--- a/skills/mlops/inference/llama-cpp/references/optimization.md
+++ b/skills/mlops/inference/llama-cpp/references/optimization.md
--- a/skills/mlops/inference/llama-cpp/references/quantization.md
+++ b/skills/mlops/inference/llama-cpp/references/quantization.md
--- a/skills/mlops/inference/llama-cpp/references/server.md
+++ b/skills/mlops/inference/llama-cpp/references/server.md
--- a/skills/mlops/models/llava/SKILL.md
+++ b/skills/mlops/models/llava/SKILL.md
--- a/skills/mlops/models/llava/references/training.md
+++ b/skills/mlops/models/llava/references/training.md
--- a/skills/mlops/evaluation/lm-evaluation-harness/SKILL.md
+++ b/skills/mlops/evaluation/lm-evaluation-harness/SKILL.md
--- a/skills/mlops/evaluation/lm-evaluation-harness/references/api-evaluation.md
+++ b/skills/mlops/evaluation/lm-evaluation-harness/references/api-evaluation.md
--- a/skills/mlops/evaluation/lm-evaluation-harness/references/benchmark-guide.md
+++ b/skills/mlops/evaluation/lm-evaluation-harness/references/benchmark-guide.md
--- a/skills/mlops/evaluation/lm-evaluation-harness/references/custom-tasks.md
+++ b/skills/mlops/evaluation/lm-evaluation-harness/references/custom-tasks.md
--- a/skills/mlops/evaluation/lm-evaluation-harness/references/distributed-eval.md
+++ b/skills/mlops/evaluation/lm-evaluation-harness/references/distributed-eval.md
--- a/skills/research/ml-paper-writing/SKILL.md
+++ b/skills/research/ml-paper-writing/SKILL.md
--- a/skills/research/ml-paper-writing/references/checklists.md
+++ b/skills/research/ml-paper-writing/references/checklists.md
--- a/skills/research/ml-paper-writing/references/citation-workflow.md
+++ b/skills/research/ml-paper-writing/references/citation-workflow.md
--- a/skills/research/ml-paper-writing/references/reviewer-guidelines.md
+++ b/skills/research/ml-paper-writing/references/reviewer-guidelines.md
--- a/skills/research/ml-paper-writing/references/sources.md
+++ b/skills/research/ml-paper-writing/references/sources.md
--- a/skills/research/ml-paper-writing/references/writing-guide.md
+++ b/skills/research/ml-paper-writing/references/writing-guide.md
--- a/skills/research/ml-paper-writing/templates/README.md
+++ b/skills/research/ml-paper-writing/templates/README.md
--- a/skills/research/ml-paper-writing/templates/aaai2026/README.md
+++ b/skills/research/ml-paper-writing/templates/aaai2026/README.md
--- a/Show More
+++ b/Show More