Compare commits

..

1 Commits

Author SHA1 Message Date
teknium1
67cf37fc26 fix: head+tail truncation for execute_code stdout (inspired by openclaw context-pruning)
Previously, _drain() only captured the first MAX_STDOUT_BYTES (50KB) of
stdout, silently dropping all tail output. Scripts that print() their
final results at the end would have those results lost.

Now uses a two-buffer approach: 40% head + 60% tail (rolling window).
This matches the pattern already used in terminal_tool.py (line 1042-1051)
but gives the tail more space since execute_code scripts typically
print() their final results at the end.

Inspired by openclaw's softTrim context-pruning (headChars/tailChars).
2026-03-09 02:15:48 -07:00
237 changed files with 741 additions and 2780 deletions

View File

@@ -195,8 +195,6 @@ def build_skills_system_prompt() -> str:
# Collect skills with descriptions, grouped by category
# Each entry: (skill_name, description)
# Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
# → category "mlops/training", skill "axolotl"
skills_by_category: dict[str, list[tuple[str, str]]] = {}
for skill_file in skills_dir.rglob("SKILL.md"):
# Skip skills incompatible with the current OS platform
@@ -205,13 +203,8 @@ def build_skills_system_prompt() -> str:
rel_path = skill_file.relative_to(skills_dir)
parts = rel_path.parts
if len(parts) >= 2:
# Category is everything between skills_dir and the skill folder
# e.g. parts = ("mlops", "training", "axolotl", "SKILL.md")
# → category = "mlops/training", skill_name = "axolotl"
# e.g. parts = ("github", "github-auth", "SKILL.md")
# → category = "github", skill_name = "github-auth"
category = parts[0]
skill_name = parts[-2]
category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
else:
category = "general"
skill_name = skill_file.parent.name
@@ -222,11 +215,9 @@ def build_skills_system_prompt() -> str:
return ""
# Read category-level descriptions from DESCRIPTION.md
# Checks both the exact category path and parent directories
category_descriptions = {}
for category in skills_by_category:
cat_path = Path(category)
desc_file = skills_dir / cat_path / "DESCRIPTION.md"
desc_file = skills_dir / category / "DESCRIPTION.md"
if desc_file.exists():
try:
content = desc_file.read_text(encoding="utf-8")

View File

@@ -555,21 +555,6 @@ toolsets:
# args: ["-y", "@modelcontextprotocol/server-github"]
# env:
# GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
#
# Sampling (server-initiated LLM requests) — enabled by default.
# Per-server config under the 'sampling' key:
# analysis:
# command: npx
# args: ["-y", "analysis-server"]
# sampling:
# enabled: true # default: true
# model: "gemini-3-flash" # override model (optional)
# max_tokens_cap: 4096 # max tokens per request
# timeout: 30 # LLM call timeout (seconds)
# max_rpm: 10 # max requests per minute
# allowed_models: [] # model whitelist (empty = all)
# max_tool_rounds: 5 # tool loop limit (0 = disable)
# log_level: "info" # audit verbosity
# =============================================================================
# Voice Transcription (Speech-to-Text)

View File

@@ -252,7 +252,6 @@ def cleanup_document_cache(max_age_hours: int = 24) -> int:
class MessageType(Enum):
"""Types of incoming messages."""
TEXT = "text"
LOCATION = "location"
PHOTO = "photo"
VIDEO = "video"
AUDIO = "audio"

View File

@@ -132,10 +132,6 @@ class TelegramAdapter(BasePlatformAdapter):
filters.COMMAND,
self._handle_command
))
self._app.add_handler(TelegramMessageHandler(
filters.LOCATION | getattr(filters, "VENUE", filters.LOCATION),
self._handle_location_message
))
self._app.add_handler(TelegramMessageHandler(
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
self._handle_media_message
@@ -550,41 +546,6 @@ class TelegramAdapter(BasePlatformAdapter):
event = self._build_message_event(update.message, MessageType.COMMAND)
await self.handle_message(event)
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming location/venue pin messages."""
if not update.message:
return
msg = update.message
venue = getattr(msg, "venue", None)
location = getattr(venue, "location", None) if venue else getattr(msg, "location", None)
if not location:
return
lat = getattr(location, "latitude", None)
lon = getattr(location, "longitude", None)
if lat is None or lon is None:
return
# Build a text message with coordinates and context
parts = ["[The user shared a location pin.]"]
if venue:
title = getattr(venue, "title", None)
address = getattr(venue, "address", None)
if title:
parts.append(f"Venue: {title}")
if address:
parts.append(f"Address: {address}")
parts.append(f"latitude: {lat}")
parts.append(f"longitude: {lon}")
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
event = self._build_message_event(msg, MessageType.LOCATION)
event.text = "\n".join(parts)
await self.handle_message(event)
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming media messages, downloading images to local cache."""
if not update.message:

View File

@@ -1449,11 +1449,6 @@ class GatewayRunner:
except Exception:
current_provider = "openrouter"
# Detect custom endpoint: provider resolved to openrouter but a custom
# base URL is configured — the user set up a custom endpoint.
if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
current_provider = "custom"
if not args:
provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
lines = [
@@ -1580,10 +1575,6 @@ class GatewayRunner:
except Exception:
current_provider = "openrouter"
# Detect custom endpoint
if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
current_provider = "custom"
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
lines = [

View File

@@ -761,39 +761,9 @@ def cmd_model(args):
("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
("minimax", "MiniMax (global direct API)"),
("minimax-cn", "MiniMax China (domestic direct API)"),
("custom", "Custom endpoint (self-hosted / VLLM / etc.)"),
]
# Add user-defined custom providers from config.yaml
custom_providers_cfg = config.get("custom_providers") or []
_custom_provider_map = {} # key → {name, base_url, api_key}
if isinstance(custom_providers_cfg, list):
for entry in custom_providers_cfg:
if not isinstance(entry, dict):
continue
name = entry.get("name", "").strip()
base_url = entry.get("base_url", "").strip()
if not name or not base_url:
continue
# Generate a stable key from the name
key = "custom:" + name.lower().replace(" ", "-")
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
saved_model = entry.get("model", "")
model_hint = f"{saved_model}" if saved_model else ""
providers.append((key, f"{name} ({short_url}){model_hint}"))
_custom_provider_map[key] = {
"name": name,
"base_url": base_url,
"api_key": entry.get("api_key", ""),
"model": saved_model,
}
# Always add the manual custom endpoint option last
providers.append(("custom", "Custom endpoint (enter URL manually)"))
# Add removal option if there are saved custom providers
if _custom_provider_map:
providers.append(("remove-custom", "Remove a saved custom provider"))
# Reorder so the active provider is at the top
known_keys = {k for k, _ in providers}
active_key = active if active in known_keys else "custom"
@@ -821,10 +791,6 @@ def cmd_model(args):
_model_flow_openai_codex(config, current_model)
elif selected_provider == "custom":
_model_flow_custom(config)
elif selected_provider.startswith("custom:") and selected_provider in _custom_provider_map:
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
elif selected_provider == "remove-custom":
_remove_custom_provider(config)
elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
_model_flow_api_key_provider(config, selected_provider, current_model)
@@ -1040,11 +1006,7 @@ def _model_flow_openai_codex(config, current_model=""):
def _model_flow_custom(config):
"""Custom endpoint: collect URL, API key, and model name.
Automatically saves the endpoint to ``custom_providers`` in config.yaml
so it appears in the provider menu on subsequent runs.
"""
"""Custom endpoint: collect URL, API key, and model name."""
from hermes_cli.auth import _save_model_choice, deactivate_provider
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
@@ -1076,8 +1038,6 @@ def _model_flow_custom(config):
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
return
effective_key = api_key or current_key
if base_url:
save_env_value("OPENAI_BASE_URL", base_url)
if api_key:
@@ -1090,7 +1050,7 @@ def _model_flow_custom(config):
cfg = load_config()
model = cfg.get("model")
if isinstance(model, dict):
model["provider"] = "custom"
model["provider"] = "auto"
model["base_url"] = effective_url
save_config(cfg)
deactivate_provider()
@@ -1101,223 +1061,6 @@ def _model_flow_custom(config):
deactivate_provider()
print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
# Auto-save to custom_providers so it appears in the menu next time
_save_custom_provider(effective_url, effective_key, model_name or "")
def _save_custom_provider(base_url, api_key="", model=""):
"""Save a custom endpoint to custom_providers in config.yaml.
Deduplicates by base_url — if the URL already exists, updates the
model name but doesn't add a duplicate entry.
Auto-generates a display name from the URL hostname.
"""
from hermes_cli.config import load_config, save_config
cfg = load_config()
providers = cfg.get("custom_providers") or []
if not isinstance(providers, list):
providers = []
# Check if this URL is already saved — update model if so
for entry in providers:
if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"):
if model and entry.get("model") != model:
entry["model"] = model
cfg["custom_providers"] = providers
save_config(cfg)
return # already saved, updated model if needed
# Auto-generate a name from the URL
import re
clean = base_url.replace("https://", "").replace("http://", "").rstrip("/")
# Remove /v1 suffix for cleaner names
clean = re.sub(r"/v1/?$", "", clean)
# Use hostname:port as the name
name = clean.split("/")[0]
# Capitalize for readability
if "localhost" in name or "127.0.0.1" in name:
name = f"Local ({name})"
elif "runpod" in name.lower():
name = f"RunPod ({name})"
else:
name = name.capitalize()
entry = {"name": name, "base_url": base_url}
if api_key:
entry["api_key"] = api_key
if model:
entry["model"] = model
providers.append(entry)
cfg["custom_providers"] = providers
save_config(cfg)
print(f" 💾 Saved to custom providers as \"{name}\" (edit in config.yaml)")
def _remove_custom_provider(config):
"""Let the user remove a saved custom provider from config.yaml."""
from hermes_cli.config import load_config, save_config
cfg = load_config()
providers = cfg.get("custom_providers") or []
if not isinstance(providers, list) or not providers:
print("No custom providers configured.")
return
print("Remove a custom provider:\n")
choices = []
for entry in providers:
if isinstance(entry, dict):
name = entry.get("name", "unnamed")
url = entry.get("base_url", "")
short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
choices.append(f"{name} ({short_url})")
else:
choices.append(str(entry))
choices.append("Cancel")
try:
from simple_term_menu import TerminalMenu
menu = TerminalMenu(
[f" {c}" for c in choices], cursor_index=0,
menu_cursor="-> ", menu_cursor_style=("fg_red", "bold"),
menu_highlight_style=("fg_red",),
cycle_cursor=True, clear_screen=False,
title="Select provider to remove:",
)
idx = menu.show()
print()
except (ImportError, NotImplementedError):
for i, c in enumerate(choices, 1):
print(f" {i}. {c}")
print()
try:
val = input(f"Choice [1-{len(choices)}]: ").strip()
idx = int(val) - 1 if val else None
except (ValueError, KeyboardInterrupt, EOFError):
idx = None
if idx is None or idx >= len(providers):
print("No change.")
return
removed = providers.pop(idx)
cfg["custom_providers"] = providers
save_config(cfg)
removed_name = removed.get("name", "unnamed") if isinstance(removed, dict) else str(removed)
print(f"✅ Removed \"{removed_name}\" from custom providers.")
def _model_flow_named_custom(config, provider_info):
"""Handle a named custom provider from config.yaml custom_providers list.
If the entry has a saved model name, activates it immediately.
Otherwise probes the endpoint's /models API to let the user pick one.
"""
from hermes_cli.auth import _save_model_choice, deactivate_provider
from hermes_cli.config import save_env_value, load_config, save_config
from hermes_cli.models import fetch_api_models
name = provider_info["name"]
base_url = provider_info["base_url"]
api_key = provider_info.get("api_key", "")
saved_model = provider_info.get("model", "")
# If a model is saved, just activate immediately — no probing needed
if saved_model:
save_env_value("OPENAI_BASE_URL", base_url)
if api_key:
save_env_value("OPENAI_API_KEY", api_key)
_save_model_choice(saved_model)
cfg = load_config()
model = cfg.get("model")
if isinstance(model, dict):
model["provider"] = "custom"
model["base_url"] = base_url
save_config(cfg)
deactivate_provider()
print(f"✅ Switched to: {saved_model}")
print(f" Provider: {name} ({base_url})")
return
# No saved model — probe endpoint and let user pick
print(f" Provider: {name}")
print(f" URL: {base_url}")
print()
print("No model saved for this provider. Fetching available models...")
models = fetch_api_models(api_key, base_url, timeout=8.0)
if models:
print(f"Found {len(models)} model(s):\n")
try:
from simple_term_menu import TerminalMenu
menu_items = [f" {m}" for m in models] + [" Cancel"]
menu = TerminalMenu(
menu_items, cursor_index=0,
menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"),
menu_highlight_style=("fg_green",),
cycle_cursor=True, clear_screen=False,
title=f"Select model from {name}:",
)
idx = menu.show()
print()
if idx is None or idx >= len(models):
print("Cancelled.")
return
model_name = models[idx]
except (ImportError, NotImplementedError):
for i, m in enumerate(models, 1):
print(f" {i}. {m}")
print(f" {len(models) + 1}. Cancel")
print()
try:
val = input(f"Choice [1-{len(models) + 1}]: ").strip()
if not val:
print("Cancelled.")
return
idx = int(val) - 1
if idx < 0 or idx >= len(models):
print("Cancelled.")
return
model_name = models[idx]
except (ValueError, KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
else:
print("Could not fetch models from endpoint. Enter model name manually.")
try:
model_name = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if not model_name:
print("No model specified. Cancelled.")
return
# Activate and save the model to the custom_providers entry
save_env_value("OPENAI_BASE_URL", base_url)
if api_key:
save_env_value("OPENAI_API_KEY", api_key)
_save_model_choice(model_name)
cfg = load_config()
model = cfg.get("model")
if isinstance(model, dict):
model["provider"] = "custom"
model["base_url"] = base_url
save_config(cfg)
deactivate_provider()
# Save model name to the custom_providers entry for next time
_save_custom_provider(base_url, api_key, model_name)
print(f"\n✅ Model set to: {model_name}")
print(f" Provider: {name} ({base_url})")
# Curated model lists for direct API-key providers
_PROVIDER_MODELS = {

View File

@@ -63,7 +63,7 @@ _PROVIDER_LABELS = {
"kimi-coding": "Kimi / Moonshot",
"minimax": "MiniMax",
"minimax-cn": "MiniMax (China)",
"custom": "Custom endpoint",
"custom": "custom endpoint",
}
_PROVIDER_ALIASES = {

View File

@@ -632,29 +632,6 @@ def setup_model_provider(config: dict):
save_env_value("OPENAI_BASE_URL", "")
save_env_value("OPENAI_API_KEY", "")
# Update config.yaml and deactivate any OAuth provider so the
# resolver doesn't keep returning the old provider (e.g. Codex).
try:
from hermes_cli.auth import deactivate_provider
deactivate_provider()
except Exception:
pass
import yaml
config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml"
try:
disk_cfg = {}
if config_path.exists():
disk_cfg = yaml.safe_load(config_path.read_text()) or {}
model_section = disk_cfg.get("model", {})
if isinstance(model_section, str):
model_section = {"default": model_section}
model_section["provider"] = "openrouter"
model_section.pop("base_url", None) # OpenRouter uses default URL
disk_cfg["model"] = model_section
config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
except Exception as e:
logger.debug("Could not save provider to config.yaml: %s", e)
elif provider_idx == 3: # Custom endpoint
selected_provider = "custom"
print()
@@ -682,28 +659,6 @@ def setup_model_provider(config: dict):
if model_name:
config['model'] = model_name
save_env_value("LLM_MODEL", model_name)
# Save provider and base_url to config.yaml so the gateway and CLI
# both resolve the correct provider without relying on env-var heuristics.
if base_url:
import yaml
config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml"
try:
disk_cfg = {}
if config_path.exists():
disk_cfg = yaml.safe_load(config_path.read_text()) or {}
model_section = disk_cfg.get("model", {})
if isinstance(model_section, str):
model_section = {"default": model_section}
model_section["provider"] = "custom"
model_section["base_url"] = base_url.rstrip("/")
if model_name:
model_section["default"] = model_name
disk_cfg["model"] = model_section
config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
except Exception as e:
logger.debug("Could not save provider to config.yaml: %s", e)
print_success("Custom endpoint configured")
elif provider_idx == 4: # Z.AI / GLM

View File

@@ -3834,27 +3834,6 @@ class AIAgent:
else:
assistant_message = response.choices[0].message
# Normalize content to string — some OpenAI-compatible servers
# (llama-server, etc.) return content as a dict or list instead
# of a plain string, which crashes downstream .strip() calls.
if assistant_message.content is not None and not isinstance(assistant_message.content, str):
raw = assistant_message.content
if isinstance(raw, dict):
assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
elif isinstance(raw, list):
# Multimodal content list — extract text parts
parts = []
for part in raw:
if isinstance(part, str):
parts.append(part)
elif isinstance(part, dict) and part.get("type") == "text":
parts.append(part.get("text", ""))
elif isinstance(part, dict) and "text" in part:
parts.append(str(part["text"]))
assistant_message.content = "\n".join(parts)
else:
assistant_message.content = str(raw)
# Handle assistant response
if assistant_message.content and not self.quiet_mode:
print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")

View File

@@ -1,3 +0,0 @@
---
description: Creative content generation — ASCII art, hand-drawn style diagrams, and visual design tools.
---

View File

@@ -1,161 +0,0 @@
---
name: pokemon-player
description: Play Pokémon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal.
tags: [gaming, pokemon, emulator, pyboy, gameplay, gameboy]
---
# Pokémon Player
Play Pokémon games via headless emulation using the `pokemon-agent` package.
## When to Use
- User says "play pokemon", "start pokemon", "pokemon game"
- User asks about Pokemon Red, Blue, Yellow, FireRed, etc.
- User wants to watch an AI play Pokemon
- User references a ROM file (.gb, .gbc, .gba)
## First-Time Setup
### 1. Install the package
```bash
pip install pokemon-agent[dashboard] pyboy
```
### 2. Get the ROM
Ask the user for their ROM file path. Do NOT attempt to download ROMs.
### 3. Start the game server
```bash
pokemon-agent serve --rom <ROM_PATH> --port 8765 &
```
Wait 3 seconds, then verify:
```bash
curl -s http://localhost:8765/health
```
## The Gameplay Loop
### Step 1: OBSERVE
```bash
curl -s http://localhost:8765/state
```
### Step 2: ORIENT
- Dialog active → advance text
- In battle → fight
- Party hurt → heal
- Near objective → navigate
### Step 3: DECIDE
Priority order:
1. If dialog active → a_until_dialog_end
2. If in battle → choose best move
3. If any Pokemon <20% HP → Pokémon Center
4. If near story objective → navigate to it
5. If underleveled → train in grass
6. Otherwise → explore
### Step 4: ACT
```bash
curl -s -X POST http://localhost:8765/action \
-H "Content-Type: application/json" \
-d '{"actions": ["walk_up", "walk_up", "press_a"]}'
```
Action reference:
- press_a — confirm, talk, select
- press_b — cancel, close menu
- press_start — open game menu
- walk_up/down/left/right — move one tile
- a_until_dialog_end — advance all dialog
- wait_60 — wait ~1 second
### Step 5: VERIFY
Check state_after in the response. If stuck 3+ turns:
1. Press B several times
2. Try different directions
3. Take screenshot and use vision_analyze
4. Load last save if truly stuck
### Step 6: RECORD
```
memory add: PKM:OBJECTIVE: Heading to Pewter City to challenge Brock
memory add: PKM:PROGRESS: Got Squirtle, Got Pokedex, → Pewter City
```
### Step 7: SAVE
Save every 20-30 turns and ALWAYS before gym battles:
```bash
curl -s -X POST http://localhost:8765/save \
-H "Content-Type: application/json" \
-d '{"name": "before_brock"}'
```
## Battle Strategy
### Decision Tree
1. Want to catch? → Weaken then throw Poké Ball
2. Wild you don't need? → RUN
3. Type advantage? → Use super-effective move
4. No advantage? → Use strongest STAB move
5. Low HP? → Switch or use Potion
### Type Chart
- Water beats Fire, Ground, Rock
- Fire beats Grass, Bug, Ice
- Grass beats Water, Ground, Rock
- Electric beats Water, Flying
- Ground beats Fire, Electric, Rock, Poison
- Psychic beats Fighting, Poison (dominant in Gen 1!)
### Gen 1 Quirks
- Special stat is both offense AND defense for special moves
- Psychic is overpowered (Ghost moves bugged)
- Critical hits based on Speed stat
- Wrap/Bind prevent opponent from acting
## Memory Conventions
| Prefix | Purpose | Example |
|--------|---------|---------|
| PKM:OBJECTIVE | Current goal | Defeat Brock in Pewter City |
| PKM:MAP | Navigation knowledge | Viridian Forest: go north |
| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty |
| PKM:PROGRESS | Milestone tracker | ✓ Boulder Badge → Cascade Badge |
| PKM:STUCK | Stuck situations | Got stuck in Cerulean Cave |
| PKM:TEAM | Team notes | Squirtle is Water/Ice coverage |
## Progression Milestones
- ☐ Choose starter
- ☐ Deliver Oak's Parcel → receive Pokédex
- ☐ Boulder Badge — Brock (Rock) → use Water/Grass
- ☐ Cascade Badge — Misty (Water) → use Grass/Electric
- ☐ Thunder Badge — Lt. Surge (Electric) → use Ground
- ☐ Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying
- ☐ Soul Badge — Koga (Poison) → use Ground/Psychic
- ☐ Marsh Badge — Sabrina (Psychic)
- ☐ Volcano Badge — Blaine (Fire) → use Water/Ground
- ☐ Earth Badge — Giovanni (Ground) → use Water/Grass/Ice
- ☐ Elite Four → Champion!
## Stopping Play
1. Save the game:
```bash
curl -s -X POST http://localhost:8765/save \
-d '{"name": "session_end"}'
```
2. Update memory with progress
3. Tell user: "Game saved! Say 'play pokemon' to resume."
4. Kill the background server process
## Dashboard
If `pokemon-agent[dashboard]` is installed, open:
http://localhost:8765/dashboard
Live features: game screen, AI reasoning stream, team status, action log.
## Pitfalls
- NEVER download or provide ROM files — always ask the user
- Don't send more than 15 actions per /action call
- Always wait for dialog to clear before moving
- Save BEFORE gym battles
- Take screenshots sparingly — they cost vision tokens
- Verify server is running with /health before any commands

View File

@@ -1,69 +0,0 @@
---
name: find-nearby
description: Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed.
version: 1.0.0
metadata:
hermes:
tags: [location, maps, nearby, places, restaurants, local]
related_skills: []
---
# Find Nearby — Local Place Discovery
Find restaurants, cafes, bars, pharmacies, and other places near any location. Uses OpenStreetMap (free, no API keys). Works with:
- **Coordinates** from Telegram location pins (latitude/longitude in conversation)
- **Addresses** ("near 123 Main St, Springfield")
- **Cities** ("restaurants in downtown Austin")
- **Zip codes** ("pharmacies near 90210")
- **Landmarks** ("cafes near Times Square")
## Quick Reference
```bash
# By coordinates (from Telegram location pin or user-provided)
python3 SKILL_DIR/scripts/find_nearby.py --lat <LAT> --lon <LON> --type restaurant --radius 1500
# By address, city, or landmark (auto-geocoded)
python3 SKILL_DIR/scripts/find_nearby.py --near "Times Square, New York" --type cafe
# Multiple place types
python3 SKILL_DIR/scripts/find_nearby.py --near "downtown austin" --type restaurant --type bar --limit 10
# JSON output
python3 SKILL_DIR/scripts/find_nearby.py --near "90210" --type pharmacy --json
```
### Parameters
| Flag | Description | Default |
|------|-------------|---------|
| `--lat`, `--lon` | Exact coordinates | — |
| `--near` | Address, city, zip, or landmark (geocoded) | — |
| `--type` | Place type (repeatable for multiple) | restaurant |
| `--radius` | Search radius in meters | 1500 |
| `--limit` | Max results | 15 |
| `--json` | Machine-readable JSON output | off |
### Common Place Types
`restaurant`, `cafe`, `bar`, `pub`, `fast_food`, `pharmacy`, `hospital`, `bank`, `atm`, `fuel`, `parking`, `supermarket`, `convenience`, `hotel`
## Workflow
1. **Get the location.** Look for coordinates (`latitude: ... / longitude: ...`) from a Telegram pin, or ask the user for an address/city/zip.
2. **Ask for preferences** (only if not already stated): place type, how far they're willing to go, any specifics (cuisine, "open now", etc.).
3. **Run the script** with appropriate flags. Use `--json` if you need to process results programmatically.
4. **Present results** with names, distances, and Google Maps links. If the user asked about hours or "open now," check the `hours` field in results — if missing or unclear, verify with `web_search`.
5. **For directions**, use the `directions_url` from results, or construct: `https://www.google.com/maps/dir/?api=1&origin=<LAT>,<LON>&destination=<LAT>,<LON>`
## Tips
- If results are sparse, widen the radius (1500 → 3000m)
- For "open now" requests: check the `hours` field in results, cross-reference with `web_search` for accuracy since OSM hours aren't always complete
- Zip codes alone can be ambiguous globally — prompt the user for country/state if results look wrong
- The script uses OpenStreetMap data which is community-maintained; coverage varies by region

View File

@@ -1,184 +0,0 @@
#!/usr/bin/env python3
"""Find nearby places using OpenStreetMap (Overpass + Nominatim). No API keys needed.
Usage:
# By coordinates
python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --radius 1500
# By address/city/zip (auto-geocoded)
python find_nearby.py --near "Times Square, New York" --type cafe --radius 1000
python find_nearby.py --near "90210" --type pharmacy
# Multiple types
python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --type bar
# JSON output for programmatic use
python find_nearby.py --near "downtown las vegas" --type restaurant --json
"""
import argparse
import json
import math
import sys
import urllib.parse
import urllib.request
from typing import Any
OVERPASS_URLS = [
"https://overpass-api.de/api/interpreter",
"https://overpass.kumi.systems/api/interpreter",
]
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
USER_AGENT = "HermesAgent/1.0 (find-nearby skill)"
TIMEOUT = 15
def _http_get(url: str) -> Any:
req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
return json.loads(r.read())
def _http_post(url: str, data: str) -> Any:
req = urllib.request.Request(
url, data=data.encode(), headers={"User-Agent": USER_AGENT}
)
with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
return json.loads(r.read())
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Distance in meters between two coordinates."""
R = 6_371_000
rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2
return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
def geocode(query: str) -> tuple[float, float]:
"""Convert address/city/zip to coordinates via Nominatim."""
params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1})
results = _http_get(f"{NOMINATIM_URL}?{params}")
if not results:
print(f"Error: Could not geocode '{query}'. Try a more specific address.", file=sys.stderr)
sys.exit(1)
return float(results[0]["lat"]), float(results[0]["lon"])
def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, limit: int = 15) -> list[dict]:
"""Query Overpass for nearby amenities."""
# Build Overpass QL query
type_filters = "".join(
f'nwr["amenity"="{t}"](around:{radius},{lat},{lon});' for t in types
)
query = f"[out:json][timeout:{TIMEOUT}];({type_filters});out center tags;"
# Try each Overpass server
data = None
for url in OVERPASS_URLS:
try:
data = _http_post(url, f"data={urllib.parse.quote(query)}")
break
except Exception:
continue
if not data:
return []
# Parse results
places = []
for el in data.get("elements", []):
tags = el.get("tags", {})
name = tags.get("name")
if not name:
continue
# Get coordinates (nodes have lat/lon directly, ways/relations use center)
plat = el.get("lat") or (el.get("center", {}) or {}).get("lat")
plon = el.get("lon") or (el.get("center", {}) or {}).get("lon")
if not plat or not plon:
continue
dist = haversine(lat, lon, plat, plon)
place = {
"name": name,
"type": tags.get("amenity", ""),
"distance_m": round(dist),
"lat": plat,
"lon": plon,
"maps_url": f"https://www.google.com/maps/search/?api=1&query={plat},{plon}",
"directions_url": f"https://www.google.com/maps/dir/?api=1&origin={lat},{lon}&destination={plat},{plon}",
}
# Add useful optional fields
if tags.get("cuisine"):
place["cuisine"] = tags["cuisine"]
if tags.get("opening_hours"):
place["hours"] = tags["opening_hours"]
if tags.get("phone"):
place["phone"] = tags["phone"]
if tags.get("website"):
place["website"] = tags["website"]
if tags.get("addr:street"):
addr_parts = [tags.get("addr:housenumber", ""), tags.get("addr:street", "")]
if tags.get("addr:city"):
addr_parts.append(tags["addr:city"])
place["address"] = " ".join(p for p in addr_parts if p)
places.append(place)
# Sort by distance, limit results
places.sort(key=lambda p: p["distance_m"])
return places[:limit]
def main():
parser = argparse.ArgumentParser(description="Find nearby places via OpenStreetMap")
parser.add_argument("--lat", type=float, help="Latitude")
parser.add_argument("--lon", type=float, help="Longitude")
parser.add_argument("--near", type=str, help="Address, city, or zip code (geocoded automatically)")
parser.add_argument("--type", action="append", dest="types", default=[], help="Place type (restaurant, cafe, bar, pharmacy, etc.)")
parser.add_argument("--radius", type=int, default=1500, help="Search radius in meters (default: 1500)")
parser.add_argument("--limit", type=int, default=15, help="Max results (default: 15)")
parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
args = parser.parse_args()
# Resolve coordinates
if args.near:
lat, lon = geocode(args.near)
elif args.lat is not None and args.lon is not None:
lat, lon = args.lat, args.lon
else:
print("Error: Provide --lat/--lon or --near", file=sys.stderr)
sys.exit(1)
if not args.types:
args.types = ["restaurant"]
places = find_nearby(lat, lon, args.types, args.radius, args.limit)
if args.json_output:
print(json.dumps({"origin": {"lat": lat, "lon": lon}, "results": places, "count": len(places)}, indent=2))
else:
if not places:
print(f"No {'/'.join(args.types)} found within {args.radius}m")
return
print(f"Found {len(places)} places within {args.radius}m:\n")
for i, p in enumerate(places, 1):
dist_str = f"{p['distance_m']}m" if p["distance_m"] < 1000 else f"{p['distance_m']/1000:.1f}km"
print(f" {i}. {p['name']} ({p['type']}) — {dist_str}")
if p.get("cuisine"):
print(f" Cuisine: {p['cuisine']}")
if p.get("hours"):
print(f" Hours: {p['hours']}")
if p.get("address"):
print(f" Address: {p['address']}")
print(f" Map: {p['maps_url']}")
print()
if __name__ == "__main__":
main()

View File

@@ -321,32 +321,6 @@ mcp_servers:
All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions.
## Sampling (Server-Initiated LLM Requests)
Hermes supports MCP's `sampling/createMessage` capability — MCP servers can request LLM completions through the agent during tool execution. This enables agent-in-the-loop workflows (data analysis, content generation, decision-making).
Sampling is **enabled by default**. Configure per server:
```yaml
mcp_servers:
my_server:
command: "npx"
args: ["-y", "my-mcp-server"]
sampling:
enabled: true # default: true
model: "gemini-3-flash" # model override (optional)
max_tokens_cap: 4096 # max tokens per request
timeout: 30 # LLM call timeout (seconds)
max_rpm: 10 # max requests per minute
allowed_models: [] # model whitelist (empty = all)
max_tool_rounds: 5 # tool loop limit (0 = disable)
log_level: "info" # audit verbosity
```
Servers can also include `tools` in sampling requests for multi-turn tool-augmented workflows. The `max_tool_rounds` config prevents infinite tool loops. Per-server audit metrics (requests, errors, tokens, tool use count) are tracked via `get_mcp_status()`.
Disable sampling for untrusted servers with `sampling: { enabled: false }`.
## Notes
- MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop

View File

@@ -1,3 +1 @@
---
description: Skills for working with media content — YouTube transcripts, GIF search, music generation, and audio visualization.
---
Media content extraction and transformation tools — YouTube transcripts, audio, video processing.

View File

@@ -1,3 +0,0 @@
---
description: GPU cloud providers and serverless compute platforms for ML workloads.
---

View File

@@ -1,3 +0,0 @@
---
description: Model evaluation benchmarks, experiment tracking, data curation, tokenizers, and interpretability tools.
---

View File

@@ -1,3 +0,0 @@
---
description: Model serving, quantization (GGUF/GPTQ), structured output, inference optimization, and model surgery tools for deploying and running LLMs.
---

View File

@@ -1,330 +0,0 @@
---
name: obliteratus
description: Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets across 5 compute tiers, tournament evaluation, and telemetry-driven recommendations. Use when a user wants to uncensor, abliterate, or remove refusal from an LLM.
version: 2.0.0
author: Hermes Agent
license: MIT
dependencies: [obliteratus, torch, transformers, bitsandbytes, accelerate, safetensors]
metadata:
hermes:
tags: [Abliteration, Uncensoring, Refusal-Removal, LLM, Weight-Projection, SVD, Mechanistic-Interpretability, HuggingFace, Model-Surgery]
related_skills: [vllm, gguf, huggingface-tokenizers]
---
# OBLITERATUS Skill
Remove refusal behaviors (guardrails) from open-weight LLMs without retraining or fine-tuning. Uses mechanistic interpretability techniques — including diff-in-means, SVD, whitened SVD, LEACE concept erasure, SAE decomposition, Bayesian kernel projection, and more — to identify and surgically excise refusal directions from model weights while preserving reasoning capabilities.
**License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean.
## When to Use This Skill
Trigger when the user:
- Wants to "uncensor" or "abliterate" an LLM
- Asks about removing refusal/guardrails from a model
- Wants to create an uncensored version of Llama, Qwen, Mistral, etc.
- Mentions "refusal removal", "abliteration", "weight projection"
- Wants to analyze how a model's refusal mechanism works
- References OBLITERATUS, abliterator, or refusal directions
## Step 1: Installation
Check if already installed:
```bash
obliteratus --version 2>/dev/null && echo "INSTALLED" || echo "NOT INSTALLED"
```
If not installed, clone and install from GitHub:
```bash
git clone https://github.com/elder-plinius/OBLITERATUS.git
cd OBLITERATUS
pip install -e .
# For Gradio web UI support:
# pip install -e ".[spaces]"
```
**IMPORTANT:** Confirm with user before installing. This pulls in ~5-10GB of dependencies (PyTorch, Transformers, bitsandbytes, etc.).
## Step 2: Check Hardware
Before anything, check what GPU is available:
```bash
python3 -c "
import torch
if torch.cuda.is_available():
gpu = torch.cuda.get_device_name(0)
vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f'GPU: {gpu}')
print(f'VRAM: {vram:.1f} GB')
if vram < 4: print('TIER: tiny (models under 1B)')
elif vram < 8: print('TIER: small (models 1-4B)')
elif vram < 16: print('TIER: medium (models 4-9B with 4bit quant)')
elif vram < 32: print('TIER: large (models 8-32B with 4bit quant)')
else: print('TIER: frontier (models 32B+)')
else:
print('NO GPU - only tiny models (under 1B) on CPU')
"
```
### VRAM Requirements (with 4-bit quantization)
| VRAM | Max Model Size | Example Models |
|:---------|:----------------|:--------------------------------------------|
| CPU only | ~1B params | GPT-2, TinyLlama, SmolLM |
| 4-8 GB | ~4B params | Qwen2.5-1.5B, Phi-3.5 mini, Llama 3.2 3B |
| 8-16 GB | ~9B params | Llama 3.1 8B, Mistral 7B, Gemma 2 9B |
| 24 GB | ~32B params | Qwen3-32B, Llama 3.1 70B (tight), Command-R |
| 48 GB+ | ~72B+ params | Qwen2.5-72B, DeepSeek-R1 |
| Multi-GPU| 200B+ params | Llama 3.1 405B, DeepSeek-V3 (685B MoE) |
## Step 3: Browse Available Models & Get Recommendations
```bash
# Browse models by compute tier
obliteratus models --tier medium
# Get architecture info for a specific model
obliteratus info <model_name>
# Get telemetry-driven recommendation for best method & params
obliteratus recommend <model_name>
obliteratus recommend <model_name> --insights # global cross-architecture rankings
```
## Step 4: Choose a Method
### Method Selection Guide
**Default / recommended for most cases: `advanced`.** It uses multi-direction SVD with norm-preserving projection and is well-tested.
| Situation | Recommended Method | Why |
|:----------------------------------|:-------------------|:-----------------------------------------|
| Default / most models | `advanced` | Multi-direction SVD, norm-preserving, reliable |
| Quick test / prototyping | `basic` | Fast, simple, good enough to evaluate |
| Dense model (Llama, Mistral) | `advanced` | Multi-direction, norm-preserving |
| MoE model (DeepSeek, Mixtral) | `nuclear` | Expert-granular, handles MoE complexity |
| Reasoning model (R1 distills) | `surgical` | CoT-aware, preserves chain-of-thought |
| Stubborn refusals persist | `aggressive` | Whitened SVD + head surgery + jailbreak |
| Want reversible changes | Use steering vectors (see Analysis section) |
| Maximum quality, time no object | `optimized` | Bayesian search for best parameters |
| Experimental auto-detection | `informed` | Auto-detects alignment type — experimental, may not always outperform advanced |
### 9 CLI Methods
- **basic** — Single refusal direction via diff-in-means. Fast (~5-10 min for 8B).
- **advanced** (DEFAULT, RECOMMENDED) — Multiple SVD directions, norm-preserving projection, 2 refinement passes. Medium speed (~10-20 min).
- **aggressive** — Whitened SVD + jailbreak-contrastive + attention head surgery. Higher risk of coherence damage.
- **spectral_cascade** — DCT frequency-domain decomposition. Research/novel approach.
- **informed** — Runs analysis DURING abliteration to auto-configure. Experimental — slower and less predictable than advanced.
- **surgical** — SAE features + neuron masking + head surgery + per-expert. Very slow (~1-2 hrs). Best for reasoning models.
- **optimized** — Bayesian hyperparameter search (Optuna TPE). Longest runtime but finds optimal parameters.
- **inverted** — Flips the refusal direction. Model becomes actively willing.
- **nuclear** — Maximum force combo for stubborn MoE models. Expert-granular.
### Direction Extraction Methods (--direction-method flag)
- **diff_means** (default) — Simple difference-in-means between refused/complied activations. Robust.
- **svd** — Multi-direction SVD extraction. Better for complex alignment.
- **leace** — LEACE (Linear Erasure via Closed-form Estimation). Optimal linear erasure.
### 4 Python-API-Only Methods
(NOT available via CLI — require Python import, which violates AGPL boundary. Mention to user only if they explicitly want to use OBLITERATUS as a library in their own AGPL project.)
- failspy, gabliteration, heretic, rdo
## Step 5: Run Abliteration
### Standard usage
```bash
# Default method (advanced) — recommended for most models
obliteratus obliterate <model_name> --method advanced --output-dir ./abliterated-models
# With 4-bit quantization (saves VRAM)
obliteratus obliterate <model_name> --method advanced --quantization 4bit --output-dir ./abliterated-models
# Large models (70B+) — conservative defaults
obliteratus obliterate <model_name> --method advanced --quantization 4bit --large-model --output-dir ./abliterated-models
```
### Fine-tuning parameters
```bash
obliteratus obliterate <model_name> \
--method advanced \
--direction-method diff_means \
--n-directions 4 \
--refinement-passes 2 \
--regularization 0.1 \
--quantization 4bit \
--output-dir ./abliterated-models \
--contribute # opt-in telemetry for community research
```
### Key flags
| Flag | Description | Default |
|:-----|:------------|:--------|
| `--method` | Abliteration method | advanced |
| `--direction-method` | Direction extraction | diff_means |
| `--n-directions` | Number of refusal directions (1-32) | method-dependent |
| `--refinement-passes` | Iterative passes (1-5) | 2 |
| `--regularization` | Regularization strength (0.0-1.0) | 0.1 |
| `--quantization` | Load in 4bit or 8bit | none (full precision) |
| `--large-model` | Conservative defaults for 120B+ | false |
| `--output-dir` | Where to save the abliterated model | ./obliterated_model |
| `--contribute` | Share anonymized results for research | false |
| `--verify-sample-size` | Number of test prompts for refusal check | 20 |
| `--dtype` | Model dtype (float16, bfloat16) | auto |
### Other execution modes
```bash
# Interactive guided mode (hardware → model → preset)
obliteratus interactive
# Web UI (Gradio)
obliteratus ui --port 7860
# Run a full ablation study from YAML config
obliteratus run config.yaml --preset quick
# Tournament: pit all methods against each other
obliteratus tourney <model_name>
```
## Step 6: Verify Results
After abliteration, check the output metrics:
| Metric | Good Value | Warning |
|:-------|:-----------|:--------|
| Refusal rate | < 5% (ideally ~0%) | > 10% means refusals persist |
| Perplexity change | < 10% increase | > 15% means coherence damage |
| KL divergence | < 0.1 | > 0.5 means significant distribution shift |
| Coherence | High / passes qualitative check | Degraded responses, repetition |
### If refusals persist (> 10%)
1. Try `aggressive` method
2. Increase `--n-directions` (e.g., 8 or 16)
3. Add `--refinement-passes 3`
4. Try `--direction-method svd` instead of diff_means
### If coherence is damaged (perplexity > 15% increase)
1. Reduce `--n-directions` (try 2)
2. Increase `--regularization` (try 0.3)
3. Reduce `--refinement-passes` to 1
4. Try `basic` method (gentler)
## Step 7: Use the Abliterated Model
The output is a standard HuggingFace model directory.
```bash
# Test locally with transformers
python3 -c "
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained('./abliterated-models/<model>')
tokenizer = AutoTokenizer.from_pretrained('./abliterated-models/<model>')
inputs = tokenizer('How do I pick a lock?', return_tensors='pt')
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
"
# Upload to HuggingFace Hub
huggingface-cli upload <username>/<model-name>-abliterated ./abliterated-models/<model>
# Serve with vLLM
vllm serve ./abliterated-models/<model>
```
## CLI Command Reference
| Command | Description |
|:--------|:------------|
| `obliteratus obliterate` | Main abliteration command |
| `obliteratus info <model>` | Print model architecture details |
| `obliteratus models --tier <tier>` | Browse curated models by compute tier |
| `obliteratus recommend <model>` | Telemetry-driven method/param suggestion |
| `obliteratus interactive` | Guided setup wizard |
| `obliteratus tourney <model>` | Tournament: all methods head-to-head |
| `obliteratus run <config.yaml>` | Execute ablation study from YAML |
| `obliteratus strategies` | List all registered ablation strategies |
| `obliteratus report <results.json>` | Regenerate visual reports |
| `obliteratus ui` | Launch Gradio web interface |
| `obliteratus aggregate` | Summarize community telemetry data |
## Analysis Modules
OBLITERATUS includes 28 analysis modules for mechanistic interpretability.
See `skill_view(name="obliteratus", file_path="references/analysis-modules.md")` for the full reference.
### Quick analysis commands
```bash
# Run specific analysis modules
obliteratus run analysis-config.yaml --preset quick
# Key modules to run first:
# - alignment_imprint: Fingerprint DPO/RLHF/CAI/SFT alignment method
# - concept_geometry: Single direction vs polyhedral cone
# - logit_lens: Which layer decides to refuse
# - anti_ouroboros: Self-repair risk score
# - causal_tracing: Causally necessary components
```
### Steering Vectors (Reversible Alternative)
Instead of permanent weight modification, use inference-time steering:
```python
# Python API only — for user's own projects
from obliteratus.analysis.steering_vectors import SteeringVectorFactory, SteeringHookManager
```
## Ablation Strategies
Beyond direction-based abliteration, OBLITERATUS includes structural ablation strategies:
- **Embedding Ablation** — Target embedding layer components
- **FFN Ablation** — Feed-forward network block removal
- **Head Pruning** — Attention head pruning
- **Layer Removal** — Full layer removal
List all available: `obliteratus strategies`
## Evaluation
OBLITERATUS includes built-in evaluation tools:
- Refusal rate benchmarking
- Perplexity comparison (before/after)
- LM Eval Harness integration for academic benchmarks
- Head-to-head competitor comparison
- Baseline performance tracking
## Platform Support
- **CUDA** — Full support (NVIDIA GPUs)
- **Apple Silicon (MLX)** — Supported via MLX backend
- **CPU** — Supported for tiny models (< 1B params)
## YAML Config Templates
Load templates for reproducible runs via `skill_view`:
- `templates/abliteration-config.yaml` — Standard single-model config
- `templates/analysis-study.yaml` — Pre-abliteration analysis study
- `templates/batch-abliteration.yaml` — Multi-model batch processing
## Telemetry
OBLITERATUS can optionally contribute anonymized run data to a global research dataset.
Enable with `--contribute` flag. No personal data is collected — only model name, method, metrics.
## Common Pitfalls
1. **Don't use `informed` as default** — it's experimental and slower. Use `advanced` for reliable results.
2. **Models under ~1B respond poorly to abliteration** — their refusal behaviors are shallow and fragmented, making clean direction extraction difficult. Expect partial results (20-40% remaining refusal). Models 3B+ have cleaner refusal directions and respond much better (often 0% refusal with `advanced`).
3. **`aggressive` can make things worse** — on small models it can damage coherence and actually increase refusal rate. Only use it if `advanced` leaves > 10% refusals on a 3B+ model.
4. **Always check perplexity** — if it spikes > 15%, the model is damaged. Reduce aggressiveness.
5. **MoE models need special handling** — use `nuclear` method for Mixtral, DeepSeek-MoE, etc.
6. **Quantized models can't be re-quantized** — abliterate the full-precision model, then quantize the output.
7. **VRAM estimation is approximate** — 4-bit quant helps but peak usage can spike during extraction.
8. **Reasoning models are sensitive** — use `surgical` for R1 distills to preserve chain-of-thought.
9. **Check `obliteratus recommend`** — telemetry data may have better parameters than defaults.
10. **AGPL license** — never `import obliteratus` in MIT/Apache projects. CLI invocation only.
11. **Large models (70B+)** — always use `--large-model` flag for conservative defaults.
12. **Spectral certification RED is common** — the spectral check often flags "incomplete" even when practical refusal rate is 0%. Check actual refusal rate rather than relying on spectral certification alone.
## Complementary Skills
- **vllm** — Serve abliterated models with high throughput
- **gguf** — Convert abliterated models to GGUF for llama.cpp
- **huggingface-tokenizers** — Work with model tokenizers

View File

@@ -1,166 +0,0 @@
# OBLITERATUS Analysis Modules — Reference
OBLITERATUS includes 28 analysis modules for mechanistic interpretability of refusal in LLMs.
These modules help understand how and where refusal behaviors are encoded before performing abliteration.
---
## Core Analysis (Run These First)
### 1. Alignment Imprint Detection (`alignment_imprint.py`)
Fingerprints whether a model was trained via DPO, RLHF, CAI, or SFT.
This determines which extraction strategy will work best.
### 2. Concept Cone Geometry (`concept_geometry.py`)
Determines if refusal is a single linear direction or a polyhedral cone
(set of multiple mechanisms). Single-direction models respond well to `basic`;
polyhedral models need `advanced` or `surgical`.
### 3. Refusal Logit Lens (`logit_lens.py`)
Identifies the specific layer where a model "decides" to refuse by decoding
intermediate layer representations into token space.
### 4. Ouroboros Detection (`anti_ouroboros.py`)
Identifies if a model attempts to "self-repair" refusal behaviors after
excision. Reports a risk score (0-1). High scores mean additional refinement
passes are needed.
### 5. Causal Tracing (`causal_tracing.py`)
Identifies which components (layers, heads, MLPs) are causally necessary
for refusal behavior using activation patching.
---
## Geometric Analysis
### 6. Cross-Layer Alignment (`cross_layer.py`)
Measures how refusal directions align across different layers. High alignment
means the refusal signal is consistent; low alignment suggests layer-specific
mechanisms.
### 7. Residual Stream Decomposition (`residual_stream.py`)
Decomposes the residual stream into attention and MLP contributions to
understand which component type contributes more to refusal.
### 8. Riemannian Manifold Geometry (`riemannian_manifold.py`)
Analyzes the curvature and geometry of the weight manifold near refusal
directions. Informs how aggressively projections can be applied without
damaging the manifold structure.
### 9. Whitened SVD (`whitened_svd.py`)
Covariance-normalized SVD extraction that separates guardrail signals from
natural activation variance. More precise than standard SVD for models with
high activation variance.
### 10. Concept Cone Geometry (extended)
Maps the full polyhedral structure of refusal, including cone angles,
face counts, and intersection patterns.
---
## Probing & Classification
### 11. Activation Probing (`activation_probing.py`)
Post-excision verification — probes for residual refusal concepts after
abliteration to ensure complete removal.
### 12. Probing Classifiers (`probing_classifiers.py`)
Trains linear classifiers to detect refusal in activations. Used both
before (to verify refusal exists) and after (to verify it's gone).
### 13. Activation Patching (`activation_patching.py`)
Interchange interventions — swaps activations between refused and complied
runs to identify causal components.
### 14. Tuned Lens (`tuned_lens.py`)
Trained version of logit lens that provides more accurate per-layer
decoding by learning affine transformations for each layer.
### 15. Multi-Token Position Analysis (`multi_token_position.py`)
Analyzes refusal signals across multiple token positions, not just the
last token. Important for models that distribute refusal across the sequence.
---
## Abliteration & Manipulation
### 16. SAE-Based Abliteration (`sae_abliteration.py`)
Uses Sparse Autoencoder features to identify and remove specific refusal
features. More surgical than direction-based methods.
### 17. Steering Vectors (`steering_vectors.py`)
Creates and applies inference-time steering vectors for reversible refusal
modification. Includes `SteeringVectorFactory` and `SteeringHookManager`.
### 18. LEACE Concept Erasure (`leace.py`)
Linear Erasure via Closed-form Estimation — mathematically optimal linear
concept removal. Available as both analysis module and direction extraction method.
### 19. Sparse Surgery (`sparse_surgery.py`)
High-precision weight modification targeting individual neurons and
weight matrix entries rather than full directions.
### 20. Conditional Abliteration (`conditional_abliteration.py`)
Targeted removal that only affects specific refusal categories while
preserving others (e.g., remove weapons refusal but keep CSAM refusal).
---
## Transfer & Robustness
### 21. Cross-Model Transfer (`cross_model_transfer.py`)
Tests whether refusal directions extracted from one model transfer to
another architecture. Measures universality of guardrail directions.
### 22. Defense Robustness (`defense_robustness.py`)
Evaluates how robust the abliteration is against various defense mechanisms
and re-alignment attempts.
### 23. Spectral Certification (`spectral_certification.py`)
Provides mathematical bounds on the completeness of refusal removal
using spectral analysis of the projection.
### 24. Wasserstein Optimal Extraction (`wasserstein_optimal.py`)
Uses optimal transport theory for more precise direction extraction
that minimizes distribution shift.
### 25. Wasserstein Transfer (`wasserstein_transfer.py`)
Distribution transfer between models using Wasserstein distance
for cross-architecture refusal direction mapping.
---
## Advanced / Research
### 26. Bayesian Kernel Projection (`bayesian_kernel_projection.py`)
Probabilistic feature mapping that estimates uncertainty in refusal
direction identification.
### 27. Cross-Model Universality Index
Measures if guardrail directions generalize across different model
architectures and training regimes.
### 28. Visualization (`visualization.py`)
Plotting and graphing utilities for all analysis modules. Generates
heatmaps, direction plots, and layer-wise analysis charts.
---
## Running Analysis
### Via CLI
```bash
# Run analysis from a YAML config
obliteratus run analysis-study.yaml --preset quick
# Available study presets:
# quick — Fast sanity check (2-3 modules)
# full — All core + geometric analysis
# jailbreak — Refusal circuit localization
# knowledge — Knowledge preservation analysis
# robustness — Stress testing / defense evaluation
```
### Via YAML Config
See the `templates/analysis-study.yaml` template for a complete example.
Load with: `skill_view(name="obliteratus", file_path="templates/analysis-study.yaml")`

View File

@@ -1,141 +0,0 @@
# OBLITERATUS Methods — Detailed Guide
> The CLI accepts 9 methods via `--method`: basic, advanced, aggressive, spectral_cascade,
> informed, surgical, optimized, inverted, nuclear.
> Four additional methods (failspy, gabliteration, heretic, rdo) are available only via the Python API.
## How Abliteration Works (Theory)
Abliteration identifies a "refusal direction" — a vector in the model's activation space that
corresponds to refusal behavior — and projects it out of the weight matrices.
Mathematically: `W_new = W_old - (W_old @ d @ d.T)` where `d` is the refusal direction.
The key challenge is finding accurate refusal directions without damaging other capabilities.
---
## Direction Extraction Methods
Before projecting, OBLITERATUS extracts refusal directions using one of three methods:
| Method | Flag | Description | Best For |
|:-------|:-----|:------------|:---------|
| Diff-in-Means | `--direction-method diff_means` | Difference between mean activations on refused vs. complied prompts | Default, fast, robust |
| SVD | `--direction-method svd` | Multi-direction extraction via Singular Value Decomposition | Complex alignment, multiple refusal mechanisms |
| LEACE | `--direction-method leace` | Linear Erasure via Closed-form Estimation — mathematically optimal | Maximum precision, research |
---
## Method Details
### basic
- **Directions:** 1 (single diff-in-means vector)
- **Speed:** Fast (~5-10 min for 8B model)
- **Risk:** Low
- **Use case:** Quick tests, prototyping, evaluating if abliteration works for a model
- **How it works:** Extracts one refusal direction and projects it out uniformly across all layers.
### advanced (DEFAULT — RECOMMENDED)
- **Directions:** 4 (multi-direction SVD)
- **Speed:** Medium (~10-20 min for 8B model)
- **Risk:** Low-Medium
- **Refinement passes:** 2
- **Use case:** Default for most models. Well-tested and reliable.
- **How it works:** Extracts multiple refusal directions via SVD, applies norm-preserving bi-projection to maintain weight matrix norms. Two refinement passes catch residual refusal.
### aggressive
- **Directions:** 8+ (whitened SVD + jailbreak-contrastive)
- **Speed:** Medium-Slow
- **Risk:** Medium-High (may damage coherence)
- **Use case:** When `advanced` leaves > 10% refusals. Stubborn models.
- **How it works:** Uses whitened SVD for covariance-normalized extraction, adds jailbreak-contrastive directions, performs attention head surgery on the most refusal-active heads.
### spectral_cascade
- **Speed:** Medium
- **Risk:** Medium
- **Use case:** Research, novel approaches
- **How it works:** DCT (Discrete Cosine Transform) frequency-domain decomposition of refusal signals. Separates high-frequency (surface-level) from low-frequency (deep) refusal patterns.
### informed (EXPERIMENTAL)
- **Speed:** Slow (~20-40 min for 8B model)
- **Risk:** Variable — results depend on analysis quality
- **Use case:** When you want auto-configuration, but be aware this is experimental and may not outperform `advanced`.
- **How it works:** Runs 4 analysis modules first (alignment imprint, concept geometry, logit lens, ouroboros detection), then auto-configures extraction strategy. Includes an "Ouroboros loop" that detects and counteracts self-repair.
- **Note:** The auto-detection can sometimes misconfigure. If results are poor, fall back to `advanced`.
### surgical
- **Speed:** Very slow (~1-2 hrs for 8B model)
- **Risk:** Low (very precise)
- **Use case:** Reasoning models (R1 distills, QwQ, etc.) where chain-of-thought must be preserved.
- **How it works:** Uses SAE (Sparse Autoencoder) features + individual neuron masking + attention head surgery + per-expert decomposition (for MoE). CoT-aware — identifies and protects reasoning-critical directions before projecting.
### optimized
- **Speed:** Very slow (hours — runs many trials)
- **Risk:** Low (finds optimal parameters)
- **Use case:** When quality matters more than speed. Production models.
- **How it works:** Bayesian hyperparameter search via Optuna TPE sampler. Optimizes n_directions, regularization, refinement passes, and layer selection jointly. Evaluates each configuration on refusal rate + perplexity.
### inverted
- **Speed:** Fast
- **Risk:** High (model behavior changes dramatically)
- **Use case:** Research, studying refusal mechanisms
- **How it works:** Instead of projecting out the refusal direction, reflects it. The model actively complies rather than passively not-refusing. Useful for understanding the geometry of alignment.
### nuclear
- **Speed:** Slow
- **Risk:** Medium-High
- **Use case:** Stubborn MoE models (DeepSeek-MoE, Mixtral, etc.)
- **How it works:** Combines expert-granular abliteration (EGA), steering vector injection, attention head pruning, and multi-pass refinement. Decomposes refusal signals into per-expert components for MoE architectures.
---
## Method Selection Flowchart
```
Is this a quick test?
→ YES: basic
→ NO: continue
Is it an MoE model (Mixtral, DeepSeek-MoE)?
→ YES: nuclear
→ NO: continue
Is it a reasoning model (R1, QwQ, CoT-focused)?
→ YES: surgical
→ NO: continue
Do you need the absolute best quality and have time?
→ YES: optimized
→ NO: advanced (recommended default)
Did advanced leave > 10% refusals?
→ YES: aggressive
→ Still refusing: nuclear
```
---
## Key Parameters
| Parameter | Range | Default | Effect |
|:----------|:------|:--------|:-------|
| `--n-directions` | 1-32 | method-dependent | More directions = more complete removal, but higher damage risk |
| `--regularization` | 0.0-1.0 | 0.1 | Higher = more conservative (less removal, less damage) |
| `--refinement-passes` | 1-5 | 2 | More passes catch residual refusal, but diminishing returns |
| `--quantization` | 4bit, 8bit | none | Reduces VRAM usage; quality impact minimal for extraction |
| `--verify-sample-size` | 10-200 | 20 | More samples = more accurate refusal rate estimate |
---
## Troubleshooting
| Problem | Likely Cause | Fix |
|:--------|:-------------|:----|
| Refusal rate > 20% | Too few directions | Increase `--n-directions`, try `aggressive` |
| Refusal rate 5-20% | Residual refusal | Add `--refinement-passes 3`, try `--direction-method svd` |
| Perplexity spike > 20% | Over-aggressive removal | Reduce `--n-directions`, increase `--regularization` |
| Repetitive output | Weight matrix damage | Use `basic` with fewer directions, check norm preservation |
| MoE model still refuses | Non-expert-aware method | Switch to `nuclear` |
| Reasoning degraded | CoT directions damaged | Use `surgical` method |
| OOM during extraction | Insufficient VRAM | Add `--quantization 4bit` and/or `--large-model` |

Some files were not shown because too many files have changed in this diff Show More