refactor: extract codex_responses logic into dedicated adapter

Move 10 Responses API format-conversion and normalization functions from run_agent.py into agent/codex_responses_adapter.py. All functions are now stateless module-level functions with zero self references. The AIAgent methods remain as thin one-line wrappers that delegate to the adapter, so all callers (tests, gateway, CLI) are unchanged. Functions extracted: - _deterministic_call_id: deterministic tool call ID generation - _split_responses_tool_id: composite ID splitting - _derive_responses_function_call_id: call_ to fc_ prefix conversion - _responses_tools: chat completions tool schema → Responses format - _chat_messages_to_responses_input: message format conversion - _preflight_codex_input_items: input item normalization - _preflight_codex_api_kwargs: API kwargs validation/cleaning - _extract_responses_message_text: text extraction from response items - _extract_responses_reasoning_text: reasoning extraction - _normalize_codex_response: full response normalization This brings codex_responses in line with anthropic_adapter.py and bedrock_adapter.py which already have their own adapter files. run_agent.py: 12410 → 11845 lines (-565 net)
2026-04-20 16:32:43 +05:30
1163 changed files with 15703 additions and 50091 deletions
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,5 +0,0 @@
-# hermes_agent package restructure (PR 1/3)
-# Commit 2: pure git mv — all source files into hermes_agent/
-65ca3ba93b3fa7fd2b15af5b62d54020061f3672
-# Commit 3: rewrite all imports for hermes_agent package
-4b16341975a1217588054f567d0f76dc5a3cc481
--- a/.github/actions/nix-setup/action.yml
+++ b/.github/actions/nix-setup/action.yml
@@ -1,8 +0,0 @@
-name: 'Setup Nix'
-description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
-
-runs:
-  using: composite
-  steps:
-    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
--- a/.github/workflows/nix-lockfile-check.yml
+++ b/.github/workflows/nix-lockfile-check.yml
@@ -1,68 +0,0 @@
-name: Nix Lockfile Check
-
-on:
-  pull_request:
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-check-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Resolve head SHA
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check lockfile hashes
-        id: check
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      - name: Post sticky PR comment (stale)
-        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
-
-      - name: Clear sticky PR comment (resolved)
-        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Fail if stale
-        if: steps.check.outputs.stale == 'true'
-        run: exit 1
--- a/.github/workflows/nix-lockfile-fix.yml
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -1,149 +0,0 @@
-name: Nix Lockfile Fix
-
-on:
-  workflow_dispatch:
-    inputs:
-      pr_number:
-        description: 'PR number to fix (leave empty to run on the selected branch)'
-        required: false
-        type: string
-  issue_comment:
-    types: [edited]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
-  cancel-in-progress: false
-
-jobs:
-  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
-    if: |
-      github.event_name == 'workflow_dispatch' ||
-      (github.event_name == 'issue_comment'
-       && github.event.issue.pull_request != null
-       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
-       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Authorize & resolve PR
-        id: resolve
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
-        with:
-          script: |
-            // 1. Verify the actor has write access — applies to both checkbox
-            //    clicks and manual dispatch.
-            const { data: perm } =
-              await github.rest.repos.getCollaboratorPermissionLevel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                username: context.actor,
-              });
-            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
-              core.setFailed(
-                `${context.actor} lacks write access (has: ${perm.permission})`
-              );
-              return;
-            }
-
-            // 2. Resolve which ref to check out.
-            let prNumber = '';
-            if (context.eventName === 'issue_comment') {
-              prNumber = String(context.payload.issue.number);
-            } else if (context.eventName === 'workflow_dispatch') {
-              prNumber = context.payload.inputs.pr_number || '';
-            }
-
-            if (!prNumber) {
-              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
-              core.setOutput('repo', context.repo.repo);
-              core.setOutput('owner', context.repo.owner);
-              core.setOutput('pr', '');
-              return;
-            }
-
-            const { data: pr } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: Number(prNumber),
-            });
-            core.setOutput('ref', pr.head.ref);
-            core.setOutput('repo', pr.head.repo.name);
-            core.setOutput('owner', pr.head.repo.owner.login);
-            core.setOutput('pr', String(pr.number));
-
-      # Wipe the sticky lockfile-check comment to a "running" state as soon
-      # as the job is authorized, so the user sees their click was picked up
-      # before the ~minute of nix build work.
-      - name: Mark sticky as running
-        if: steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### 🔄 Applying lockfile fix…
-
-            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-        with:
-          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
-          ref: ${{ steps.resolve.outputs.ref }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          fetch-depth: 0
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
-          git commit -m "fix(nix): refresh npm lockfile hashes"
-          git push
-
-      - name: Update sticky (applied)
-        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile fix applied
-
-            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (already current)
-        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile hashes already current
-
-            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (failed)
-        if: failure() && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ❌ Lockfile fix failed
-
-            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -4,6 +4,15 @@ on:
  push:
    branches: [main]
  pull_request:
+    paths:
+      - 'flake.nix'
+      - 'flake.lock'
+      - 'nix/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'hermes_cli/**'
+      - 'run_agent.py'
+      - 'acp_adapter/**'

 permissions:
  contents: read
@@ -20,8 +29,9 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: ./.github/actions/nix-setup
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
+      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
      - name: Check flake
        if: runner.os == 'Linux'
        run: nix flake check --print-build-logs
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -12,59 +12,68 @@ source venv/bin/activate  # ALWAYS activate before running Python

 ```
 hermes-agent/
-├── hermes_agent/             # Single installable package
-│   ├── agent/                # Core conversation loop and agent internals
-│   │   ├── loop.py               # AIAgent class — core conversation loop
-│   │   ├── prompt_builder.py     # System prompt assembly
-│   │   ├── context/              # Context management (engine, compressor, references)
-│   │   ├── memory/               # Memory management (manager, provider)
-│   │   ├── image_gen/            # Image generation (provider, registry)
-│   │   ├── display.py            # KawaiiSpinner, tool preview formatting
-│   │   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
-│   │   └── trajectory.py         # Trajectory saving helpers
-│   ├── providers/            # LLM provider adapters and transports
-│   │   ├── anthropic_adapter.py  # Anthropic adapter
-│   │   ├── anthropic_transport.py # Anthropic transport
-│   │   ├── metadata.py           # Model context lengths, token estimation
-│   │   ├── auxiliary.py           # Auxiliary LLM client (vision, summarization)
-│   │   ├── caching.py            # Anthropic prompt caching
-│   │   └── credential_pool.py    # Credential management
-│   ├── tools/                # Tool implementations
-│   │   ├── dispatch.py           # Tool orchestration, discover_builtin_tools()
-│   │   ├── toolsets.py           # Toolset definitions
-│   │   ├── registry.py           # Central tool registry
-│   │   ├── terminal.py           # Terminal orchestration
-│   │   ├── browser/              # Browser tools (tool, cdp, camofox, providers/)
-│   │   ├── mcp/                  # MCP client and server
-│   │   ├── skills/               # Skill management (manager, tool, hub, guard, sync)
-│   │   ├── media/                # Voice, TTS, transcription, image gen
-│   │   ├── files/                # File operations (tools, operations, state)
-│   │   └── security/             # Path security, URL safety, approval
-│   ├── backends/             # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-│   ├── cli/                  # CLI subcommands and setup
-│   │   ├── main.py               # Entry point — all `hermes` subcommands
-│   │   ├── repl.py               # HermesCLI class — interactive CLI orchestrator
-│   │   ├── config.py             # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
-│   │   ├── commands.py           # Slash command definitions
-│   │   ├── auth/                 # Provider credential resolution
-│   │   ├── models/               # Model catalog, provider lists, switching
-│   │   └── ui/                   # Banner, colors, skin engine, callbacks, tips
-│   ├── gateway/              # Messaging platform gateway
-│   │   ├── run.py                # Main loop, slash commands, message dispatch
-│   │   ├── session.py            # SessionStore — conversation persistence
-│   │   └── platforms/            # Adapters: telegram, discord, slack, whatsapp, etc.
-│   ├── acp/                  # ACP server (VS Code / Zed / JetBrains integration)
-│   ├── cron/                 # Scheduler (jobs.py, scheduler.py)
-│   ├── plugins/              # Plugin system (memory providers, context engines)
-│   ├── constants.py          # Shared constants
-│   ├── state.py              # SessionDB — SQLite session store
-│   ├── logging.py            # Logging configuration
-│   └── utils.py              # Shared utilities
-├── tui_gateway/          # Python JSON-RPC backend for the TUI
+├── run_agent.py          # AIAgent class — core conversation loop
+├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
+├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
+├── cli.py                # HermesCLI class — interactive CLI orchestrator
+├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
+├── agent/                # Agent internals
+│   ├── prompt_builder.py     # System prompt assembly
+│   ├── context_compressor.py # Auto context compression
+│   ├── prompt_caching.py     # Anthropic prompt caching
+│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
+│   ├── model_metadata.py     # Model context lengths, token estimation
+│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
+│   ├── display.py            # KawaiiSpinner, tool preview formatting
+│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
+│   └── trajectory.py         # Trajectory saving helpers
+├── hermes_cli/           # CLI subcommands and setup
+│   ├── main.py           # Entry point — all `hermes` subcommands
+│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
+│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
+│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
+│   ├── setup.py          # Interactive setup wizard
+│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
+│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
+│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
+│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
+│   ├── models.py         # Model catalog, provider model lists
+│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
+│   └── auth.py           # Provider credential resolution
+├── tools/                # Tool implementations (one file per tool)
+│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
+│   ├── approval.py       # Dangerous command detection
+│   ├── terminal_tool.py  # Terminal orchestration
+│   ├── process_registry.py # Background process management
+│   ├── file_tools.py     # File read/write/search/patch
+│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
+│   ├── browser_tool.py   # Browserbase browser automation
+│   ├── code_execution_tool.py # execute_code sandbox
+│   ├── delegate_tool.py  # Subagent delegation
+│   ├── mcp_tool.py       # MCP client (~1050 lines)
+│   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
+├── gateway/              # Messaging platform gateway
+│   ├── run.py            # Main loop, slash commands, message dispatch
+│   ├── session.py        # SessionStore — conversation persistence
+│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
+│   ├── src/entry.tsx        # TTY gate + render()
+│   ├── src/app.tsx          # Main state machine and UI
+│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
+│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
+│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
+│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
+│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
+├── tui_gateway/          # Python JSON-RPC backend for the TUI
+│   ├── entry.py             # stdio entrypoint
+│   ├── server.py            # RPC handlers and session logic
+│   ├── render.py            # Optional rich/ANSI bridge
+│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
+├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
+├── cron/                 # Scheduler (jobs.py, scheduler.py)
 ├── environments/         # RL training environments (Atropos)
-├── tests/                # Pytest suite
-└── web/                  # Vite + React web dashboard
+├── tests/                # Pytest suite (~3000 tests)
+└── batch_runner.py       # Parallel batch processing
 ```

 **User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
@@ -72,18 +81,18 @@ hermes-agent/
 ## File Dependency Chain

 ```
-hermes_agent/tools/registry.py  (no deps — imported by all tool files)
+tools/registry.py  (no deps — imported by all tool files)
       ↑
-hermes_agent/tools/*.py  (each calls registry.register() at import time)
+tools/*.py  (each calls registry.register() at import time)
       ↑
-hermes_agent/tools/dispatch.py  (imports registry + triggers tool discovery)
+model_tools.py  (imports tools/registry + triggers tool discovery)
       ↑
-hermes_agent/agent/loop.py, hermes_agent/cli/repl.py, environments/
+run_agent.py, cli.py, batch_runner.py, environments/
 ```

 ---

-## AIAgent Class (hermes_agent/agent/loop.py)
+## AIAgent Class (run_agent.py)

 ```python
 class AIAgent:
@@ -129,14 +138,14 @@ Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Re

 ---

-## CLI Architecture (hermes_agent/cli/repl.py)
+## CLI Architecture (cli.py)

 - **Rich** for banner/panels, **prompt_toolkit** for input with autocomplete
- **KawaiiSpinner** (`hermes_agent/agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
- `load_cli_config()` in repl.py merges hardcoded defaults + user config YAML
- **Skin engine** (`hermes_agent/cli/ui/skin_engine.py`) — data-driven CLI theming; initialized from `display.skin` config key at startup; skins customize banner colors, spinner faces/verbs/wings, tool prefix, response box, branding text
+- **KawaiiSpinner** (`agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
+- `load_cli_config()` in cli.py merges hardcoded defaults + user config YAML
+- **Skin engine** (`hermes_cli/skin_engine.py`) — data-driven CLI theming; initialized from `display.skin` config key at startup; skins customize banner colors, spinner faces/verbs/wings, tool prefix, response box, branding text
 - `process_command()` is a method on `HermesCLI` — dispatches on canonical command name resolved via `resolve_command()` from the central registry
- Skill slash commands: `hermes_agent/agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching
+- Skill slash commands: `agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching

 ### Slash Command Registry (`hermes_cli/commands.py`)

@@ -263,7 +272,7 @@ registry.register(

 **2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.

-Auto-discovery: any `hermes_agent/tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
+Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.

 The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.

@@ -489,11 +498,11 @@ Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) inst
 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.

-### `_last_resolved_tool_names` is a process-global in `hermes_agent/tools/dispatch.py`
+### `_last_resolved_tool_names` is a process-global in `model_tools.py`
 `_run_single_child()` in `delegate_tool.py` saves and restores this global around subagent execution. If you add new code that reads this global, be aware it may be temporarily stale during child agent runs.

 ### DO NOT hardcode cross-tool references in schema descriptions
-Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `hermes_agent/tools/dispatch.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
+Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
@@ -557,52 +566,3 @@ python -m pytest tests/ -q -n 4
 Worker count above 4 will surface test-ordering flakes that CI never sees.

 Always run the full suite before pushing changes.
-
-### Don't write change-detector tests
-
-A test is a **change-detector** if it fails whenever data that is **expected
-to change** gets updated — model catalogs, config version numbers,
-enumeration counts, hardcoded lists of provider models. These tests add no
-behavioral coverage; they just guarantee that routine source updates break
-CI and cost engineering time to "fix."
-
-**Do not write:**
-
-```python
-# catalog snapshot — breaks every model release
-assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
-assert "MiniMax-M2.7" in models
-
-# config version literal — breaks every schema bump
-assert DEFAULT_CONFIG["_config_version"] == 21
-
-# enumeration count — breaks every time a skill/provider is added
-assert len(_PROVIDER_MODELS["huggingface"]) == 8
-```
-
-**Do write:**
-
-```python
-# behavior: does the catalog plumbing work at all?
-assert "gemini" in _PROVIDER_MODELS
-assert len(_PROVIDER_MODELS["gemini"]) >= 1
-
-# behavior: does migration bump the user's version to current latest?
-assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
-
-# invariant: no plan-only model leaks into the legacy list
-assert not (set(moonshot_models) & coding_plan_only_models)
-
-# invariant: every model in the catalog has a context-length entry
-for m in _PROVIDER_MODELS["huggingface"]:
-    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
-```
-
-The rule: if the test reads like a snapshot of current data, delete it. If
-it reads like a contract about how two pieces of data must relate, keep it.
-When a PR adds a new provider/model and you want a test, make the test
-assert the relationship (e.g. "catalog entries all have context lengths"),
-not the specific names.
-
-Reviewers should reject new change-detector tests; authors should convert
-them into invariants before re-requesting review.
--- a/6
+++ b/6
@@ -27,10 +27,12 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
+COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
+    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
    (cd web && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

@@ -38,7 +40,7 @@ RUN npm install --prefer-offline --no-audit && \
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build web dashboard (Vite outputs to hermes_agent/cli/web_dist/)
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
 RUN cd web && npm run build

 # ---------- Python virtualenv ----------
@@ -48,7 +50,7 @@ RUN uv venv && \
    uv pip install --no-cache-dir -e ".[all]"

 # ---------- Runtime ----------
-ENV HERMES_WEB_DIST=/opt/hermes/hermes_agent/cli/web_dist
+ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,3 @@
-graft hermes_agent
 graft skills
 graft optional-skills
 global-exclude __pycache__
--- a/hermes_agent/acp/init.py
+++ b/hermes_agent/acp/init.py
--- a/hermes_agent/acp/main.py
+++ b/hermes_agent/acp/main.py
--- a/hermes_agent/acp/auth.py
+++ b/hermes_agent/acp/auth.py
@@ -8,7 +8,7 @@ from typing import Optional
 def detect_provider() -> Optional[str]:
    """Resolve the active Hermes runtime provider, or None if unavailable."""
    try:
-        from hermes_agent.cli.runtime_provider import resolve_runtime_provider
+        from hermes_cli.runtime_provider import resolve_runtime_provider
        runtime = resolve_runtime_provider()
        api_key = runtime.get("api_key")
        provider = runtime.get("provider")
--- a/hermes_agent/acp/entry.py
+++ b/hermes_agent/acp/entry.py
@@ -17,7 +17,7 @@ import asyncio
 import logging
 import sys
 from pathlib import Path
-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home


 # Methods clients send as periodic liveness probes. They are not part of the
@@ -83,7 +83,7 @@ def _setup_logging() -> None:

 def _load_env() -> None:
    """Load .env from HERMES_HOME (default ``~/.hermes``)."""
-    from hermes_agent.cli.env_loader import load_hermes_dotenv
+    from hermes_cli.env_loader import load_hermes_dotenv

    hermes_home = get_hermes_home()
    loaded = load_hermes_dotenv(hermes_home=hermes_home)
@@ -104,6 +104,11 @@ def main() -> None:
    logger = logging.getLogger(__name__)
    logger.info("Starting hermes-agent ACP adapter")

+    # Ensure the project root is on sys.path so ``from run_agent import AIAgent`` works
+    project_root = str(Path(__file__).resolve().parent.parent)
+    if project_root not in sys.path:
+        sys.path.insert(0, project_root)
+
    import acp
    from .server import HermesACPAgent

--- a/hermes_agent/acp/events.py
+++ b/hermes_agent/acp/events.py
@@ -88,7 +88,7 @@ def make_tool_progress_cb(
        snapshot = None
        if name in {"write_file", "patch", "skill_manage"}:
            try:
-                from hermes_agent.agent.display import capture_local_edit_snapshot
+                from agent.display import capture_local_edit_snapshot

                snapshot = capture_local_edit_snapshot(name, args)
            except Exception:
--- a/hermes_agent/acp/permissions.py
+++ b/hermes_agent/acp/permissions.py
@@ -63,9 +63,6 @@ def make_approval_callback(
            logger.warning("Permission request timed out or failed: %s", exc)
            return "deny"

-        if response is None:
-            return "deny"
-
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.option_id
--- a/hermes_agent/acp/server.py
+++ b/hermes_agent/acp/server.py
@@ -4,7 +4,6 @@ from __future__ import annotations

 import asyncio
 import logging
-import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -52,31 +51,26 @@ try:
 except ImportError:
    from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from hermes_agent.acp.auth import detect_provider
-from hermes_agent.acp.events import (
+from acp_adapter.auth import detect_provider, has_provider
+from acp_adapter.events import (
    make_message_cb,
    make_step_cb,
    make_thinking_cb,
    make_tool_progress_cb,
 )
-from hermes_agent.acp.permissions import make_approval_callback
-from hermes_agent.acp.session import SessionManager, SessionState
+from acp_adapter.permissions import make_approval_callback
+from acp_adapter.session import SessionManager, SessionState

 logger = logging.getLogger(__name__)

 try:
-    from hermes_agent.cli import __version__ as HERMES_VERSION
+    from hermes_cli import __version__ as HERMES_VERSION
 except Exception:
    HERMES_VERSION = "0.0.0"

 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

-# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
-# does not expose a client-side limit, so this is a fixed cap that clients
-# paginate against using `cursor` / `next_cursor`.
-_LIST_SESSIONS_PAGE_SIZE = 50
-

 def _extract_text(
    prompt: list[
@@ -172,7 +166,7 @@ class HermesACPAgent(acp.Agent):
        provider = getattr(state.agent, "provider", None) or detect_provider() or "openrouter"

        try:
-            from hermes_agent.cli.models.models import curated_models_for_provider, normalize_provider, provider_label
+            from hermes_cli.models import curated_models_for_provider, normalize_provider, provider_label

            normalized_provider = normalize_provider(provider)
            provider_name = provider_label(normalized_provider)
@@ -235,7 +229,7 @@ class HermesACPAgent(acp.Agent):
        new_model = raw_model.strip()

        try:
-            from hermes_agent.cli.models.models import detect_provider_for_model, parse_model_input
+            from hermes_cli.models import detect_provider_for_model, parse_model_input

            target_provider, new_model = parse_model_input(new_model, current_provider)
            if target_provider == current_provider:
@@ -257,7 +251,7 @@ class HermesACPAgent(acp.Agent):
            return

        try:
-            from hermes_agent.tools.mcp.tool import register_mcp_servers
+            from tools.mcp_tool import register_mcp_servers

            config_map: dict[str, dict] = {}
            for server in mcp_servers:
@@ -285,7 +279,7 @@ class HermesACPAgent(acp.Agent):
            return

        try:
-            from hermes_agent.tools.dispatch import get_tool_definitions
+            from model_tools import get_tool_definitions

            enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
@@ -357,18 +351,9 @@ class HermesACPAgent(acp.Agent):
        )

    async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        # Only accept authenticate() calls whose method_id matches the
-        # provider we advertised in initialize(). Without this check,
-        # authenticate() would acknowledge any method_id as long as the
-        # server has provider credentials configured — harmless under
-        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
-        # API hygiene and confusing if ACP ever grows multi-method auth.
-        provider = detect_provider()
-        if not provider:
-            return None
-        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
-            return None
-        return AuthenticateResponse()
+        if has_provider():
+            return AuthenticateResponse()
+        return None

    # ---- Session management -------------------------------------------------

@@ -452,28 +437,7 @@ class HermesACPAgent(acp.Agent):
        cwd: str | None = None,
        **kwargs: Any,
    ) -> ListSessionsResponse:
-        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
-
-        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
-        normalizes and filters by working directory. ``cursor`` is a ``session_id``
-        previously returned as ``next_cursor``; results resume after that entry.
-        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
-        results remain, ``next_cursor`` is set to the last returned ``session_id``.
-        """
        infos = self.session_manager.list_sessions(cwd=cwd)
-
-        if cursor:
-            for idx, s in enumerate(infos):
-                if s["session_id"] == cursor:
-                    infos = infos[idx + 1:]
-                    break
-            else:
-                # Unknown cursor -> empty page (do not fall back to full list).
-                infos = []
-
-        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
-        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
-
        sessions = []
        for s in infos:
            updated_at = s.get("updated_at")
@@ -487,9 +451,7 @@ class HermesACPAgent(acp.Agent):
                    updated_at=updated_at,
                )
            )
-
-        next_cursor = sessions[-1].session_id if has_more and sessions else None
-        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
+        return ListSessionsResponse(sessions=sessions)

    # ---- Prompt (core) ------------------------------------------------------

@@ -555,32 +517,15 @@ class HermesACPAgent(acp.Agent):
        agent.step_callback = step_cb
        agent.message_callback = message_cb

-        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
-        # Set it INSIDE _run_agent so the TLS write happens in the executor
-        # thread — setting it here would write to the event-loop thread's TLS,
-        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
-        # takes the CLI-interactive path (which calls the registered
-        # callback via prompt_dangerous_approval) instead of the
-        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
-        # ACP's conn.request_permission maps cleanly to the interactive
-        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
-        # which requires a notify_cb registered in _gateway_notify_cbs.
-        previous_approval_cb = None
-        previous_interactive = None
+        if approval_cb:
+            try:
+                from tools import terminal_tool as _terminal_tool
+                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
+                _terminal_tool.set_approval_callback(approval_cb)
+            except Exception:
+                logger.debug("Could not set ACP approval callback", exc_info=True)

        def _run_agent() -> dict:
-            nonlocal previous_approval_cb, previous_interactive
-            if approval_cb:
-                try:
-                    from hermes_agent.tools import terminal as _terminal_tool
-                    previous_approval_cb = _terminal_tool._get_approval_callback()
-                    _terminal_tool.set_approval_callback(approval_cb)
-                except Exception:
-                    logger.debug("Could not set ACP approval callback", exc_info=True)
-            # Signal to tools.approval that we have an interactive callback
-            # and the non-interactive auto-approve path must not fire.
-            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
-            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
                    user_message=user_text,
@@ -592,14 +537,9 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
-                # Restore HERMES_INTERACTIVE.
-                if previous_interactive is None:
-                    os.environ.pop("HERMES_INTERACTIVE", None)
-                else:
-                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                if approval_cb:
                    try:
-                        from hermes_agent.tools import terminal as _terminal_tool
+                        from tools import terminal_tool as _terminal_tool
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
@@ -618,7 +558,7 @@ class HermesACPAgent(acp.Agent):
        final_response = result.get("final_response", "")
        if final_response:
            try:
-                from hermes_agent.agent.title_generator import maybe_auto_title
+                from agent.title_generator import maybe_auto_title

                maybe_auto_title(
                    self.session_manager._get_db(),
@@ -673,8 +613,8 @@ class HermesACPAgent(acp.Agent):
            await self._conn.session_update(
                session_id=session_id,
                update=AvailableCommandsUpdate(
-                    session_update="available_commands_update",
-                    available_commands=self._available_commands(),
+                    sessionUpdate="available_commands_update",
+                    availableCommands=self._available_commands(),
                ),
            )
        except Exception:
@@ -753,7 +693,7 @@ class HermesACPAgent(acp.Agent):

    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
-            from hermes_agent.tools.dispatch import get_tool_definitions
+            from model_tools import get_tool_definitions
            toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
            if not tools:
@@ -804,7 +744,7 @@ class HermesACPAgent(acp.Agent):
            if not hasattr(agent, "_compress_context"):
                return "Context compression not available for this agent."

-            from hermes_agent.providers.metadata import estimate_messages_tokens_rough
+            from agent.model_metadata import estimate_messages_tokens_rough

            original_count = len(state.history)
            approx_tokens = estimate_messages_tokens_rough(state.history)
--- a/hermes_agent/acp/session.py
+++ b/hermes_agent/acp/session.py
@@ -8,7 +8,7 @@ history.
 """
 from __future__ import annotations

-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home

 import copy
 import json
@@ -100,7 +100,7 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
    if not task_id:
        return
    try:
-        from hermes_agent.tools.terminal import register_task_env_overrides
+        from tools.terminal_tool import register_task_env_overrides
        register_task_env_overrides(task_id, {"cwd": cwd})
    except Exception:
        logger.debug("Failed to register ACP task cwd override", exc_info=True)
@@ -111,7 +111,7 @@ def _clear_task_cwd(task_id: str) -> None:
    if not task_id:
        return
    try:
-        from hermes_agent.tools.terminal import clear_task_env_overrides
+        from tools.terminal_tool import clear_task_env_overrides
        clear_task_env_overrides(task_id)
    except Exception:
        logger.debug("Failed to clear ACP task cwd override", exc_info=True)
@@ -355,7 +355,7 @@ class SessionManager:
        if self._db_instance is not None:
            return self._db_instance
        try:
-            from hermes_agent.state import SessionDB
+            from hermes_state import SessionDB
            hermes_home = get_hermes_home()
            self._db_instance = SessionDB(db_path=hermes_home / "state.db")
            return self._db_instance
@@ -523,9 +523,9 @@ class SessionManager:
        if self._agent_factory is not None:
            return self._agent_factory()

-        from hermes_agent.agent.loop import AIAgent
-        from hermes_agent.cli.config import load_config
-        from hermes_agent.cli.runtime_provider import resolve_runtime_provider
+        from run_agent import AIAgent
+        from hermes_cli.config import load_config
+        from hermes_cli.runtime_provider import resolve_runtime_provider

        config = load_config()
        model_cfg = config.get("model")
--- a/hermes_agent/acp/tools.py
+++ b/hermes_agent/acp/tools.py
@@ -103,7 +103,7 @@ def _build_patch_mode_content(patch_text: str) -> List[Any]:
        return [acp.tool_content(acp.text_block(""))]

    try:
-        from hermes_agent.tools.patch_parser import OperationType, parse_v4a_patch
+        from tools.patch_parser import OperationType, parse_v4a_patch

        operations, error = parse_v4a_patch(patch_text)
        if error or not operations:
@@ -243,7 +243,7 @@ def _build_tool_complete_content(

    if tool_name in {"write_file", "patch", "skill_manage"}:
        try:
-            from hermes_agent.agent.display import extract_edit_diff
+            from agent.display import extract_edit_diff

            diff_text = extract_edit_diff(
                tool_name,
--- a/hermes_agent/agent/init.py
+++ b/hermes_agent/agent/init.py
--- a/hermes_agent/providers/anthropic_adapter.py
+++ b/hermes_agent/providers/anthropic_adapter.py
@@ -16,10 +16,9 @@ import logging
 import os
 from pathlib import Path

-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
-from hermes_agent.utils import normalize_proxy_env_vars

 try:
    import anthropic as _anthropic_sdk
@@ -266,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


-def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
-    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
-    normalized = _normalize_base_url_text(base_url)
-    if not normalized:
-        return False
-    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
-
-
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -301,7 +292,7 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional[float] = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -317,9 +308,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
-
-    normalize_proxy_env_vars()
-
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
@@ -331,18 +319,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _is_kimi_coding_endpoint(base_url):
-        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
-        # to be recognized as a valid Coding Agent. Without it, returns 403.
-        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
-        kwargs["api_key"] = api_key
-        kwargs["default_headers"] = {
-            "User-Agent": "claude-code/0.1.0",
-            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
-        }
-    elif _requires_bearer_auth(normalized_base_url):
+    if _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer *** for regular API keys. Route those endpoints
+        # Authorization: Bearer even for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1426,25 +1405,11 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
-    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
-    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
-    # validates the message history and requires every prior assistant
-    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
-    # Anthropic path never populates that field, and
-    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
-    # on third-party endpoints — so the request fails with HTTP 400
-    # "thinking is enabled but reasoning_content is missing in assistant
-    # tool call message at index N".  Kimi's reasoning is driven server-side
-    # on the /coding route, so skip Anthropic's thinking parameter entirely
-    # for that host.  (Kimi on chat_completions enables thinking via
-    # extra_body in the ChatCompletionsTransport — see #13503.)
-    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
-    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
+    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -1560,42 +1525,3 @@ def normalize_anthropic_response(
        ),
        finish_reason,
    )
-
-
-def normalize_anthropic_response_v2(
-    response,
-    strip_tool_prefix: bool = False,
-) -> "NormalizedResponse":
-    """Normalize Anthropic response to NormalizedResponse.
-
-    Wraps the existing normalize_anthropic_response() and maps its output
-    to the shared transport types.  This allows incremental migration —
-    one call site at a time — without changing the original function.
-    """
-    from hermes_agent.providers.types import NormalizedResponse, build_tool_call
-
-    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
-
-    tool_calls = None
-    if assistant_msg.tool_calls:
-        tool_calls = [
-            build_tool_call(
-                id=tc.id,
-                name=tc.function.name,
-                arguments=tc.function.arguments,
-            )
-            for tc in assistant_msg.tool_calls
-        ]
-
-    provider_data = {}
-    if getattr(assistant_msg, "reasoning_details", None):
-        provider_data["reasoning_details"] = assistant_msg.reasoning_details
-
-    return NormalizedResponse(
-        content=assistant_msg.content,
-        tool_calls=tool_calls,
-        finish_reason=finish_reason,
-        reasoning=getattr(assistant_msg, "reasoning", None),
-        usage=None,  # Anthropic usage is on the raw response, not the normaliser
-        provider_data=provider_data or None,
-    )
--- a/hermes_agent/providers/auxiliary.py
+++ b/hermes_agent/providers/auxiliary.py
@@ -41,17 +41,13 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

-if TYPE_CHECKING:
-    from hermes_agent.providers.gemini_adapter import GeminiNativeClient
-
-from hermes_agent.providers.credential_pool import load_pool
-from hermes_agent.cli.config import get_hermes_home
-from hermes_agent.constants import OPENROUTER_BASE_URL
-from hermes_agent.utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
+from agent.credential_pool import load_pool
+from hermes_cli.config import get_hermes_home
+from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

@@ -99,37 +95,84 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
    return _PROVIDER_ALIASES.get(normalized, normalized)


-# Sentinel: when returned by _fixed_temperature_for_model(), callers must
-# strip the ``temperature`` key from API kwargs entirely so the provider's
-# server-side default applies.  Kimi/Moonshot models manage temperature
-# internally — sending *any* value (even the "correct" one) can conflict
-# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
-OMIT_TEMPERATURE: object = object()
+_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
+    "kimi-for-coding": 0.6,
+}

+# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
+# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
+# value 0.6.  Any other value will result in an error."  The same lock applies
+# to the other k2.* models served on that endpoint.  Enumerated explicitly so
+# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
+# the standard chat API and third parties) are NOT clamped.
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
+_KIMI_INSTANT_MODELS: frozenset = frozenset({
+    "kimi-k2.5",
+    "kimi-k2-turbo-preview",
+    "kimi-k2-0905-preview",
+})
+_KIMI_THINKING_MODELS: frozenset = frozenset({
+    "kimi-k2-thinking",
+    "kimi-k2-thinking-turbo",
+})

-def _is_kimi_model(model: Optional[str]) -> bool:
-    """True for any Kimi / Moonshot model that manages temperature server-side."""
-    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
-    return bare.startswith("kimi-") or bare == "kimi"
+# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
+# temperature contract than the Coding Plan endpoint above.  Empirically,
+# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
+# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
+# lock (0.6 for non-thinking) does not apply.  `kimi-k2-turbo-preview` and the
+# thinking variants already match the Coding Plan contract on the public
+# endpoint, so we only override the models that diverge.
+# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
+# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
+# hermes_cli/auth.py:_kimi_base_url_for_key).
+_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
+    "kimi-k2.5": 1.0,
+}


 def _fixed_temperature_for_model(
    model: Optional[str],
    base_url: Optional[str] = None,
-) -> "Optional[float] | object":
-    """Return a temperature directive for models with strict contracts.
+) -> Optional[float]:
+    """Return a required temperature override for models with strict contracts.

-    Returns:
-        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
-            provider chooses its own default.  Used for all Kimi / Moonshot
-            models whose gateway selects temperature server-side.
-        ``float`` — a specific value the caller must use (reserved for future
-            models with fixed-temperature contracts).
-        ``None`` — no override; caller should use its own default.
+    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
+    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
+    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
+    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
+
+    When ``base_url`` points to Moonshot's public chat endpoint
+    (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
+    API only accepts ``temperature=1``, not 0.6.  That override takes precedence
+    over the Coding Plan defaults above.
+
+    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
+    which is the separate non-coding K2 family with variable temperature.
    """
-    if _is_kimi_model(model):
-        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
-        return OMIT_TEMPERATURE
+    normalized = (model or "").strip().lower()
+    bare = normalized.rsplit("/", 1)[-1]
+
+    # Public Moonshot API has a stricter contract for some models than the
+    # Coding Plan endpoint — check it first so it wins on conflict.
+    if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
+        public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
+        if public is not None:
+            logger.debug(
+                "Forcing temperature=%s for %r on public Moonshot API", public, model
+            )
+            return public
+
+    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
+    if fixed is not None:
+        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
+        return fixed
+    if bare in _KIMI_THINKING_MODELS:
+        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
+        return 1.0
+    if bare in _KIMI_INSTANT_MODELS:
+        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
+        return 0.6
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -164,16 +207,6 @@ _OR_HEADERS = {
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

-# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
-# referrerUrl and X-Title maps to appName in the gateway's analytics.
-from hermes_agent.cli import __version__ as _HERMES_VERSION
-
-_AI_GATEWAY_HEADERS = {
-    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-    "X-Title": "Hermes Agent",
-    "User-Agent": f"HermesAgent/{_HERMES_VERSION}",
-}
-
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
@@ -185,6 +218,8 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
+_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
+_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -575,7 +610,7 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from hermes_agent.providers.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -607,7 +642,7 @@ class _AnthropicCompletionsAdapter:
        # temperature for models that still accept it. build_anthropic_kwargs
        # additionally strips these keys as a safety net — keep both layers.
        if temperature is not None:
-            from hermes_agent.providers.anthropic_adapter import _forbids_sampling_params
+            from agent.anthropic_adapter import _forbids_sampling_params
            if not _forbids_sampling_params(model):
                anthropic_kwargs["temperature"] = temperature

@@ -729,33 +764,6 @@ def _nous_base_url() -> str:
    return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)


-def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
-    """Return fresh Nous runtime credentials when available.
-
-    This mirrors the main agent's 401 recovery path and keeps auxiliary
-    clients aligned with the singleton auth store + mint flow instead of
-    relying only on whatever raw tokens happen to be sitting in auth.json
-    or the credential pool.
-    """
-    try:
-        from hermes_agent.cli.auth.auth import resolve_nous_runtime_credentials
-
-        creds = resolve_nous_runtime_credentials(
-            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
-            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-            force_mint=force_refresh,
-        )
-    except Exception as exc:
-        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
-        return None
-
-    api_key = str(creds.get("api_key") or "").strip()
-    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
-    if not api_key or not base_url:
-        return None
-    return api_key, base_url
-
-
 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store.

@@ -772,7 +780,7 @@ def _read_codex_access_token() -> Optional[str]:
            return token

    try:
-        from hermes_agent.cli.auth.auth import _read_codex_tokens
+        from hermes_cli.auth import _read_codex_tokens
        data = _read_codex_tokens()
        tokens = data.get("tokens", {})
        access_token = tokens.get("access_token")
@@ -799,18 +807,14 @@ def _read_codex_access_token() -> Optional[str]:
        return None


-# TODO(refactor): This function has messy types and duplicated logic (pool vs direct creds).
-#     Ideal fix: (1) define an AuxiliaryClient Protocol both OpenAI/GeminiNativeClient satisfy,
-#     (2) return a NamedTuple or dataclass instead of raw tuple, (3) extract the repeated
-#     Gemini/Kimi/Copilot client-building into a helper.
-def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeClient"]], Optional[str]]:
+def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Try each API-key provider in PROVIDER_REGISTRY order.

    Returns (client, model) for the first provider with usable runtime
    credentials, or (None, None) if none are configured.
    """
    try:
-        from hermes_agent.cli.auth.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
+        from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
    except ImportError:
        logger.debug("Could not import PROVIDER_REGISTRY for API-key fallback")
        return None, None
@@ -823,7 +827,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli
            # Without this gate, Claude Code credentials get silently used
            # as auxiliary fallback when the user's primary provider fails.
            try:
-                from hermes_agent.cli.auth.auth import is_provider_explicitly_configured
+                from hermes_cli.auth import is_provider_explicitly_configured
                if not is_provider_explicitly_configured("anthropic"):
                    continue
            except ImportError:
@@ -844,15 +848,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli
                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
            if provider_id == "gemini":
-                from hermes_agent.providers.gemini_adapter import GeminiNativeClient, is_native_gemini_base_url
+                from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url

                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
            extra = {}
-            if base_url_host_matches(base_url, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-                from hermes_agent.cli.models.models import copilot_default_headers
+            if "api.kimi.com" in base_url.lower():
+                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+            elif "api.githubcopilot.com" in base_url.lower():
+                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
@@ -870,15 +874,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli
            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
        if provider_id == "gemini":
-            from hermes_agent.providers.gemini_adapter import GeminiNativeClient, is_native_gemini_base_url
+            from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url

            if is_native_gemini_base_url(base_url):
                return GeminiNativeClient(api_key=api_key, base_url=base_url), model
        extra = {}
-        if base_url_host_matches(base_url, "api.kimi.com"):
-            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_agent.cli.models.models import copilot_default_headers
+        if "api.kimi.com" in base_url.lower():
+            extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+        elif "api.githubcopilot.com" in base_url.lower():
+            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
        return OpenAI(api_key=api_key, base_url=base_url, **extra), model
@@ -914,7 +918,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    # if another session already recorded a 429, skip Nous entirely
    # to avoid piling more requests onto the tapped RPH bucket.
    try:
-        from hermes_agent.providers.nous_rate_guard import nous_rate_limit_remaining
+        from agent.nous_rate_guard import nous_rate_limit_remaining
        _remaining = nous_rate_limit_remaining()
        if _remaining is not None and _remaining > 0:
            logger.debug(
@@ -926,50 +930,29 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
        pass

    nous = _read_nous_auth()
-    runtime = _resolve_nous_runtime_api(force_refresh=False)
-    if runtime is None and not nous:
+    if not nous:
        return None, None
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-
-    # Ask the Portal which model it currently recommends for this task type.
-    # The /api/nous/recommended-models endpoint is the authoritative source:
-    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
-    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
-    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
-    # or returns a null recommendation for this task type.
-    model = _NOUS_MODEL
-    try:
-        from hermes_agent.cli.models.models import get_nous_recommended_aux_model
-        recommended = get_nous_recommended_aux_model(vision=vision)
-        if recommended:
-            model = recommended
-            logger.debug(
-                "Auxiliary/%s: using Portal-recommended model %s",
-                "vision" if vision else "text", model,
-            )
-        else:
-            logger.debug(
-                "Auxiliary/%s: no Portal recommendation, falling back to %s",
-                "vision" if vision else "text", model,
-            )
-    except Exception as exc:
-        logger.debug(
-            "Auxiliary/%s: recommended-models lookup failed (%s); "
-            "falling back to %s",
-            "vision" if vision else "text", exc, model,
-        )
-
-    if runtime is not None:
-        api_key, base_url = runtime
+    if nous.get("source") == "pool":
+        model = "gemini-3-flash"
    else:
-        api_key = _nous_api_key(nous or {})
-        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
+        model = _NOUS_MODEL
+    # Free-tier users can't use paid auxiliary models — use the free
+    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
+    try:
+        from hermes_cli.models import check_nous_free_tier
+        if check_nous_free_tier():
+            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
+            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
+                         model, "vision" if vision else "text")
+    except Exception:
+        pass
    return (
        OpenAI(
-            api_key=api_key,
-            base_url=base_url,
+            api_key=_nous_api_key(nous),
+            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
        ),
        model,
    )
@@ -982,7 +965,7 @@ def _read_main_model() -> str:
    model. Environment variables are no longer consulted.
    """
    try:
-        from hermes_agent.cli.config import load_config
+        from hermes_cli.config import load_config
        cfg = load_config()
        model_cfg = cfg.get("model", {})
        if isinstance(model_cfg, str) and model_cfg.strip():
@@ -1003,7 +986,7 @@ def _read_main_provider() -> str:
    if not configured.
    """
    try:
-        from hermes_agent.cli.config import load_config
+        from hermes_cli.config import load_config
        cfg = load_config()
        model_cfg = cfg.get("model", {})
        if isinstance(model_cfg, dict):
@@ -1023,7 +1006,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
    environment.
    """
    try:
-        from hermes_agent.cli.runtime_provider import resolve_runtime_provider
+        from hermes_cli.runtime_provider import resolve_runtime_provider

        runtime = resolve_runtime_provider(requested="custom")
    except Exception as exc:
@@ -1047,7 +1030,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
        return None, None, None

    custom_base = custom_base.strip().rstrip("/")
-    if base_url_host_matches(custom_base, "openrouter.ai"):
+    if "openrouter.ai" in custom_base.lower():
        # requested='custom' falls back to OpenRouter when no custom endpoint is
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
        return None, None, None
@@ -1081,8 +1064,6 @@ def _validate_proxy_env_urls() -> None:
    """
    from urllib.parse import urlparse

-    normalize_proxy_env_vars()
-
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = str(os.environ.get(key) or "").strip()
@@ -1138,7 +1119,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
        # LiteLLM proxies, etc.).  Must NEVER be treated as OAuth —
        # Anthropic OAuth claims only apply to api.anthropic.com.
        try:
-            from hermes_agent.providers.anthropic_adapter import build_anthropic_client
+            from agent.anthropic_adapter import build_anthropic_client
            real_client = build_anthropic_client(custom_key, custom_base)
        except ImportError:
            logger.warning(
@@ -1180,7 +1161,7 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:

 def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    try:
-        from hermes_agent.providers.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
    except ImportError:
        return None, None

@@ -1200,7 +1181,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
    try:
-        from hermes_agent.cli.config import load_config
+        from hermes_cli.config import load_config
        cfg = load_config()
        model_cfg = cfg.get("model")
        if isinstance(model_cfg, dict):
@@ -1212,7 +1193,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    except Exception:
        pass

-    from hermes_agent.providers.anthropic_adapter import _is_oauth_token
+    from agent.anthropic_adapter import _is_oauth_token
    is_oauth = _is_oauth_token(token)
    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
@@ -1313,15 +1294,6 @@ def _is_connection_error(exc: Exception) -> bool:
    return False


-def _is_auth_error(exc: Exception) -> bool:
-    """Detect auth failures that should trigger provider-specific refresh."""
-    status = getattr(exc, "status_code", None)
-    if status == 401:
-        return True
-    err_lower = str(exc).lower()
-    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
-
-
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
@@ -1480,14 +1452,14 @@ def _to_async_client(sync_client, model: str):
    if isinstance(sync_client, AnthropicAuxiliaryClient):
        return AsyncAnthropicAuxiliaryClient(sync_client), model
    try:
-        from hermes_agent.providers.gemini_adapter import GeminiNativeClient, AsyncGeminiNativeClient
+        from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient

        if isinstance(sync_client, GeminiNativeClient):
            return AsyncGeminiNativeClient(sync_client), model
    except ImportError:
        pass
    try:
-        from hermes_agent.agent.copilot_acp_client import CopilotACPClient
+        from agent.copilot_acp_client import CopilotACPClient
        if isinstance(sync_client, CopilotACPClient):
            return sync_client, model
    except ImportError:
@@ -1497,15 +1469,15 @@ def _to_async_client(sync_client, model: str):
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
-    sync_base_url = str(sync_client.base_url)
-    if base_url_host_matches(sync_base_url, "openrouter.ai"):
+    base_lower = str(sync_client.base_url).lower()
+    if "openrouter" in base_lower:
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
-        from hermes_agent.cli.models.models import copilot_default_headers
+    elif "api.githubcopilot.com" in base_lower:
+        from hermes_cli.models import copilot_default_headers

        async_kwargs["default_headers"] = copilot_default_headers()
-    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
-        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+    elif "api.kimi.com" in base_lower:
+        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
    return AsyncOpenAI(**async_kwargs), model


@@ -1514,7 +1486,7 @@ def _normalize_resolved_model(model_name: Optional[str], provider: str) -> Optio
    if not model_name:
        return model_name
    try:
-        from hermes_agent.cli.models.normalize import normalize_model_for_provider
+        from hermes_cli.model_normalize import normalize_model_for_provider

        return normalize_model_for_provider(model_name, provider)
    except Exception:
@@ -1581,7 +1553,8 @@ def resolve_provider_client(
        # Auto-detect: api.openai.com + codex model name pattern
        if api_mode and api_mode != "codex_responses":
            return False  # explicit non-codex mode
-        if base_url_hostname(base_url_str) == "api.openai.com":
+        normalized_base = (base_url_str or "").strip().lower()
+        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
            model_lower = (model_str or "").lower()
            if "codex" in model_lower:
                return True
@@ -1629,13 +1602,7 @@ def resolve_provider_client(

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
-        # Detect vision tasks: either explicit model override from
-        # _PROVIDER_VISION_MODELS, or caller passed a known vision model.
-        _is_vision = (
-            model in _PROVIDER_VISION_MODELS.values()
-            or (model or "").strip().lower() == "mimo-v2-omni"
-        )
-        client, default = _try_nous(vision=_is_vision)
+        client, default = _try_nous()
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes auth)")
@@ -1691,10 +1658,10 @@ def resolve_provider_client(
                provider,
            )
            extra = {}
-            if base_url_host_matches(custom_base, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
-                from hermes_agent.cli.models.models import copilot_default_headers
+            if "api.kimi.com" in custom_base.lower():
+                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+            elif "api.githubcopilot.com" in custom_base.lower():
+                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
            client = _wrap_if_needed(client, final_model, custom_base)
@@ -1716,7 +1683,7 @@ def resolve_provider_client(

    # ── Named custom providers (config.yaml custom_providers list) ───
    try:
-        from hermes_agent.cli.runtime_provider import _get_named_custom_provider
+        from hermes_cli.runtime_provider import _get_named_custom_provider
        custom_entry = _get_named_custom_provider(provider)
        if custom_entry:
            custom_base = custom_entry.get("base_url", "").strip()
@@ -1746,13 +1713,13 @@ def resolve_provider_client(

    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
-        from hermes_agent.cli.auth.auth import (
+        from hermes_cli.auth import (
            PROVIDER_REGISTRY,
            resolve_api_key_provider_credentials,
            resolve_external_process_provider_credentials,
        )
    except ImportError:
-        logger.debug("hermes_agent.cli.auth not available for provider %s", provider)
+        logger.debug("hermes_cli.auth not available for provider %s", provider)
        return None, None

    pconfig = PROVIDER_REGISTRY.get(provider)
@@ -1788,7 +1755,7 @@ def resolve_provider_client(
        final_model = _normalize_resolved_model(model or default_model, provider)

        if provider == "gemini":
-            from hermes_agent.providers.gemini_adapter import GeminiNativeClient, is_native_gemini_base_url
+            from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url

            if is_native_gemini_base_url(base_url):
                client = GeminiNativeClient(api_key=api_key, base_url=base_url)
@@ -1798,10 +1765,10 @@ def resolve_provider_client(

        # Provider-specific headers
        headers = {}
-        if base_url_host_matches(base_url, "api.kimi.com"):
-            headers["User-Agent"] = "claude-code/0.1.0"
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_agent.cli.models.models import copilot_default_headers
+        if "api.kimi.com" in base_url.lower():
+            headers["User-Agent"] = "KimiCLI/1.30.0"
+        elif "api.githubcopilot.com" in base_url.lower():
+            from hermes_cli.models import copilot_default_headers

            headers.update(copilot_default_headers())
        client = OpenAI(api_key=api_key, base_url=base_url,
@@ -1813,7 +1780,7 @@ def resolve_provider_client(
        # routes through responses.stream().
        if provider == "copilot" and final_model and not raw_codex:
            try:
-                from hermes_agent.cli.models.models import _should_use_copilot_responses_api
+                from hermes_cli.models import _should_use_copilot_responses_api
                if _should_use_copilot_responses_api(final_model):
                    logger.debug(
                        "resolve_provider_client: copilot model %s needs "
@@ -1852,7 +1819,7 @@ def resolve_provider_client(
                    "process credentials are incomplete"
                )
                return None, None
-            from hermes_agent.agent.copilot_acp_client import CopilotACPClient
+            from agent.copilot_acp_client import CopilotACPClient

            client = CopilotACPClient(
                api_key=api_key,
@@ -2031,35 +1998,24 @@ def resolve_vision_provider_client(
        #      _PROVIDER_VISION_MODELS provides per-provider vision model
        #      overrides when the provider has a dedicated multimodal model
        #      that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
-        #      zai → glm-5v-turbo). Nous is the exception: it has a dedicated
-        #      strict vision backend with tier-aware defaults, so it must not
-        #      fall through to the user's text chat model here.
+        #      zai → glm-5v-turbo).
        #   2. OpenRouter  (vision-capable aggregator fallback)
        #   3. Nous Portal (vision-capable aggregator fallback)
        #   4. Stop
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            if main_provider == "nous":
-                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
-                if sync_client is not None:
-                    logger.info(
-                        "Vision auto-detect: using main provider %s (%s)",
-                        main_provider, default_model or resolved_model or main_model,
-                    )
-                    return _finalize(main_provider, sync_client, default_model)
-            else:
-                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
-                rpc_client, rpc_model = resolve_provider_client(
-                    main_provider, vision_model,
-                    api_mode=resolved_api_mode)
-                if rpc_client is not None:
-                    logger.info(
-                        "Vision auto-detect: using main provider %s (%s)",
-                        main_provider, rpc_model or vision_model,
-                    )
-                    return _finalize(
-                        main_provider, rpc_client, rpc_model or vision_model)
+            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
+            rpc_client, rpc_model = resolve_provider_client(
+                main_provider, vision_model,
+                api_mode=resolved_api_mode)
+            if rpc_client is not None:
+                logger.info(
+                    "Vision auto-detect: using main provider %s (%s)",
+                    main_provider, rpc_model or vision_model,
+                )
+                return _finalize(
+                    main_provider, rpc_client, rpc_model or vision_model)

        # Fall back through aggregators (uses their dedicated vision model,
        # not the user's main model) when main provider has no client.
@@ -2106,7 +2062,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    # Only use max_completion_tokens for direct OpenAI custom endpoints
    if (not or_key
            and _read_nous_auth() is None
-            and base_url_hostname(custom_base) == "api.openai.com"):
+            and "api.openai.com" in custom_base.lower()):
        return {"max_completion_tokens": value}
    return {"max_tokens": value}

@@ -2134,76 +2090,6 @@ _client_cache_lock = threading.Lock()
 _CLIENT_CACHE_MAX_SIZE = 64  # safety belt — evict oldest when exceeded


-def _client_cache_key(
-    provider: str,
-    *,
-    async_mode: bool,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    api_mode: Optional[str] = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
-) -> tuple:
-    runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
-
-
-def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
-    with _client_cache_lock:
-        old_entry = _client_cache.get(cache_key)
-        if old_entry is not None and old_entry[0] is not client:
-            _force_close_async_httpx(old_entry[0])
-            try:
-                close_fn = getattr(old_entry[0], "close", None)
-                if callable(close_fn):
-                    close_fn()
-            except Exception:
-                pass
-        _client_cache[cache_key] = (client, default_model, bound_loop)
-
-
-def _refresh_nous_auxiliary_client(
-    *,
-    cache_provider: str,
-    model: Optional[str],
-    async_mode: bool,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    api_mode: Optional[str] = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
-) -> Tuple[Optional[Any], Optional[str]]:
-    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
-    runtime = _resolve_nous_runtime_api(force_refresh=True)
-    if runtime is None:
-        return None, model
-
-    fresh_key, fresh_base_url = runtime
-    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
-    final_model = model
-
-    current_loop = None
-    if async_mode:
-        try:
-            import asyncio as _aio
-            current_loop = _aio.get_event_loop()
-        except RuntimeError:
-            pass
-        client, final_model = _to_async_client(sync_client, final_model or "")
-    else:
-        client = sync_client
-
-    cache_key = _client_cache_key(
-        cache_provider,
-        async_mode=async_mode,
-        base_url=base_url,
-        api_key=api_key,
-        api_mode=api_mode,
-        main_runtime=main_runtime,
-    )
-    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
-    return client, final_model
-
-
 def neuter_async_httpx_del() -> None:
    """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.

@@ -2305,7 +2191,7 @@ def cleanup_stale_async_clients() -> None:

 def _is_openrouter_client(client: Any) -> bool:
    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
-        if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
+        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
            return True
    return False

@@ -2357,14 +2243,8 @@ def _get_cached_client(
        except RuntimeError:
            pass
    runtime = _normalize_main_runtime(main_runtime)
-    cache_key = _client_cache_key(
-        provider,
-        async_mode=async_mode,
-        base_url=base_url,
-        api_key=api_key,
-        api_mode=api_mode,
-        main_runtime=main_runtime,
-    )
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2475,7 +2355,7 @@ def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
    if not task:
        return {}
    try:
-        from hermes_agent.cli.config import load_config
+        from hermes_cli.config import load_config
        config = load_config()
    except ImportError:
        return {}
@@ -2595,9 +2475,7 @@ def _build_call_kwargs(
    }

    fixed_temperature = _fixed_temperature_for_model(model, base_url)
-    if fixed_temperature is OMIT_TEMPERATURE:
-        temperature = None  # strip — let server choose
-    elif fixed_temperature is not None:
+    if fixed_temperature is not None:
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2605,7 +2483,7 @@ def _build_call_kwargs(
    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
    # the aux model is flipped to 4.7.
    if temperature is not None:
-        from hermes_agent.providers.anthropic_adapter import _forbids_sampling_params
+        from agent.anthropic_adapter import _forbids_sampling_params
        if _forbids_sampling_params(model):
            temperature = None

@@ -2617,7 +2495,7 @@ def _build_call_kwargs(
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
            custom_base = base_url or _current_custom_base_url()
-            if base_url_hostname(custom_base) == "api.openai.com":
+            if "api.openai.com" in custom_base.lower():
                kwargs["max_completion_tokens"] = max_tokens
            else:
                kwargs["max_tokens"] = max_tokens
@@ -2812,29 +2690,6 @@ def call_llm(
                    raise
                first_err = retry_err

-        # ── Nous auth refresh parity with main agent ──────────────────
-        client_is_nous = (
-            resolved_provider == "nous"
-            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
-        )
-        if _is_auth_error(first_err) and client_is_nous:
-            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
-                cache_provider=resolved_provider or "nous",
-                model=final_model,
-                async_mode=False,
-                base_url=resolved_base_url,
-                api_key=resolved_api_key,
-                api_mode=resolved_api_mode,
-                main_runtime=main_runtime,
-            )
-            if refreshed_client is not None:
-                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
-                            task or "call")
-                if refreshed_model and refreshed_model != kwargs.get("model"):
-                    kwargs["model"] = refreshed_model
-                return _validate_llm_response(
-                    refreshed_client.chat.completions.create(**kwargs), task)
-
        # ── Payment / credit exhaustion fallback ──────────────────────
        # When the resolved provider returns 402 or a credit-related error,
        # try alternative providers instead of giving up.  This handles the
@@ -3033,28 +2888,6 @@ async def async_call_llm(
                    raise
                first_err = retry_err

-        # ── Nous auth refresh parity with main agent ──────────────────
-        client_is_nous = (
-            resolved_provider == "nous"
-            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
-        )
-        if _is_auth_error(first_err) and client_is_nous:
-            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
-                cache_provider=resolved_provider or "nous",
-                model=final_model,
-                async_mode=True,
-                base_url=resolved_base_url,
-                api_key=resolved_api_key,
-                api_mode=resolved_api_mode,
-            )
-            if refreshed_client is not None:
-                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
-                            task or "call")
-                if refreshed_model and refreshed_model != kwargs.get("model"):
-                    kwargs["model"] = refreshed_model
-                return _validate_llm_response(
-                    await refreshed_client.chat.completions.create(**kwargs), task)
-
        # ── Payment / connection fallback (mirrors sync call_llm) ─────
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
        is_auto = resolved_provider in ("auto", "", None)
--- a/hermes_agent/providers/bedrock_adapter.py
+++ b/hermes_agent/providers/bedrock_adapter.py
--- a/hermes_agent/providers/codex_adapter.py
+++ b/hermes_agent/providers/codex_adapter.py
@@ -18,102 +18,10 @@ import uuid
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional

-from hermes_agent.agent.prompt_builder import DEFAULT_AGENT_IDENTITY
+from agent.prompt_builder import DEFAULT_AGENT_IDENTITY

 logger = logging.getLogger(__name__)

-
-# ---------------------------------------------------------------------------
-# Multimodal content helpers
-# ---------------------------------------------------------------------------
-
-def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
-    """Convert chat-style multimodal content to Responses API input parts.
-
-    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
-
-    Returns an empty list when ``content`` is not a list or contains no
-    recognized parts — callers fall back to the string path.
-    """
-    if not isinstance(content, list):
-        return []
-    converted: List[Dict[str, Any]] = []
-    for part in content:
-        if isinstance(part, str):
-            if part:
-                converted.append({"type": "input_text", "text": part})
-            continue
-        if not isinstance(part, dict):
-            continue
-        ptype = str(part.get("type") or "").strip().lower()
-        if ptype in {"text", "input_text", "output_text"}:
-            text = part.get("text")
-            if isinstance(text, str) and text:
-                converted.append({"type": "input_text", "text": text})
-            continue
-        if ptype in {"image_url", "input_image"}:
-            image_ref = part.get("image_url")
-            detail = part.get("detail")
-            if isinstance(image_ref, dict):
-                url = image_ref.get("url")
-                detail = image_ref.get("detail", detail)
-            else:
-                url = image_ref
-            if not isinstance(url, str) or not url:
-                continue
-            image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
-            if isinstance(detail, str) and detail.strip():
-                image_part["detail"] = detail.strip()
-            converted.append(image_part)
-    return converted
-
-
-def _summarize_user_message_for_log(content: Any) -> str:
-    """Return a short text summary of a user message for logging/trajectory.
-
-    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
-    parts from the API server.  Logging, spinner previews, and trajectory
-    files all want a plain string — this helper extracts the first chunk of
-    text and notes any attached images.  Returns an empty string for empty
-    lists and ``str(content)`` for unexpected scalar types.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        text_bits: List[str] = []
-        image_count = 0
-        for part in content:
-            if isinstance(part, str):
-                if part:
-                    text_bits.append(part)
-                continue
-            if not isinstance(part, dict):
-                continue
-            ptype = str(part.get("type") or "").strip().lower()
-            if ptype in {"text", "input_text", "output_text"}:
-                text = part.get("text")
-                if isinstance(text, str) and text:
-                    text_bits.append(text)
-            elif ptype in {"image_url", "input_image"}:
-                image_count += 1
-        summary = " ".join(text_bits).strip()
-        if image_count:
-            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
-            summary = f"{note} {summary}" if summary else note
-        return summary
-    try:
-        return str(content)
-    except Exception:
-        return ""
-
-
-# ---------------------------------------------------------------------------
-# ID helpers
-# ---------------------------------------------------------------------------
-
 def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
    """Generate a deterministic call_id from tool call content.

@@ -172,17 +80,14 @@ def _derive_responses_function_call_id(
    return f"fc_{digest}"


-# ---------------------------------------------------------------------------
-# Schema conversion
-# ---------------------------------------------------------------------------
-
 def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
    """Convert chat-completions tool schemas to Responses function-tool schemas."""
-    if not tools:
+    source_tools = tools
+    if not source_tools:
        return None

    converted: List[Dict[str, Any]] = []
-    for item in tools:
+    for item in source_tools:
        fn = item.get("function", {}) if isinstance(item, dict) else {}
        name = fn.get("name")
        if not isinstance(name, str) or not name.strip():
@@ -197,10 +102,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
    return converted or None


-# ---------------------------------------------------------------------------
-# Message format conversion
-# ---------------------------------------------------------------------------
-
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -215,14 +116,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di

        if role in {"user", "assistant"}:
            content = msg.get("content", "")
-            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content)
-                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
-                )
-            else:
-                content_parts = []
-                content_text = str(content) if content is not None else ""
+            content_text = str(content) if content is not None else ""

            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
@@ -245,9 +139,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                if content_parts:
-                    items.append({"role": "assistant", "content": content_parts})
-                elif content_text.strip():
+                if content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
                elif has_codex_reasoning:
                    # The Responses API requires a following item after each
@@ -300,12 +192,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                        })
                continue

-            # Non-assistant (user) role: emit multimodal parts when present,
-            # otherwise fall back to the text payload.
-            if content_parts:
-                items.append({"role": role, "content": content_parts})
-            else:
-                items.append({"role": role, "content": content_text})
+            items.append({"role": role, "content": content_text})
            continue

        if role == "tool":
@@ -325,10 +212,6 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
    return items


-# ---------------------------------------------------------------------------
-# Input preflight / validation
-# ---------------------------------------------------------------------------
-
 def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
    if not isinstance(raw_items, list):
        raise ValueError("Codex Responses input must be a list of input items.")
@@ -410,46 +293,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
            content = item.get("content", "")
            if content is None:
                content = ""
-            if isinstance(content, list):
-                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``input_image``).
-                # Validate each part and pass through.
-                validated: List[Dict[str, Any]] = []
-                for part_idx, part in enumerate(content):
-                    if isinstance(part, str):
-                        if part:
-                            validated.append({"type": "input_text", "text": part})
-                        continue
-                    if not isinstance(part, dict):
-                        raise ValueError(
-                            f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
-                        )
-                    ptype = str(part.get("type") or "").strip().lower()
-                    if ptype in {"input_text", "text", "output_text"}:
-                        text = part.get("text", "")
-                        if not isinstance(text, str):
-                            text = str(text or "")
-                        validated.append({"type": "input_text", "text": text})
-                    elif ptype in {"input_image", "image_url"}:
-                        image_ref = part.get("image_url", "")
-                        detail = part.get("detail")
-                        if isinstance(image_ref, dict):
-                            url = image_ref.get("url", "")
-                            detail = image_ref.get("detail", detail)
-                        else:
-                            url = image_ref
-                        if not isinstance(url, str):
-                            url = str(url or "")
-                        image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
-                        if isinstance(detail, str) and detail.strip():
-                            image_part["detail"] = detail.strip()
-                        validated.append(image_part)
-                    else:
-                        raise ValueError(
-                            f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
-                        )
-                normalized.append({"role": role, "content": validated})
-                continue
            if not isinstance(content, str):
                content = str(content)

@@ -606,10 +449,6 @@ def _preflight_codex_api_kwargs(
    return normalized


-# ---------------------------------------------------------------------------
-# Response extraction helpers
-# ---------------------------------------------------------------------------
-
 def _extract_responses_message_text(item: Any) -> str:
    """Extract assistant text from a Responses message output item."""
    content = getattr(item, "content", None)
@@ -644,10 +483,6 @@ def _extract_responses_reasoning_text(item: Any) -> str:
    return ""


-# ---------------------------------------------------------------------------
-# Full response normalization
-# ---------------------------------------------------------------------------
-
 def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    """Normalize a Responses API object to an assistant_message-like object."""
    output = getattr(response, "output", None)
@@ -811,3 +646,5 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    else:
        finish_reason = "stop"
    return assistant_message, finish_reason
+
+
--- a/hermes_agent/agent/context/compressor.py
+++ b/hermes_agent/agent/context/compressor.py
@@ -24,14 +24,13 @@ import re
 import time
 from typing import Any, Dict, List, Optional

-from hermes_agent.providers.auxiliary import call_llm
-from hermes_agent.agent.context.engine import ContextEngine
-from hermes_agent.providers.metadata import (
+from agent.auxiliary_client import call_llm
+from agent.context_engine import ContextEngine
+from agent.model_metadata import (
    MINIMUM_CONTEXT_LENGTH,
    get_model_context_length,
    estimate_messages_tokens_rough,
 )
-from hermes_agent.agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -551,15 +550,11 @@ class ContextCompressor(ContextEngine):
        Includes tool call arguments and result content (up to
        ``_CONTENT_MAX`` chars per message) so the summarizer can preserve
        specific details like file paths, commands, and outputs.
-
-        All content is redacted before serialization to prevent secrets
-        (API keys, tokens, passwords) from leaking into the summary that
-        gets sent to the auxiliary model and persisted across compactions.
        """
        parts = []
        for msg in turns:
            role = msg.get("role", "unknown")
-            content = redact_sensitive_text(msg.get("content") or "")
+            content = msg.get("content") or ""

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -580,7 +575,7 @@ class ContextCompressor(ContextEngine):
                        if isinstance(tc, dict):
                            fn = tc.get("function", {})
                            name = fn.get("name", "?")
-                            args = redact_sensitive_text(fn.get("arguments", ""))
+                            args = fn.get("arguments", "")
                            # Truncate long arguments but keep enough for context
                            if len(args) > self._TOOL_ARGS_MAX:
                                args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -640,11 +635,7 @@ class ContextCompressor(ContextEngine):
            "only output the structured summary. "
            "Do NOT include any preamble, greeting, or prefix. "
            "Write the summary in the same language the user was using in the "
-            "conversation — do not translate or switch to English. "
-            "NEVER include API keys, tokens, passwords, secrets, credentials, "
-            "or connection strings in the summary — replace any that appear "
-            "with [REDACTED]. Note that the user had credentials present, but "
-            "do not preserve their values."
+            "conversation — do not translate or switch to English."
        )

        # Shared structured template (used by both paths).
@@ -701,7 +692,7 @@ Be specific with file paths, commands, line numbers, and results.]
 [What remains to be done — framed as context, not instructions]

 ## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
+[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

@@ -741,7 +732,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""

        try:
            call_kwargs = {
@@ -764,9 +755,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
-            # Redact the summary output as well — the summarizer LLM may
-            # ignore prompt instructions and echo back secrets verbatim.
-            summary = redact_sensitive_text(content.strip())
+            summary = content.strip()
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
@@ -807,7 +796,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize)  # retry immediately
+                return self._generate_summary(messages, summary_budget)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
--- a/hermes_agent/agent/context/engine.py
+++ b/hermes_agent/agent/context/engine.py
--- a/hermes_agent/agent/context/references.py
+++ b/hermes_agent/agent/context/references.py
@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Awaitable, Callable

-from hermes_agent.providers.metadata import estimate_tokens_rough
+from agent.model_metadata import estimate_tokens_rough

 _QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
 REFERENCE_PATTERN = re.compile(
@@ -315,7 +315,7 @@ async def _fetch_url_content(


 async def _default_url_fetcher(url: str) -> str:
-    from hermes_agent.tools.web import web_extract_tool
+    from tools.web_tools import web_extract_tool

    raw = await web_extract_tool([url], format="markdown", use_llm_processing=True)
    payload = json.loads(raw)
@@ -340,7 +340,7 @@ def _resolve_path(cwd: Path, target: str, *, allowed_root: Path | None = None) -


 def _ensure_reference_path_allowed(path: Path) -> None:
-    from hermes_agent.constants import get_hermes_home
+    from hermes_constants import get_hermes_home
    home = Path(os.path.expanduser("~")).resolve()
    hermes_home = get_hermes_home().resolve()

--- a/hermes_agent/agent/copilot_acp_client.py
+++ b/hermes_agent/agent/copilot_acp_client.py
@@ -21,9 +21,6 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any

-from hermes_agent.agent.file_safety import get_read_block_error, is_write_denied
-from hermes_agent.agent.redact import redact_sensitive_text
-
 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0

@@ -57,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    }


-def _permission_denied(message_id: Any) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "result": {
-            "outcome": {
-                "outcome": "cancelled",
-            }
-        },
-    }
-
-
 def _format_messages_as_prompt(
    messages: list[dict[str, Any]],
    model: str | None = None,
@@ -401,8 +386,6 @@ class CopilotACPClient:
        stderr_tail: deque[str] = deque(maxlen=40)

        def _stdout_reader() -> None:
-            if proc.stdout is None:
-                return
            for line in proc.stdout:
                try:
                    inbox.put(json.loads(line))
@@ -550,13 +533,18 @@ class CopilotACPClient:
        params = msg.get("params") or {}

        if method == "session/request_permission":
-            response = _permission_denied(message_id)
+            response = {
+                "jsonrpc": "2.0",
+                "id": message_id,
+                "result": {
+                    "outcome": {
+                        "outcome": "allow_once",
+                    }
+                },
+            }
        elif method == "fs/read_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                block_error = get_read_block_error(str(path))
-                if block_error:
-                    raise PermissionError(block_error)
                content = path.read_text() if path.exists() else ""
                line = params.get("line")
                limit = params.get("limit")
@@ -565,8 +553,6 @@ class CopilotACPClient:
                    start = line - 1
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
-                if content:
-                    content = redact_sensitive_text(content)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -579,10 +565,6 @@ class CopilotACPClient:
        elif method == "fs/write_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                if is_write_denied(str(path)):
-                    raise PermissionError(
-                        f"Write denied: '{path}' is a protected system/credential file."
-                    )
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_text(str(params.get("content") or ""))
                response = {
--- a/hermes_agent/providers/credential_pool.py
+++ b/hermes_agent/providers/credential_pool.py
@@ -13,9 +13,9 @@ from dataclasses import dataclass, fields, replace
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

-from hermes_agent.constants import OPENROUTER_BASE_URL
-import hermes_agent.cli.auth.auth as auth_mod
-from hermes_agent.cli.auth.auth import (
+from hermes_constants import OPENROUTER_BASE_URL
+import hermes_cli.auth as auth_mod
+from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
    PROVIDER_REGISTRY,
@@ -29,7 +29,6 @@ from hermes_agent.cli.auth.auth import (
    _save_auth_store,
    _save_provider_state,
    read_credential_pool,
-    read_provider_credentials,
    write_credential_pool,
 )

@@ -39,7 +38,7 @@ logger = logging.getLogger(__name__)
 def _load_config_safe() -> Optional[dict]:
    """Load config.yaml, returning None on any error."""
    try:
-        from hermes_agent.cli.config import load_config
+        from hermes_cli.config import load_config

        return load_config()
    except Exception:
@@ -289,7 +288,7 @@ def _iter_custom_providers(config: Optional[dict] = None):
    if not isinstance(custom_providers, list):
        # Fall back to the v12+ providers dict via the compatibility layer
        try:
-            from hermes_agent.cli.config import get_compatible_custom_providers
+            from hermes_cli.config import get_compatible_custom_providers

            custom_providers = get_compatible_custom_providers(config)
        except Exception:
@@ -322,7 +321,7 @@ def get_custom_provider_pool_key(base_url: str) -> Optional[str]:

 def list_custom_pool_providers() -> List[str]:
    """Return all 'custom:*' pool keys that have entries in auth.json."""
-    pool_data = read_credential_pool()
+    pool_data = read_credential_pool(None)
    return sorted(
        key for key in pool_data
        if key.startswith(CUSTOM_POOL_PREFIX)
@@ -430,7 +429,7 @@ class CredentialPool:
        if self.provider != "anthropic" or entry.source != "claude_code":
            return entry
        try:
-            from hermes_agent.providers.anthropic_adapter import read_claude_code_credentials
+            from agent.anthropic_adapter import read_claude_code_credentials
            creds = read_claude_code_credentials()
            if not creds:
                return entry
@@ -525,7 +524,7 @@ class CredentialPool:

        try:
            if self.provider == "anthropic":
-                from hermes_agent.providers.anthropic_adapter import refresh_anthropic_oauth_pure
+                from agent.anthropic_adapter import refresh_anthropic_oauth_pure

                refreshed = refresh_anthropic_oauth_pure(
                    entry.refresh_token,
@@ -542,7 +541,7 @@ class CredentialPool:
                # see the latest tokens.
                if entry.source == "claude_code":
                    try:
-                        from hermes_agent.providers.anthropic_adapter import _write_claude_code_credentials
+                        from agent.anthropic_adapter import _write_claude_code_credentials
                        _write_claude_code_credentials(
                            refreshed["access_token"],
                            refreshed["refresh_token"],
@@ -604,7 +603,7 @@ class CredentialPool:
                if synced.refresh_token != entry.refresh_token:
                    logger.debug("Retrying refresh with synced token from credentials file")
                    try:
-                        from hermes_agent.providers.anthropic_adapter import refresh_anthropic_oauth_pure
+                        from agent.anthropic_adapter import refresh_anthropic_oauth_pure
                        refreshed = refresh_anthropic_oauth_pure(
                            synced.refresh_token,
                            use_json=synced.source.endswith("hermes_pkce"),
@@ -621,7 +620,7 @@ class CredentialPool:
                        self._replace_entry(synced, updated)
                        self._persist()
                        try:
-                            from hermes_agent.providers.anthropic_adapter import _write_claude_code_credentials
+                            from agent.anthropic_adapter import _write_claude_code_credentials
                            _write_claude_code_credentials(
                                refreshed["access_token"],
                                refreshed["refresh_token"],
@@ -876,20 +875,6 @@ class CredentialPool:
            self._current_id = None
        return removed

-    def remove_entry(self, entry_id: str) -> Optional[PooledCredential]:
-        for idx, entry in enumerate(self._entries):
-            if entry.id == entry_id:
-                removed = self._entries.pop(idx)
-                self._entries = [
-                    replace(e, priority=new_priority)
-                    for new_priority, e in enumerate(self._entries)
-                ]
-                self._persist()
-                if self._current_id == removed.id:
-                    self._current_id = None
-                return removed
-        return None
-
    def resolve_target(self, target: Any) -> Tuple[Optional[int], Optional[PooledCredential], Optional[str]]:
        raw = str(target or "").strip()
        if not raw:
@@ -998,35 +983,32 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    active_sources: Set[str] = set()
    auth_store = _load_auth_store()

-    # Shared suppression gate — used at every upsert site so
-    # `hermes auth remove <provider> <N>` is stable across all source types.
-    try:
-        from hermes_agent.cli.auth.auth import is_source_suppressed as _is_suppressed
-    except ImportError:
-        def _is_suppressed(_p, _s):  # type: ignore[misc]
-            return False
-
    if provider == "anthropic":
        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
        # when the user has explicitly configured anthropic as their provider.
        # Without this gate, auxiliary client fallback chains silently read
        # ~/.claude/.credentials.json without user consent.  See PR #4210.
        try:
-            from hermes_agent.cli.auth.auth import is_provider_explicitly_configured
+            from hermes_cli.auth import is_provider_explicitly_configured
            if not is_provider_explicitly_configured("anthropic"):
                return changed, active_sources
        except ImportError:
            pass

-        from hermes_agent.providers.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials
+        from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials

        for source_name, creds in (
            ("hermes_pkce", read_hermes_oauth_credentials()),
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
-                if _is_suppressed(provider, source_name):
-                    continue
+                # Check if user explicitly removed this source
+                try:
+                    from hermes_cli.auth import is_source_suppressed
+                    if is_source_suppressed(provider, source_name):
+                        continue
+                except ImportError:
+                    pass
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -1044,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state and not _is_suppressed(provider, "device_code"):
+        if state:
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
@@ -1081,25 +1063,24 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN).  They don't live in
        # the auth store or credential pool, so we resolve them here.
        try:
-            from hermes_agent.cli.auth.copilot import resolve_copilot_token
+            from hermes_cli.copilot_auth import resolve_copilot_token
            token, source = resolve_copilot_token()
            if token:
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    pconfig = PROVIDER_REGISTRY.get(provider)
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": token,
-                            "base_url": pconfig.inference_base_url if pconfig else "",
-                            "label": source,
-                        },
-                    )
+                active_sources.add(source_name)
+                pconfig = PROVIDER_REGISTRY.get(provider)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": token,
+                        "base_url": pconfig.inference_base_url if pconfig else "",
+                        "label": source,
+                    },
+                )
        except Exception as exc:
            logger.debug("Copilot token seed failed: %s", exc)

@@ -1110,26 +1091,25 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # Use refresh_if_expiring=False to avoid network calls during
        # pool loading / provider discovery.
        try:
-            from hermes_agent.cli.auth.auth import resolve_qwen_runtime_credentials
+            from hermes_cli.auth import resolve_qwen_runtime_credentials
            creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
            token = creds.get("api_key", "")
            if token:
                source_name = creds.get("source", "qwen-cli")
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_OAUTH,
-                            "access_token": token,
-                            "expires_at_ms": creds.get("expires_at_ms"),
-                            "base_url": creds.get("base_url", ""),
-                            "label": creds.get("auth_file", source_name),
-                        },
-                    )
+                active_sources.add(source_name)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_OAUTH,
+                        "access_token": token,
+                        "expires_at_ms": creds.get("expires_at_ms"),
+                        "base_url": creds.get("base_url", ""),
+                        "label": creds.get("auth_file", source_name),
+                    },
+                )
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

@@ -1138,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # the device_code source as suppressed so it won't be re-seeded from
        # the Hermes auth store.  Without this gate the removal is instantly
        # undone on the next load_pool() call.
-        if _is_suppressed(provider, "device_code"):
+        codex_suppressed = False
+        try:
+            from hermes_cli.auth import is_source_suppressed
+            codex_suppressed = is_source_suppressed(provider, "device_code")
+        except ImportError:
+            pass
+        if codex_suppressed:
            return changed, active_sources

        state = _load_provider_state(auth_store, "openai-codex")
@@ -1172,22 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
-    # Honour user suppression — `hermes auth remove <provider> <N>` for an
-    # env-seeded credential marks the env:<VAR> source as suppressed so it
-    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
-    # Without this gate the removal is silently undone on the next
-    # load_pool() call whenever the var is still exported by the shell.
-    try:
-        from hermes_agent.cli.auth.auth import is_source_suppressed as _is_source_suppressed
-    except ImportError:
-        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
-            return False
    if provider == "openrouter":
        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
-            if _is_source_suppressed(provider, source):
-                return changed, active_sources
            active_sources.add(source)
            changed |= _upsert_entry(
                entries,
@@ -1224,8 +1198,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if not token:
            continue
        source = f"env:{env_var}"
-        if _is_source_suppressed(provider, source):
-            continue
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
@@ -1270,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
    changed = False
    active_sources: Set[str] = set()

-    # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
-    try:
-        from hermes_agent.cli.auth.auth import is_source_suppressed as _is_suppressed
-    except ImportError:
-        def _is_suppressed(_p, _s):  # type: ignore[misc]
-            return False
-
    # Seed from the custom_providers config entry's api_key field
    cp_config = _get_custom_provider_config(pool_key)
    if cp_config:
@@ -1285,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
        name = str(cp_config.get("name") or "").strip()
        if api_key:
            source = f"config:{name}"
-            if not _is_suppressed(pool_key, source):
-                active_sources.add(source)
-                changed |= _upsert_entry(
-                    entries,
-                    pool_key,
-                    source,
-                    {
-                        "source": source,
-                        "auth_type": AUTH_TYPE_API_KEY,
-                        "access_token": api_key,
-                        "base_url": base_url,
-                        "label": name or source,
-                    },
-                )
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                pool_key,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": api_key,
+                    "base_url": base_url,
+                    "label": name or source,
+                },
+            )

    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
    try:
@@ -1318,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
                matched_key = get_custom_provider_pool_key(model_base_url)
                if matched_key == pool_key:
                    source = "model_config"
-                    if not _is_suppressed(pool_key, source):
-                        active_sources.add(source)
-                        changed |= _upsert_entry(
-                            entries,
-                            pool_key,
-                            source,
-                            {
-                                "source": source,
-                                "auth_type": AUTH_TYPE_API_KEY,
-                                "access_token": model_api_key,
-                                "base_url": model_base_url,
-                                "label": "model_config",
-                            },
-                        )
+                    active_sources.add(source)
+                    changed |= _upsert_entry(
+                        entries,
+                        pool_key,
+                        source,
+                        {
+                            "source": source,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": model_api_key,
+                            "base_url": model_base_url,
+                            "label": "model_config",
+                        },
+                    )
    except Exception:
        pass

@@ -1340,7 +1303,7 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b

 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
-    raw_entries = read_provider_credentials(provider)
+    raw_entries = read_credential_pool(provider)
    entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]

    if provider.startswith(CUSTOM_POOL_PREFIX):
--- a/hermes_agent/agent/display.py
+++ b/hermes_agent/agent/display.py
@@ -13,7 +13,7 @@ from dataclasses import dataclass, field
 from difflib import unified_diff
 from pathlib import Path

-from hermes_agent.utils import safe_json_loads
+from utils import safe_json_loads

 # ANSI escape codes for coloring tool failure indicators
 _RED = "\033[31m"
@@ -43,7 +43,7 @@ def _diff_ansi() -> dict[str, str]:
    plus = "\033[38;2;255;255;255;48;2;20;90;20m"

    try:
-        from hermes_agent.cli.ui.skin_engine import get_active_skin
+        from hermes_cli.skin_engine import get_active_skin
        skin = get_active_skin()

        def _hex_fg(key: str, fallback_rgb: tuple[int, int, int]) -> str:
@@ -118,7 +118,7 @@ def get_tool_preview_max_len() -> int:
 def _get_skin():
    """Get the active skin config, or None if not available."""
    try:
-        from hermes_agent.cli.ui.skin_engine import get_active_skin
+        from hermes_cli.skin_engine import get_active_skin
        return get_active_skin()
    except Exception:
        return None
@@ -148,7 +148,7 @@ def get_tool_emoji(tool_name: str, default: str = "⚡") -> str:
            return override
    # 2. Registry default
    try:
-        from hermes_agent.tools.registry import registry
+        from tools.registry import registry
        emoji = registry.get_emoji(tool_name, default="")
        if emoji:
            return emoji
@@ -311,7 +311,7 @@ def _resolve_skill_manage_paths(args: dict) -> list[Path]:
    if not action or not name:
        return []

-    from hermes_agent.tools.skills.manager import _find_skill, _resolve_skill_dir
+    from tools.skill_manager_tool import _find_skill, _resolve_skill_dir

    if action == "create":
        skill_dir = _resolve_skill_dir(name, args.get("category"))
@@ -729,7 +729,6 @@ class KawaiiSpinner:
                time.sleep(0.1)
                continue
            frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
-            assert self.start_time is not None  # start() sets it before thread starts
            elapsed = time.time() - self.start_time
            if wings:
                left, right = wings[self.frame_idx % len(wings)]
--- a/hermes_agent/providers/errors.py
+++ b/hermes_agent/providers/errors.py
--- a/hermes_agent/providers/gemini_cloudcode_adapter.py
+++ b/hermes_agent/providers/gemini_cloudcode_adapter.py
@@ -38,9 +38,9 @@ from typing import Any, Dict, Iterator, List, Optional

 import httpx

-from hermes_agent.providers import google_oauth
-from hermes_agent.providers.gemini_schema import sanitize_gemini_tool_parameters
-from hermes_agent.providers.google_code_assist import (
+from agent import google_oauth
+from agent.gemini_schema import sanitize_gemini_tool_parameters
+from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
    FREE_TIER_ID,
    CodeAssistError,
@@ -799,8 +799,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    _raw_details = err_obj.get("details")
-    err_details_list = _raw_details if isinstance(_raw_details, list) else []
+    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    # Extract google.rpc.ErrorInfo reason + metadata.  There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
--- a/hermes_agent/providers/gemini_adapter.py
+++ b/hermes_agent/providers/gemini_adapter.py
@@ -27,7 +27,7 @@ from typing import Any, Dict, Iterator, List, Optional

 import httpx

-from hermes_agent.providers.gemini_schema import sanitize_gemini_tool_parameters
+from agent.gemini_schema import sanitize_gemini_tool_parameters

 logger = logging.getLogger(__name__)

@@ -613,8 +613,7 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    _raw_details = err_obj.get("details")
-    details_list = _raw_details if isinstance(_raw_details, list) else []
+    details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    reason = ""
    retry_after: Optional[float] = None
--- a/hermes_agent/providers/gemini_schema.py
+++ b/hermes_agent/providers/gemini_schema.py
--- a/hermes_agent/providers/google_code_assist.py
+++ b/hermes_agent/providers/google_code_assist.py
--- a/hermes_agent/providers/google_oauth.py
+++ b/hermes_agent/providers/google_oauth.py
@@ -60,7 +60,7 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home

 logger = logging.getLogger(__name__)

--- a/hermes_agent/agent/insights.py
+++ b/hermes_agent/agent/insights.py
@@ -10,7 +10,7 @@ multi-platform architecture with additional cost estimation and platform
 breakdown capabilities.

 Usage:
-    from hermes_agent.agent.insights import InsightsEngine
+    from agent.insights import InsightsEngine
    engine = InsightsEngine(db)
    report = engine.generate(days=30)
    print(engine.format_terminal(report))
@@ -22,7 +22,7 @@ from collections import Counter, defaultdict
 from datetime import datetime
 from typing import Any, Dict, List

-from hermes_agent.providers.pricing import (
+from agent.usage_pricing import (
    CanonicalUsage,
    DEFAULT_PRICING,
    estimate_usage_cost,
@@ -124,7 +124,6 @@ class InsightsEngine:
        # Gather raw data
        sessions = self._get_sessions(cutoff, source)
        tool_usage = self._get_tool_usage(cutoff, source)
-        skill_usage = self._get_skill_usage(cutoff, source)
        message_stats = self._get_message_stats(cutoff, source)

        if not sessions:
@@ -136,15 +135,6 @@ class InsightsEngine:
                "models": [],
                "platforms": [],
                "tools": [],
-                "skills": {
-                    "summary": {
-                        "total_skill_loads": 0,
-                        "total_skill_edits": 0,
-                        "total_skill_actions": 0,
-                        "distinct_skills_used": 0,
-                    },
-                    "top_skills": [],
-                },
                "activity": {},
                "top_sessions": [],
            }
@@ -154,7 +144,6 @@ class InsightsEngine:
        models = self._compute_model_breakdown(sessions)
        platforms = self._compute_platform_breakdown(sessions)
        tools = self._compute_tool_breakdown(tool_usage)
-        skills = self._compute_skill_breakdown(skill_usage)
        activity = self._compute_activity_patterns(sessions)
        top_sessions = self._compute_top_sessions(sessions)

@@ -167,7 +156,6 @@ class InsightsEngine:
            "models": models,
            "platforms": platforms,
            "tools": tools,
-            "skills": skills,
            "activity": activity,
            "top_sessions": top_sessions,
        }
@@ -296,82 +284,6 @@ class InsightsEngine:
            for name, count in tool_counts.most_common()
        ]

-    def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
-        """Extract per-skill usage from assistant tool calls."""
-        skill_counts: Dict[str, Dict[str, Any]] = {}
-
-        if source:
-            cursor = self._conn.execute(
-                """SELECT m.tool_calls, m.timestamp
-                   FROM messages m
-                   JOIN sessions s ON s.id = m.session_id
-                   WHERE s.started_at >= ? AND s.source = ?
-                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
-                (cutoff, source),
-            )
-        else:
-            cursor = self._conn.execute(
-                """SELECT m.tool_calls, m.timestamp
-                   FROM messages m
-                   JOIN sessions s ON s.id = m.session_id
-                   WHERE s.started_at >= ?
-                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
-                (cutoff,),
-            )
-
-        for row in cursor.fetchall():
-            try:
-                calls = row["tool_calls"]
-                if isinstance(calls, str):
-                    calls = json.loads(calls)
-                if not isinstance(calls, list):
-                    continue
-            except (json.JSONDecodeError, TypeError):
-                continue
-
-            timestamp = row["timestamp"]
-            for call in calls:
-                if not isinstance(call, dict):
-                    continue
-                func = call.get("function", {})
-                tool_name = func.get("name")
-                if tool_name not in {"skill_view", "skill_manage"}:
-                    continue
-
-                args = func.get("arguments")
-                if isinstance(args, str):
-                    try:
-                        args = json.loads(args)
-                    except (json.JSONDecodeError, TypeError):
-                        continue
-                if not isinstance(args, dict):
-                    continue
-
-                skill_name = args.get("name")
-                if not isinstance(skill_name, str) or not skill_name.strip():
-                    continue
-
-                entry = skill_counts.setdefault(
-                    skill_name,
-                    {
-                        "skill": skill_name,
-                        "view_count": 0,
-                        "manage_count": 0,
-                        "last_used_at": None,
-                    },
-                )
-                if tool_name == "skill_view":
-                    entry["view_count"] += 1
-                else:
-                    entry["manage_count"] += 1
-
-                if timestamp is not None and (
-                    entry["last_used_at"] is None or timestamp > entry["last_used_at"]
-                ):
-                    entry["last_used_at"] = timestamp
-
-        return list(skill_counts.values())
-
    def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
        """Get aggregate message statistics."""
        if source:
@@ -563,46 +475,6 @@ class InsightsEngine:
            })
        return result

-    def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
-        """Process per-skill usage into summary + ranked list."""
-        total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
-        total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
-        total_skill_actions = total_skill_loads + total_skill_edits
-
-        top_skills = []
-        for skill in skill_usage:
-            total_count = skill["view_count"] + skill["manage_count"]
-            percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
-            top_skills.append({
-                "skill": skill["skill"],
-                "view_count": skill["view_count"],
-                "manage_count": skill["manage_count"],
-                "total_count": total_count,
-                "percentage": percentage,
-                "last_used_at": skill.get("last_used_at"),
-            })
-
-        top_skills.sort(
-            key=lambda s: (
-                s["total_count"],
-                s["view_count"],
-                s["manage_count"],
-                s["last_used_at"] or 0,
-                s["skill"],
-            ),
-            reverse=True,
-        )
-
-        return {
-            "summary": {
-                "total_skill_loads": total_skill_loads,
-                "total_skill_edits": total_skill_edits,
-                "total_skill_actions": total_skill_actions,
-                "distinct_skills_used": len(skill_usage),
-            },
-            "top_skills": top_skills,
-        }
-
    def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
        """Analyze activity patterns by day of week and hour."""
        day_counts = Counter()  # 0=Monday ... 6=Sunday
@@ -798,28 +670,6 @@ class InsightsEngine:
                lines.append(f"  ... and {len(report['tools']) - 15} more tools")
            lines.append("")

-        # Skill usage
-        skills = report.get("skills", {})
-        top_skills = skills.get("top_skills", [])
-        if top_skills:
-            lines.append("  🧠 Top Skills")
-            lines.append("  " + "─" * 56)
-            lines.append(f"  {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
-            for skill in top_skills[:10]:
-                last_used = "—"
-                if skill.get("last_used_at"):
-                    last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
-                lines.append(
-                    f"  {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
-                )
-            summary = skills.get("summary", {})
-            lines.append(
-                f"  Distinct skills: {summary.get('distinct_skills_used', 0)}  "
-                f"Loads: {summary.get('total_skill_loads', 0):,}  "
-                f"Edits: {summary.get('total_skill_edits', 0):,}"
-            )
-            lines.append("")
-
        # Activity patterns
        act = report.get("activity", {})
        if act.get("by_day"):
@@ -903,18 +753,6 @@ class InsightsEngine:
                lines.append(f"  {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
            lines.append("")

-        skills = report.get("skills", {})
-        if skills.get("top_skills"):
-            lines.append("**🧠 Top Skills:**")
-            for skill in skills["top_skills"][:5]:
-                suffix = ""
-                if skill.get("last_used_at"):
-                    suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
-                lines.append(
-                    f"  {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
-                )
-            lines.append("")
-
        # Activity summary
        act = report.get("activity", {})
        if act.get("busiest_day") and act.get("busiest_hour"):
--- a/hermes_agent/agent/manual_compression_feedback.py
+++ b/hermes_agent/agent/manual_compression_feedback.py
--- a/hermes_agent/agent/memory/manager.py
+++ b/hermes_agent/agent/memory/manager.py
@@ -33,8 +33,8 @@ import logging
 import re
 from typing import Any, Dict, List, Optional

-from hermes_agent.agent.memory.provider import MemoryProvider
-from hermes_agent.tools.registry import tool_error
+from agent.memory_provider import MemoryProvider
+from tools.registry import tool_error

 logger = logging.getLogger(__name__)

@@ -361,7 +361,7 @@ class MemoryManager:
        ``get_hermes_home()`` themselves.
        """
        if "hermes_home" not in kwargs:
-            from hermes_agent.constants import get_hermes_home
+            from hermes_constants import get_hermes_home
            kwargs["hermes_home"] = str(get_hermes_home())
        for provider in self._providers:
            try:
--- a/hermes_agent/agent/memory/provider.py
+++ b/hermes_agent/agent/memory/provider.py
--- a/hermes_agent/providers/metadata.py
+++ b/hermes_agent/providers/metadata.py
@@ -14,9 +14,7 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from hermes_agent.utils import base_url_host_matches, base_url_hostname
-
-from hermes_agent.constants import OPENROUTER_MODELS_URL
+from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)

@@ -118,6 +116,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
+    "gpt-5.3-codex-spark": 128000,    # Spark variant has reduced 128k context
    "gpt-5.1-chat": 128000,           # Chat variant has 128k context
    "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
    "gpt-4.1": 1047576,
@@ -170,7 +169,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "Qwen/Qwen3.5-35B-A3B": 131072,
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
-    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 256000,
@@ -213,15 +211,8 @@ def _normalize_base_url(base_url: str) -> str:
    return (base_url or "").strip().rstrip("/")


-def _auth_headers(api_key: str = "") -> Dict[str, str]:
-    token = str(api_key or "").strip()
-    if not token:
-        return {}
-    return {"Authorization": f"Bearer {token}"}
-
-
 def _is_openrouter_base_url(base_url: str) -> bool:
-    return base_url_host_matches(base_url, "openrouter.ai")
+    return "openrouter.ai" in _normalize_base_url(base_url).lower()


 def _is_custom_endpoint(base_url: str) -> bool:
@@ -319,7 +310,7 @@ def is_local_endpoint(base_url: str) -> bool:
    return False


-def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
+def detect_local_server_type(base_url: str) -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -331,10 +322,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

-    headers = _auth_headers(api_key)
-
    try:
-        with httpx.Client(timeout=2.0, headers=headers) as client:
+        with httpx.Client(timeout=2.0) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
                r = client.get(f"{server_url}/api/v1/models")
@@ -521,59 +510,6 @@ def fetch_endpoint_model_metadata(
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    last_error: Optional[Exception] = None

-    if is_local_endpoint(normalized):
-        try:
-            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
-                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
-                response = requests.get(
-                    server_url.rstrip("/") + "/api/v1/models",
-                    headers=headers,
-                    timeout=10,
-                )
-                response.raise_for_status()
-                payload = response.json()
-                cache: Dict[str, Dict[str, Any]] = {}
-                for model in payload.get("models", []):
-                    if not isinstance(model, dict):
-                        continue
-                    model_id = model.get("key") or model.get("id")
-                    if not model_id:
-                        continue
-                    entry: Dict[str, Any] = {"name": model.get("name", model_id)}
-
-                    context_length = None
-                    for inst in model.get("loaded_instances", []) or []:
-                        if not isinstance(inst, dict):
-                            continue
-                        cfg = inst.get("config", {})
-                        ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
-                        if isinstance(ctx, int) and ctx > 0:
-                            context_length = ctx
-                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
-                    if context_length is not None:
-                        entry["context_length"] = context_length
-
-                    max_completion_tokens = _extract_max_completion_tokens(model)
-                    if max_completion_tokens is not None:
-                        entry["max_completion_tokens"] = max_completion_tokens
-
-                    pricing = _extract_pricing(model)
-                    if pricing:
-                        entry["pricing"] = pricing
-
-                    _add_model_aliases(cache, model_id, entry)
-                    alt_id = model.get("id")
-                    if isinstance(alt_id, str) and alt_id and alt_id != model_id:
-                        _add_model_aliases(cache, alt_id, entry)
-
-                _endpoint_model_metadata_cache[normalized] = cache
-                _endpoint_model_metadata_cache_time[normalized] = time.time()
-                return cache
-        except Exception as exc:
-            last_error = exc
-
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
@@ -636,7 +572,7 @@ def fetch_endpoint_model_metadata(

 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
-    from hermes_agent.constants import get_hermes_home
+    from hermes_constants import get_hermes_home
    return get_hermes_home() / "context_length_cache.yaml"


@@ -780,7 +716,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


-def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
+def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
    """Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -798,16 +734,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
        server_url = server_url[:-3]

    try:
-        server_type = detect_local_server_type(base_url, api_key=api_key)
+        server_type = detect_local_server_type(base_url)
    except Exception:
        return None
    if server_type != "ollama":
        return None

-    headers = _auth_headers(api_key)
-
    try:
-        with httpx.Client(timeout=3.0, headers=headers) as client:
+        with httpx.Client(timeout=3.0) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
@@ -835,7 +769,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
    return None


-def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
+def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx

@@ -848,15 +782,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

-    headers = _auth_headers(api_key)
-
    try:
-        server_type = detect_local_server_type(base_url, api_key=api_key)
+        server_type = detect_local_server_type(base_url)
    except Exception:
        server_type = None

    try:
-        with httpx.Client(timeout=3.0, headers=headers) as client:
+        with httpx.Client(timeout=3.0) as client:
            # Ollama: /api/show returns model details with context info
            if server_type == "ollama":
                resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1067,7 +999,7 @@ def get_model_context_length(
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
-                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
+                local_ctx = _query_local_context_length(model, base_url)
                if local_ctx and local_ctx > 0:
                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
@@ -1081,7 +1013,7 @@ def get_model_context_length(

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
    if provider == "anthropic" or (
-        base_url and base_url_hostname(base_url) == "api.anthropic.com"
+        base_url and "api.anthropic.com" in base_url
    ):
        ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
        if ctx:
@@ -1090,13 +1022,9 @@ def get_model_context_length(
    # 4b. AWS Bedrock — use static context length table.
    # Bedrock's ListFoundationModels doesn't expose context window sizes,
    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
+    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
        try:
-            from hermes_agent.providers.bedrock_adapter import get_bedrock_context_length
+            from agent.bedrock_adapter import get_bedrock_context_length
            return get_bedrock_context_length(model)
        except ImportError:
            pass  # boto3 not installed — fall through to generic resolution
@@ -1118,7 +1046,7 @@ def get_model_context_length(
        if ctx:
            return ctx
    if effective_provider:
-        from hermes_agent.providers.metadata_dev import lookup_models_dev_context
+        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
        if ctx:
            return ctx
@@ -1141,7 +1069,7 @@ def get_model_context_length(

    # 9. Query local server as last resort
    if base_url and is_local_endpoint(base_url):
-        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
+        local_ctx = _query_local_context_length(model, base_url)
        if local_ctx and local_ctx > 0:
            save_context_length(model, base_url, local_ctx)
            return local_ctx
--- a/hermes_agent/providers/metadata_dev.py
+++ b/hermes_agent/providers/metadata_dev.py
@@ -25,7 +25,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

-from hermes_agent.utils import atomic_json_write
+from utils import atomic_json_write

 import requests

@@ -179,7 +179,7 @@ _MODELS_DEV_TO_PROVIDER: Optional[Dict[str, str]] = None

 def _get_cache_path() -> Path:
    """Return path to disk cache file."""
-    from hermes_agent.constants import get_hermes_home
+    from hermes_constants import get_hermes_home
    return get_hermes_home() / "models_dev_cache.json"


--- a/hermes_agent/providers/nous_rate_guard.py
+++ b/hermes_agent/providers/nous_rate_guard.py
@@ -28,7 +28,7 @@ _STATE_FILENAME = "nous.json"
 def _state_path() -> str:
    """Return the path to the Nous rate limit state file."""
    try:
-        from hermes_agent.constants import get_hermes_home
+        from hermes_constants import get_hermes_home
        base = get_hermes_home()
    except ImportError:
        base = os.path.join(os.path.expanduser("~"), ".hermes")
--- a/hermes_agent/agent/prompt_builder.py
+++ b/hermes_agent/agent/prompt_builder.py
@@ -12,10 +12,10 @@ import threading
 from collections import OrderedDict
 from pathlib import Path

-from hermes_agent.constants import get_hermes_home, get_skills_dir, is_wsl
+from hermes_constants import get_hermes_home, get_skills_dir, is_wsl
 from typing import Optional

-from hermes_agent.agent.skill_utils import (
+from agent.skill_utils import (
    extract_skill_conditions,
    extract_skill_description,
    get_all_skills_dirs,
@@ -24,7 +24,7 @@ from hermes_agent.agent.skill_utils import (
    parse_frontmatter,
    skill_matches_platform,
 )
-from hermes_agent.utils import atomic_json_write
+from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -350,13 +350,7 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal. "
-        "File delivery: there is no attachment channel — the user reads your "
-        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
-        "(those are only intercepted on messaging platforms like Telegram, "
-        "Discord, Slack, etc.; on the CLI they render as literal text). "
-        "When referring to a file you created or changed, just state its "
-        "absolute path in plain text; the user can open it from there."
+        "renderable inside a terminal."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -619,7 +613,7 @@ def build_skills_system_prompt(
    # ── Layer 1: in-process LRU cache ─────────────────────────────────
    # Include the resolved platform so per-platform disabled-skill lists
    # produce distinct cache entries (gateway serves multiple platforms).
-    from hermes_agent.gateway.session_context import get_session_env
+    from gateway.session_context import get_session_env
    _platform_hint = (
        os.environ.get("HERMES_PLATFORM")
        or get_session_env("HERMES_SESSION_PLATFORM")
@@ -824,8 +818,8 @@ def build_skills_system_prompt(
 def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str:
    """Build a compact Nous subscription capability block for the system prompt."""
    try:
-        from hermes_agent.cli.nous_subscription import get_nous_subscription_features
-        from hermes_agent.tools.backend_helpers import managed_nous_tools_enabled
+        from hermes_cli.nous_subscription import get_nous_subscription_features
+        from tools.tool_backend_helpers import managed_nous_tools_enabled
    except Exception as exc:
        logger.debug("Failed to import Nous subscription helper: %s", exc)
        return ""
@@ -911,7 +905,7 @@ def load_soul_md() -> Optional[str]:
    ``skip_soul=True`` so SOUL.md isn't injected twice.
    """
    try:
-        from hermes_agent.cli.config import ensure_hermes_home
+        from hermes_cli.config import ensure_hermes_home
        ensure_hermes_home()
    except Exception as e:
        logger.debug("Could not ensure HERMES_HOME before loading SOUL.md: %s", e)
--- a/hermes_agent/providers/caching.py
+++ b/hermes_agent/providers/caching.py
--- a/hermes_agent/providers/rate_limiting.py
+++ b/hermes_agent/providers/rate_limiting.py
--- a/hermes_agent/agent/redact.py
+++ b/hermes_agent/agent/redact.py
@@ -13,48 +13,6 @@ import re

 logger = logging.getLogger(__name__)

-# Sensitive query-string parameter names (case-insensitive exact match).
-# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
-# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
-_SENSITIVE_QUERY_PARAMS = frozenset({
-    "access_token",
-    "refresh_token",
-    "id_token",
-    "token",
-    "api_key",
-    "apikey",
-    "client_secret",
-    "password",
-    "auth",
-    "jwt",
-    "session",
-    "secret",
-    "key",
-    "code",           # OAuth authorization codes
-    "signature",      # pre-signed URL signatures
-    "x-amz-signature",
-})
-
-# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
-# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
-# Ported from nearai/ironclaw#2529.
-_SENSITIVE_BODY_KEYS = frozenset({
-    "access_token",
-    "refresh_token",
-    "id_token",
-    "token",
-    "api_key",
-    "apikey",
-    "client_secret",
-    "password",
-    "auth",
-    "jwt",
-    "secret",
-    "private_key",
-    "authorization",
-    "key",
-})
-
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
 # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
 _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")

-# URLs containing query strings — matches `scheme://...?...[# or end]`.
-# Used to scan text for URLs whose query params may contain secrets.
-# Ported from nearai/ironclaw#2529.
-_URL_WITH_QUERY_RE = re.compile(
-    r"(https?|wss?|ftp)://"          # scheme
-    r"([^\s/?#]+)"                    # authority (may include userinfo)
-    r"([^\s?#]*)"                     # path
-    r"\?([^\s#]+)"                    # query (required)
-    r"(#\S*)?",                       # optional fragment
-)
-
-# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
-# (not just DB protocols already covered by _DB_CONNSTR_RE above).
-# Catches things like `https://user:token@api.example.com/v1/foo`.
-_URL_USERINFO_RE = re.compile(
-    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
-)
-
-# Form-urlencoded body detection: conservative — only applies when the entire
-# text looks like a query string (k=v&k=v pattern with no newlines).
-_FORM_BODY_RE = re.compile(
-    r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
-)
-
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
    return f"{token[:6]}...{token[-4:]}"


-def _redact_query_string(query: str) -> str:
-    """Redact sensitive parameter values in a URL query string.
-
-    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
-    replaced with `***`. Non-sensitive keys pass through unchanged.
-    Empty or malformed pairs are preserved as-is.
-    """
-    if not query:
-        return query
-    parts = []
-    for pair in query.split("&"):
-        if "=" not in pair:
-            parts.append(pair)
-            continue
-        key, _, value = pair.partition("=")
-        if key.lower() in _SENSITIVE_QUERY_PARAMS:
-            parts.append(f"{key}=***")
-        else:
-            parts.append(pair)
-    return "&".join(parts)
-
-
-def _redact_url_query_params(text: str) -> str:
-    """Scan text for URLs with query strings and redact sensitive params.
-
-    Catches opaque tokens that don't match vendor prefix regexes, e.g.
-    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
-    """
-    def _sub(m: re.Match) -> str:
-        scheme = m.group(1)
-        authority = m.group(2)
-        path = m.group(3)
-        query = _redact_query_string(m.group(4))
-        fragment = m.group(5) or ""
-        return f"{scheme}://{authority}{path}?{query}{fragment}"
-    return _URL_WITH_QUERY_RE.sub(_sub, text)
-
-
-def _redact_url_userinfo(text: str) -> str:
-    """Strip `user:password@` from HTTP/WS/FTP URLs.
-
-    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
-    separately by `_DB_CONNSTR_RE`.
-    """
-    return _URL_USERINFO_RE.sub(
-        lambda m: f"{m.group(1)}://{m.group(2)}:***@",
-        text,
-    )
-
-
-def _redact_form_body(text: str) -> str:
-    """Redact sensitive values in a form-urlencoded body.
-
-    Only applies when the entire input looks like a pure form body
-    (k=v&k=v with no newlines, no other text). Single-line non-form
-    text passes through unchanged. This is a conservative pass — the
-    `_redact_url_query_params` function handles embedded query strings.
-    """
-    if not text or "\n" in text or "&" not in text:
-        return text
-    # The body-body form check is strict: only trigger on clean k=v&k=v.
-    if not _FORM_BODY_RE.match(text.strip()):
-        return text
-    return _redact_query_string(text.strip())
-
-
 def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
    # JWT tokens (eyJ... — base64-encoded JSON headers)
    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

-    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
-    # DB schemes are handled above by _DB_CONNSTR_RE.
-    text = _redact_url_userinfo(text)
-
-    # URL query params containing opaque tokens (?access_token=…&code=…)
-    text = _redact_url_query_params(text)
-
-    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
-    text = _redact_form_body(text)
-
    # Discord user/role mentions (<@snowflake_id>)
    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)

--- a/hermes_agent/providers/retry.py
+++ b/hermes_agent/providers/retry.py
--- a/hermes_agent/agent/skill_commands.py
+++ b/hermes_agent/agent/skill_commands.py
@@ -8,12 +8,11 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
 import json
 import logging
 import re
-import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

-from hermes_agent.constants import display_hermes_home
+from hermes_constants import display_hermes_home

 logger = logging.getLogger(__name__)

@@ -23,110 +22,6 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only — no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def _load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_agent.cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def _substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available —
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return f"[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
-    return output
-
-
-def _expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return _run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-

 def build_plan_path(
    user_instruction: str = "",
@@ -156,7 +51,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
        return None

    try:
-        from hermes_agent.tools.skills.tool import SKILLS_DIR, skill_view
+        from tools.skills_tool import SKILLS_DIR, skill_view

        identifier_path = Path(raw_identifier).expanduser()
        if identifier_path.is_absolute():
@@ -202,7 +97,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
    without needing to read config.yaml itself.
    """
    try:
-        from hermes_agent.agent.skill_utils import (
+        from agent.skill_utils import (
            extract_skill_config_vars,
            parse_frontmatter,
            resolve_skill_config_values,
@@ -238,36 +133,14 @@ def _build_skill_message(
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
-    session_id: str | None = None,
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
-    from hermes_agent.tools.skills.tool import SKILLS_DIR
+    from tools.skills_tool import SKILLS_DIR

    content = str(loaded_skill.get("content") or "")

-    # ── Template substitution and inline-shell expansion ──
-    # Done before anything else so downstream blocks (setup notes,
-    # supporting-file hints) see the expanded content.
-    skills_cfg = _load_skills_config()
-    if skills_cfg.get("template_vars", True):
-        content = _substitute_template_vars(content, skill_dir, session_id)
-    if skills_cfg.get("inline_shell", False):
-        timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
-        content = _expand_inline_shell(content, skill_dir, timeout)
-
    parts = [activation_note, "", content.strip()]

-    # ── Inject the absolute skill directory so the agent can reference
-    #    bundled scripts without an extra skill_view() round-trip. ──
-    if skill_dir:
-        parts.append("")
-        parts.append(f"[Skill directory: {skill_dir}]")
-        parts.append(
-            "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
-            "`templates/config.yaml`) against that directory, then run them "
-            "with the terminal tool using the absolute path."
-        )
-
    # ── Inject resolved skill config values ──
    _inject_skill_config(loaded_skill, parts)

@@ -315,13 +188,11 @@ def _build_skill_message(
            # Skill is from an external dir — use the skill name instead
            skill_view_target = skill_dir.name
        parts.append("")
-        parts.append("[This skill has supporting files:]")
+        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
        for sf in supporting:
-            parts.append(f"- {sf}  ->  {skill_dir / sf}")
+            parts.append(f"- {sf}")
        parts.append(
-            f'\nLoad any of these with skill_view(name="{skill_view_target}", '
-            f'file_path="<path>"), or run scripts directly by absolute path '
-            f"(e.g. `node {skill_dir}/scripts/foo.js`)."
+            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
        )

    if user_instruction:
@@ -344,8 +215,8 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    global _skill_commands
    _skill_commands = {}
    try:
-        from hermes_agent.tools.skills.tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from hermes_agent.agent.skill_utils import get_external_skills_dirs
+        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
+        from agent.skill_utils import get_external_skills_dirs
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -461,7 +332,6 @@ def build_skill_invocation_message(
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
-        session_id=task_id,
    )


@@ -500,7 +370,6 @@ def build_preloaded_skills_prompt(
                loaded_skill,
                skill_dir,
                activation_note,
-                session_id=task_id,
            )
        )
        loaded_names.append(skill_name)
--- a/hermes_agent/agent/skill_utils.py
+++ b/hermes_agent/agent/skill_utils.py
@@ -12,7 +12,7 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple

-from hermes_agent.constants import get_config_path, get_skills_dir
+from hermes_constants import get_config_path, get_skills_dir

 logger = logging.getLogger(__name__)

@@ -145,7 +145,7 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    if not isinstance(skills_cfg, dict):
        return set()

-    from hermes_agent.gateway.session_context import get_session_env
+    from gateway.session_context import get_session_env
    resolved_platform = (
        platform
        or os.getenv("HERMES_PLATFORM")
@@ -455,8 +455,7 @@ def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
    """
    if ":" not in name:
        return None, name
-    ns, bare = name.split(":", 1)
-    return ns, bare
+    return tuple(name.split(":", 1))  # type: ignore[return-value]


 def is_valid_namespace(candidate: Optional[str]) -> bool:
--- a/hermes_agent/agent/subdirectory_hints.py
+++ b/hermes_agent/agent/subdirectory_hints.py
@@ -19,7 +19,7 @@ import shlex
 from pathlib import Path
 from typing import Dict, Any, Optional, Set

-from hermes_agent.agent.prompt_builder import _scan_context_content
+from agent.prompt_builder import _scan_context_content

 logger = logging.getLogger(__name__)

--- a/hermes_agent/agent/title_generator.py
+++ b/hermes_agent/agent/title_generator.py
@@ -8,7 +8,7 @@ import logging
 import threading
 from typing import Optional

-from hermes_agent.providers.auxiliary import call_llm
+from agent.auxiliary_client import call_llm

 logger = logging.getLogger(__name__)

--- a/hermes_agent/agent/trajectory.py
+++ b/hermes_agent/agent/trajectory.py
--- a/hermes_agent/providers/pricing.py
+++ b/hermes_agent/providers/pricing.py
@@ -5,8 +5,7 @@ from datetime import datetime, timezone
 from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

-from hermes_agent.providers.metadata import fetch_endpoint_model_metadata, fetch_model_metadata
-from hermes_agent.utils import base_url_host_matches
+from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@@ -394,7 +393,7 @@ def resolve_billing_route(

    if provider_name == "openai-codex":
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
-    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
+    if provider_name == "openrouter" or "openrouter.ai" in base:
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
--- a/scripts/batch_runner.py
+++ b/scripts/batch_runner.py
@@ -20,13 +20,9 @@ Usage:
    python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
 """

-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple
@@ -39,13 +35,13 @@ from rich.console import Console
 logger = logging.getLogger(__name__)
 import fire

-from hermes_agent.agent.loop import AIAgent
-from hermes_agent.tools.distributions import (
+from run_agent import AIAgent
+from toolset_distributions import (
    list_distributions, 
    sample_toolsets_from_distribution,
    validate_distribution
 )
-from hermes_agent.tools.dispatch import TOOL_TO_TOOLSET_MAP
+from model_tools import TOOL_TO_TOOLSET_MAP


 # Global configuration for worker processes
@@ -293,7 +289,7 @@ def _process_single_prompt(
                if config.get("verbose"):
                    print(f"   Prompt {prompt_index}: Docker image check failed: {img_err}", flush=True)

-        from hermes_agent.tools.terminal import register_task_env_overrides
+        from tools.terminal_tool import register_task_env_overrides
        overrides = {
            "docker_image": container_image,
            "modal_image": container_image,
@@ -448,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
            if not reasoning.get("has_any_reasoning", True):
                print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                discarded_no_reasoning += 1
-                completed_in_batch.append(prompt_index)
                continue
            
            # Get and normalize tool stats for consistent schema across all entries
@@ -712,7 +707,7 @@ class BatchRunner:
        """
        checkpoint_data["last_updated"] = datetime.now().isoformat()

-        from hermes_agent.utils import atomic_json_write
+        from utils import atomic_json_write
        if lock:
            with lock:
                atomic_json_write(self.checkpoint_file, checkpoint_data)
@@ -1130,7 +1125,7 @@ def main(
    num_workers: int = 4,
    resume: bool = False,
    verbose: bool = False,
-    show_distributions: bool = False,
+    list_distributions: bool = False,
    ephemeral_system_prompt: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
@@ -1158,7 +1153,7 @@ def main(
        num_workers (int): Number of parallel worker processes (default: 4)
        resume (bool): Resume from checkpoint if run was interrupted (default: False)
        verbose (bool): Enable verbose logging (default: False)
-        show_distributions (bool): List available toolset distributions and exit
+        list_distributions (bool): List available toolset distributions and exit
        ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
        log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
        providers_allowed (str): Comma-separated list of OpenRouter providers to allow (e.g. "anthropic,openai")
@@ -1190,16 +1185,16 @@ def main(
                               --prefill_messages_file=configs/prefill_opus.json
        
        # List available distributions
-        python batch_runner.py --show_distributions
+        python batch_runner.py --list_distributions
    """
    # Handle list distributions
-    if show_distributions:
-        from hermes_agent.tools.distributions import print_distribution_info
-
+    if list_distributions:
+        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
+        
        print("📊 Available Toolset Distributions")
        print("=" * 70)
-
-        all_dists = list_distributions()
+        
+        all_dists = get_all_dists()
        for dist_name in sorted(all_dists.keys()):
            print_distribution_info(dist_name)
        
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -770,12 +770,10 @@ code_execution:
 # Subagent Delegation
 # =============================================================================
 # The delegate_task tool spawns child agents with isolated context.
-# Supports single tasks and batch mode (default 3 parallel, configurable).
+# Supports single tasks and batch mode (up to 3 parallel).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
-  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
-  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -919,39 +917,3 @@ display:
 #   # Names and usernames are NOT affected (user-chosen, publicly visible).
 #   # Routing/delivery still uses the original values internally.
 #   redact_pii: false
-
-# =============================================================================
-# Shell-script hooks
-# =============================================================================
-# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
-# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
-# stdout the script may return JSON that either blocks the tool call or
-# injects context into the next LLM call.
-#
-# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
-#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
-#   pre_api_request, post_api_request, on_session_start, on_session_end,
-#   on_session_finalize, on_session_reset, subagent_stop
-#
-# First-use consent: each (event, command) pair prompts once on a TTY, then
-# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
-# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
-# hooks_auto_accept key below.
-#
-# See website/docs/user-guide/features/hooks.md for the full JSON wire
-# protocol and worked examples.
-#
-# hooks:
-#   pre_tool_call:
-#     - matcher: "terminal"
-#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
-#       timeout: 10
-#   post_tool_call:
-#     - matcher: "write_file|patch"
-#       command: "~/.hermes/agent-hooks/auto-format.sh"
-#   pre_llm_call:
-#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
-#   subagent_stop:
-#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
-#
-# hooks_auto_accept: false
--- a/hermes_agent/cli/repl.py
+++ b/hermes_agent/cli/repl.py
--- a/hermes_agent/cron/init.py
+++ b/hermes_agent/cron/init.py
@@ -15,7 +15,7 @@ The gateway ticks the scheduler every 60 seconds. A file lock prevents
 duplicate execution if multiple processes overlap.
 """

-from hermes_agent.cron.jobs import (
+from cron.jobs import (
    create_job,
    get_job,
    list_jobs,
@@ -26,7 +26,7 @@ from hermes_agent.cron.jobs import (
    trigger_job,
    JOBS_FILE,
 )
-from hermes_agent.cron.scheduler import tick
+from cron.scheduler import tick

 __all__ = [
    "create_job",
--- a/hermes_agent/cron/jobs.py
+++ b/hermes_agent/cron/jobs.py
@@ -9,18 +9,17 @@ import copy
 import json
 import logging
 import tempfile
-import threading
 import os
 import re
 import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home
 from typing import Optional, Dict, List, Any

 logger = logging.getLogger(__name__)

-from hermes_agent.time import now as _hermes_now
+from hermes_time import now as _hermes_now

 try:
    from croniter import croniter
@@ -35,11 +34,6 @@ except ImportError:
 HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
-
-# In-process lock protecting load_jobs→modify→save_jobs cycles.
-# Required when tick() runs jobs in parallel threads — without this,
-# concurrent mark_job_run / advance_next_run calls can clobber each other.
-_jobs_file_lock = threading.Lock()
 OUTPUT_DIR = CRON_DIR / "output"
 ONESHOT_GRACE_SECONDS = 120

@@ -600,44 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
    ``delivery_error`` is tracked separately from the agent error — a job
    can succeed (agent produced output) but fail delivery (platform down).
    """
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        for i, job in enumerate(jobs):
-            if job["id"] == job_id:
-                now = _hermes_now().isoformat()
-                job["last_run_at"] = now
-                job["last_status"] = "ok" if success else "error"
-                job["last_error"] = error if not success else None
-                # Track delivery failures separately — cleared on successful delivery
-                job["last_delivery_error"] = delivery_error
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] == job_id:
+            now = _hermes_now().isoformat()
+            job["last_run_at"] = now
+            job["last_status"] = "ok" if success else "error"
+            job["last_error"] = error if not success else None
+            # Track delivery failures separately — cleared on successful delivery
+            job["last_delivery_error"] = delivery_error
+            
+            # Increment completed count
+            if job.get("repeat"):
+                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
                
-                # Increment completed count
-                if job.get("repeat"):
-                    job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
-                    
-                    # Check if we've hit the repeat limit
-                    times = job["repeat"].get("times")
-                    completed = job["repeat"]["completed"]
-                    if times is not None and times > 0 and completed >= times:
-                        # Remove the job (limit reached)
-                        jobs.pop(i)
-                        save_jobs(jobs)
-                        return
-                
-                # Compute next run
-                job["next_run_at"] = compute_next_run(job["schedule"], now)
+                # Check if we've hit the repeat limit
+                times = job["repeat"].get("times")
+                completed = job["repeat"]["completed"]
+                if times is not None and times > 0 and completed >= times:
+                    # Remove the job (limit reached)
+                    jobs.pop(i)
+                    save_jobs(jobs)
+                    return
+            
+            # Compute next run
+            job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run (one-shot completed), disable
-                if job["next_run_at"] is None:
-                    job["enabled"] = False
-                    job["state"] = "completed"
-                elif job.get("state") != "paused":
-                    job["state"] = "scheduled"
+            # If no next run (one-shot completed), disable
+            if job["next_run_at"] is None:
+                job["enabled"] = False
+                job["state"] = "completed"
+            elif job.get("state") != "paused":
+                job["state"] = "scheduled"

-                save_jobs(jobs)
-                return
+            save_jobs(jobs)
+            return

-        logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
+    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -652,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:

    Returns True if next_run_at was advanced, False otherwise.
    """
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        for job in jobs:
-            if job["id"] == job_id:
-                kind = job.get("schedule", {}).get("kind")
-                if kind not in ("cron", "interval"):
-                    return False
-                now = _hermes_now().isoformat()
-                new_next = compute_next_run(job["schedule"], now)
-                if new_next and new_next != job.get("next_run_at"):
-                    job["next_run_at"] = new_next
-                    save_jobs(jobs)
-                    return True
+    jobs = load_jobs()
+    for job in jobs:
+        if job["id"] == job_id:
+            kind = job.get("schedule", {}).get("kind")
+            if kind not in ("cron", "interval"):
                return False
-        return False
+            now = _hermes_now().isoformat()
+            new_next = compute_next_run(job["schedule"], now)
+            if new_next and new_next != job.get("next_run_at"):
+                job["next_run_at"] = new_next
+                save_jobs(jobs)
+                return True
+            return False
+    return False


 def get_due_jobs() -> List[Dict[str, Any]]:
--- a/hermes_agent/cron/scheduler.py
+++ b/hermes_agent/cron/scheduler.py
@@ -29,9 +29,14 @@ except ImportError:
 from pathlib import Path
 from typing import List, Optional

-from hermes_agent.constants import get_hermes_home
-from hermes_agent.cli.config import load_config
-from hermes_agent.time import now as _hermes_now
+# Add parent directory to path for imports BEFORE repo-level imports.
+# Without this, standalone invocations (e.g. after `hermes update` reloads
+# the module) fail with ModuleNotFoundError for hermes_time et al.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from hermes_constants import get_hermes_home
+from hermes_cli.config import load_config
+from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

@@ -71,7 +76,7 @@ _LEGACY_HOME_TARGET_ENV_VARS = {
    "QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL",
 }

-from hermes_agent.cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
+from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run

 # Sentinel: when a cron agent has nothing new to report, it can start its
 # response with this marker to suppress delivery.  Output is still saved
@@ -147,7 +152,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
        platform_name, rest = deliver_value.split(":", 1)
        platform_key = platform_name.lower()

-        from hermes_agent.tools.send_message import _parse_target_ref
+        from tools.send_message_tool import _parse_target_ref

        parsed_chat_id, parsed_thread_id, is_explicit = _parse_target_ref(platform_key, rest)
        if is_explicit:
@@ -157,7 +162,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d

        # Resolve human-friendly labels like "Alice (dm)" to real IDs.
        try:
-            from hermes_agent.gateway.channel_directory import resolve_channel_name
+            from gateway.channel_directory import resolve_channel_name
            resolved = resolve_channel_name(platform_key, chat_id)
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
@@ -247,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
                coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)

            future = asyncio.run_coroutine_threadsafe(coro, loop)
-            try:
-                result = future.result(timeout=30)
-            except TimeoutError:
-                future.cancel()
-                raise
+            result = future.result(timeout=30)
            if result and not getattr(result, "success", True):
                logger.warning(
                    "Job '%s': media send failed for %s: %s",
@@ -280,8 +281,8 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
            return msg
        return None  # local-only jobs don't deliver — not a failure

-    from hermes_agent.tools.send_message import _send_to_platform
-    from hermes_agent.gateway.config import load_gateway_config, Platform
+    from tools.send_message_tool import _send_to_platform
+    from gateway.config import load_gateway_config, Platform

    platform_map = {
        "telegram": Platform.TELEGRAM,
@@ -327,7 +328,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        delivery_content = content

    # Extract MEDIA: tags so attachments are forwarded as files, not raw text
-    from hermes_agent.gateway.platforms.base import BasePlatformAdapter
+    from gateway.platforms.base import BasePlatformAdapter
    media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)

    try:
@@ -381,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                        loop,
                    )
-                    try:
-                        send_result = future.result(timeout=60)
-                    except TimeoutError:
-                        future.cancel()
-                        raise
+                    send_result = future.result(timeout=60)
                    if send_result and not getattr(send_result, "success", True):
                        err = getattr(send_result, "error", "unknown")
                        logger.warning(
@@ -425,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
                # fresh thread that has no running loop.
                coro.close()
+                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
                    result = future.result(timeout=30)
@@ -434,9 +432,8 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                delivery_errors.append(msg)
                continue

-            error = result.get("error") if result else None
-            if error:
-                msg = f"delivery error: {error}"
+            if result and result.get("error"):
+                msg = f"delivery error: {result['error']}"
                logger.error("Job '%s': %s", job["id"], msg)
                delivery_errors.append(msg)
                continue
@@ -503,7 +500,7 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
        (success, output) — on failure *output* contains the error message so the
        LLM can report the problem to the user.
    """
-    from hermes_agent.constants import get_hermes_home
+    from hermes_constants import get_hermes_home

    scripts_dir = get_hermes_home() / "scripts"
    scripts_dir.mkdir(parents=True, exist_ok=True)
@@ -545,7 +542,7 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:

        # Redact secrets from both stdout and stderr before any return path.
        try:
-            from hermes_agent.agent.redact import redact_sensitive_text
+            from agent.redact import redact_sensitive_text
            stdout = redact_sensitive_text(stdout)
            stderr = redact_sensitive_text(stderr)
        except Exception:
@@ -658,7 +655,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
    if not skill_names:
        return prompt

-    from hermes_agent.tools.skills.tool import skill_view
+    from tools.skills_tool import skill_view

    parts = []
    skipped: list[str] = []
@@ -702,13 +699,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    Returns:
        Tuple of (success, full_output_doc, final_response, error_message)
    """
-    from hermes_agent.agent.loop import AIAgent
+    from run_agent import AIAgent
    
    # Initialize SQLite session store so cron job messages are persisted
    # and discoverable via session_search (same pattern as gateway/run.py).
    _session_db = None
    try:
-        from hermes_agent.state import SessionDB
+        from hermes_state import SessionDB
        _session_db = SessionDB()
    except Exception as e:
        logger.debug("Job '%s': SQLite session store not available: %s", job.get("id", "?"), e)
@@ -750,17 +747,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    # scheduler process — every job this process runs is a cron job.
    os.environ["HERMES_CRON_SESSION"] = "1"

-    # Use ContextVars for per-job session/delivery state so parallel jobs
-    # don't clobber each other's targets (os.environ is process-global).
-    from hermes_agent.gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
-
-    _ctx_tokens = set_session_vars(
-        platform=origin["platform"] if origin else "",
-        chat_id=str(origin["chat_id"]) if origin else "",
-        chat_name=origin.get("chat_name", "") if origin else "",
-    )
-
    try:
+        # Inject origin context so the agent's send_message tool knows the chat.
+        # Must be INSIDE the try block so the finally cleanup always runs.
+        if origin:
+            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
+            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
+            if origin.get("chat_name"):
+                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -771,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
+            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
+            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
            if delivery_target.get("thread_id") is not None:
-                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
+                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -797,7 +791,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        # Apply IPv4 preference if configured.
        try:
-            from hermes_agent.constants import apply_ipv4_preference
+            from hermes_constants import apply_ipv4_preference
            _net_cfg = _cfg.get("network", {})
            if isinstance(_net_cfg, dict) and _net_cfg.get("force_ipv4"):
                apply_ipv4_preference(force=True)
@@ -805,7 +799,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            pass

        # Reasoning config from config.yaml
-        from hermes_agent.constants import parse_reasoning_effort
+        from hermes_constants import parse_reasoning_effort
        effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
        reasoning_config = parse_reasoning_effort(effort)

@@ -813,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        prefill_messages = None
        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
        if prefill_file:
+            import json as _json
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
                pfpath = _hermes_home / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
-                        prefill_messages = json.load(_pf)
+                        prefill_messages = _json.load(_pf)
                    if not isinstance(prefill_messages, list):
                        prefill_messages = None
                except Exception as e:
@@ -832,7 +827,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        # Provider routing
        pr = _cfg.get("provider_routing", {})

-        from hermes_agent.cli.runtime_provider import (
+        from hermes_cli.runtime_provider import (
            resolve_runtime_provider,
            format_runtime_provider_error,
        )
@@ -852,7 +847,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        runtime_provider = str(runtime.get("provider") or "").strip().lower()
        if runtime_provider:
            try:
-                from hermes_agent.providers.credential_pool import load_pool
+                from agent.credential_pool import load_pool
                pool = load_pool(runtime_provider)
                if pool.has_credentials():
                    credential_pool = pool
@@ -1017,8 +1012,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
-        # Clean up ContextVar session/delivery state for this job.
-        clear_session_vars(_ctx_tokens)
+        # Clean up injected env vars so they don't leak to other jobs
+        for key in (
+            "HERMES_SESSION_PLATFORM",
+            "HERMES_SESSION_CHAT_ID",
+            "HERMES_SESSION_CHAT_NAME",
+            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
+            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
+            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
+        ):
+            os.environ.pop(key, None)
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -1071,41 +1074,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
        if verbose:
            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))

-        # Advance next_run_at for all recurring jobs FIRST, under the file lock,
-        # before any execution begins.  This preserves at-most-once semantics.
+        executed = 0
        for job in due_jobs:
-            advance_next_run(job["id"])
-
-        # Resolve max parallel workers: env var > config.yaml > unbounded.
-        # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
-        _max_workers: Optional[int] = None
-        try:
-            _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
-            if _env_par:
-                _max_workers = int(_env_par) or None
-        except (ValueError, TypeError):
-            logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
-        if _max_workers is None:
            try:
-                _ucfg = load_config() or {}
-                _cfg_par = (
-                    _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
-                ).get("max_parallel_jobs")
-                if _cfg_par is not None:
-                    _max_workers = int(_cfg_par) or None
-            except Exception:
-                pass
+                # For recurring jobs (cron/interval), advance next_run_at to the
+                # next future occurrence BEFORE execution.  This way, if the
+                # process crashes mid-run, the job won't re-fire on restart.
+                # One-shot jobs are left alone so they can retry on restart.
+                advance_next_run(job["id"])

-        if verbose:
-            logger.info(
-                "Running %d job(s) in parallel (max_workers=%s)",
-                len(due_jobs),
-                _max_workers if _max_workers else "unbounded",
-            )
-
-        def _process_job(job: dict) -> bool:
-            """Run one due job end-to-end: execute, save, deliver, mark."""
-            try:
                success, output, final_response, error = run_job(job)

                output_file = save_job_output(job["id"], output)
@@ -1137,23 +1114,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"

                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
-                return True
+                executed += 1

            except Exception as e:
                logger.error("Error processing job %s: %s", job['id'], e)
                mark_job_run(job["id"], False, str(e))
-                return False

-        # Run all due jobs concurrently, each in its own ContextVar copy
-        # so session/delivery state stays isolated per-thread.
-        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
-            _futures = []
-            for job in due_jobs:
-                _ctx = contextvars.copy_context()
-                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-            _results = [f.result() for f in _futures]
-
-        return sum(_results)
+        return executed
    finally:
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
--- a/datagen-config-examples/run_browser_tasks.sh
+++ b/datagen-config-examples/run_browser_tasks.sh
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
 # Point to the example dataset in this directory
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

-python scripts/batch_runner.py \
+python batch_runner.py \
  --dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
  --batch_size=5 \
  --run_name="browser_tasks_example" \
--- a/datagen-config-examples/web_research.yaml
+++ b/datagen-config-examples/web_research.yaml
@@ -4,7 +4,7 @@
 # Generates tool-calling trajectories for multi-step web research tasks.
 #
 # Usage:
-#   python scripts/batch_runner.py \
+#   python batch_runner.py \
 #     --config datagen-config-examples/web_research.yaml \
 #     --run_name web_research_v1

--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -65,7 +65,7 @@ fi

 # Sync bundled skills (manifest-based so user edits are preserved)
 if [ -d "$INSTALL_DIR/skills" ]; then
-    hermes-skills-sync
+    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

 exec hermes "$@"
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -18,14 +18,11 @@ import logging
 import os
 import uuid
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Set

-if TYPE_CHECKING:
-    from hermes_agent.tools.budget_config import BudgetConfig
-
-from hermes_agent.tools.dispatch import handle_function_call
-from hermes_agent.tools.terminal import get_active_env
-from hermes_agent.tools.result_storage import maybe_persist_tool_result, enforce_turn_budget
+from model_tools import handle_function_call
+from tools.terminal_tool import get_active_env
+from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget

 # Thread pool for running sync tool calls that internally use asyncio.run()
 # (e.g., the Modal/Docker/Daytona terminal backends). Running them in a separate
@@ -164,7 +161,7 @@ class HermesAgentLoop:
                        thresholds, per-turn aggregate budget, and preview size.
                        If None, uses DEFAULT_BUDGET (current hardcoded values).
        """
-        from hermes_agent.tools.budget_config import DEFAULT_BUDGET
+        from tools.budget_config import DEFAULT_BUDGET
        self.server = server
        self.tool_schemas = tool_schemas
        self.valid_tool_names = valid_tool_names
@@ -190,7 +187,7 @@ class HermesAgentLoop:
        tool_errors: List[ToolError] = []

        # Per-loop TodoStore for the todo tool (ephemeral, dies with the loop)
-        from hermes_agent.tools.todo import TodoStore, todo_tool as _todo_tool
+        from tools.todo_tool import TodoStore, todo_tool as _todo_tool
        _todo_store = TodoStore()

        # Extract user task from first user message for browser_snapshot context
--- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py
+++ b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
@@ -60,7 +60,7 @@ from atroposlib.envs.server_handling.server_manager import APIServerConfig
 from environments.agent_loop import AgentResult, HermesAgentLoop
 from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
 from environments.tool_context import ToolContext
-from hermes_agent.tools.terminal import (
+from tools.terminal_tool import (
    register_task_env_overrides,
    clear_task_env_overrides,
    cleanup_vm,
@@ -876,7 +876,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
            # Let cancellations propagate (finally blocks run cleanup_vm)
            await asyncio.gather(*eval_tasks, return_exceptions=True)
            # Belt-and-suspenders: clean up any remaining sandboxes
-            from hermes_agent.tools.terminal import cleanup_all_environments
+            from tools.terminal_tool import cleanup_all_environments
            cleanup_all_environments()
            print("All sandboxes cleaned up.")
            return
@@ -984,7 +984,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):

        # Kill all remaining sandboxes. Timed-out tasks leave orphaned thread
        # pool workers still executing commands -- cleanup_all stops them.
-        from hermes_agent.tools.terminal import cleanup_all_environments
+        from tools.terminal_tool import cleanup_all_environments
        print("\nCleaning up all sandboxes...")
        cleanup_all_environments()

--- a/environments/benchmarks/yc_bench/yc_bench_env.py
+++ b/environments/benchmarks/yc_bench/yc_bench_env.py
@@ -709,7 +709,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
            tqdm.write("\n[INTERRUPTED] Stopping evaluation...")
            pbar.close()
            try:
-                from hermes_agent.tools.terminal import cleanup_all_environments
+                from tools.terminal_tool import cleanup_all_environments
                cleanup_all_environments()
            except Exception:
                pass
@@ -819,7 +819,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
            print(f"Results saved to: {self._streaming_path}")

        try:
-            from hermes_agent.tools.terminal import cleanup_all_environments
+            from tools.terminal_tool import cleanup_all_environments
            cleanup_all_environments()
        except Exception:
            pass
--- a/environments/hermes_base_env.py
+++ b/environments/hermes_base_env.py
@@ -62,15 +62,15 @@ from atroposlib.type_definitions import Item

 from environments.agent_loop import AgentResult, HermesAgentLoop
 from environments.tool_context import ToolContext
-from hermes_agent.tools.budget_config import (
+from tools.budget_config import (
    DEFAULT_RESULT_SIZE_CHARS,
    DEFAULT_TURN_BUDGET_CHARS,
    DEFAULT_PREVIEW_SIZE_CHARS,
 )

 # Import hermes-agent toolset infrastructure
-from hermes_agent.tools.dispatch import get_tool_definitions
-from hermes_agent.tools.distributions import sample_toolsets_from_distribution
+from model_tools import get_tool_definitions
+from toolset_distributions import sample_toolsets_from_distribution

 logger = logging.getLogger(__name__)

@@ -209,7 +209,7 @@ class HermesAgentEnvConfig(BaseEnvConfig):

    def build_budget_config(self):
        """Build a BudgetConfig from env config fields."""
-        from hermes_agent.tools.budget_config import BudgetConfig
+        from tools.budget_config import BudgetConfig
        return BudgetConfig(
            default_result_size=self.default_result_size_chars,
            turn_budget=self.turn_budget_chars,
--- a/environments/tool_context.py
+++ b/environments/tool_context.py
@@ -31,9 +31,9 @@ from typing import Any, Dict, List, Optional
 import asyncio
 import concurrent.futures

-from hermes_agent.tools.dispatch import handle_function_call
-from hermes_agent.tools.terminal import cleanup_vm
-from hermes_agent.tools.browser.tool import cleanup_browser
+from model_tools import handle_function_call
+from tools.terminal_tool import cleanup_vm
+from tools.browser_tool import cleanup_browser

 logger = logging.getLogger(__name__)

@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
    try:
        loop = asyncio.get_running_loop()
        # We're in an async context -- need to run in thread
+        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(
                handle_function_call, tool_name, arguments, task_id
@@ -446,7 +447,7 @@ class ToolContext:
        """
        # Kill any background processes from this rollout (safety net)
        try:
-            from hermes_agent.tools.process_registry import process_registry
+            from tools.process_registry import process_registry
            killed = process_registry.kill_all(task_id=self.task_id)
            if killed:
                logger.debug("Process cleanup for task %s: killed %d process(es)", self.task_id, killed)
--- a/hermes_agent/gateway/init.py
+++ b/hermes_agent/gateway/init.py
--- a/hermes_agent/gateway/builtin_hooks/init.py
+++ b/hermes_agent/gateway/builtin_hooks/init.py
--- a/hermes_agent/gateway/builtin_hooks/boot_md.py
+++ b/hermes_agent/gateway/builtin_hooks/boot_md.py
@@ -20,9 +20,9 @@ suppress delivery.
 import logging
 import threading

-logger = logging.getLogger(__name__)
+logger = logging.getLogger("hooks.boot-md")

-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home
 HERMES_HOME = get_hermes_home()
 BOOT_FILE = HERMES_HOME / "BOOT.md"

@@ -45,7 +45,7 @@ def _build_boot_prompt(content: str) -> str:
 def _run_boot_agent(content: str) -> None:
    """Spawn a one-shot agent session to execute the boot instructions."""
    try:
-        from hermes_agent.agent.loop import AIAgent
+        from run_agent import AIAgent

        prompt = _build_boot_prompt(content)
        agent = AIAgent(
--- a/hermes_agent/gateway/channel_directory.py
+++ b/hermes_agent/gateway/channel_directory.py
@@ -11,8 +11,8 @@ import logging
 from datetime import datetime
 from typing import Any, Dict, List, Optional

-from hermes_agent.cli.config import get_hermes_home
-from hermes_agent.utils import atomic_json_write
+from hermes_cli.config import get_hermes_home
+from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -63,7 +63,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:

    Returns the directory dict and writes it to DIRECTORY_PATH.
    """
-    from hermes_agent.gateway.config import Platform
+    from gateway.config import Platform

    platforms: Dict[str, List[Dict[str, str]]] = {}

@@ -144,7 +144,7 @@ def _build_slack(adapter) -> List[Dict[str, str]]:
        return _build_from_sessions("slack")

    try:
-        from hermes_agent.tools.send_message import _send_slack  # noqa: F401
+        from tools.send_message_tool import _send_slack  # noqa: F401
        # Use the Slack Web API directly if available
    except Exception:
        pass
--- a/hermes_agent/gateway/config.py
+++ b/hermes_agent/gateway/config.py
@@ -16,8 +16,8 @@ from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Any
 from enum import Enum

-from hermes_agent.cli.config import get_hermes_home
-from hermes_agent.utils import is_truthy_value
+from hermes_cli.config import get_hermes_home
+from utils import is_truthy_value

 logger = logging.getLogger(__name__)

@@ -576,14 +576,6 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
-                if "dm_policy" in platform_cfg:
-                    bridged["dm_policy"] = platform_cfg["dm_policy"]
-                if "allow_from" in platform_cfg:
-                    bridged["allow_from"] = platform_cfg["allow_from"]
-                if "group_policy" in platform_cfg:
-                    bridged["group_policy"] = platform_cfg["group_policy"]
-                if "group_allow_from" in platform_cfg:
-                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
@@ -670,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
                if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
                    os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
                if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
-                    os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
+                    import json as _json
+                    os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -707,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
-                if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
-                    os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
-                af = whatsapp_cfg.get("allow_from")
-                if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
-                    if isinstance(af, list):
-                        af = ",".join(str(v) for v in af)
-                    os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
-                if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
-                    os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
-                gaf = whatsapp_cfg.get("group_allow_from")
-                if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
-                    if isinstance(gaf, list):
-                        gaf = ",".join(str(v) for v in gaf)
-                    os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)

            # DingTalk settings → env vars (env vars take precedence)
            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -821,7 +800,7 @@ def _validate_gateway_config(config: "GatewayConfig") -> None:
    # without changing placeholder values get a clear startup error instead
    # of a confusing "auth failed" from the platform API.
    try:
-        from hermes_agent.cli.auth.auth import has_usable_secret
+        from hermes_cli.auth import has_usable_secret
    except ImportError:
        has_usable_secret = None  # type: ignore[assignment]

@@ -1258,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            if legacy_home:
                qq_home = legacy_home
                qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
+                import logging
                logging.getLogger(__name__).warning(
                    "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
                    "in your .env for consistency with the platform key."
--- a/hermes_agent/gateway/delivery.py
+++ b/hermes_agent/gateway/delivery.py
@@ -14,7 +14,7 @@ from datetime import datetime
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Any

-from hermes_agent.cli.config import get_hermes_home
+from hermes_cli.config import get_hermes_home

 logger = logging.getLogger(__name__)

--- a/hermes_agent/gateway/display_config.py
+++ b/hermes_agent/gateway/display_config.py
--- a/hermes_agent/gateway/hooks.py
+++ b/hermes_agent/gateway/hooks.py
@@ -25,7 +25,7 @@ from typing import Any, Callable, Dict, List, Optional

 import yaml

-from hermes_agent.cli.config import get_hermes_home
+from hermes_cli.config import get_hermes_home


 HOOKS_DIR = get_hermes_home() / "hooks"
@@ -54,7 +54,7 @@ class HookRegistry:
    def _register_builtin_hooks(self) -> None:
        """Register built-in hooks that are always active."""
        try:
-            from hermes_agent.gateway.builtin_hooks.boot_md import handle as boot_md_handle
+            from gateway.builtin_hooks.boot_md import handle as boot_md_handle

            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
            self._loaded_hooks.append({
--- a/hermes_agent/gateway/mirror.py
+++ b/hermes_agent/gateway/mirror.py
@@ -14,7 +14,7 @@ import logging
 from datetime import datetime
 from typing import Optional

-from hermes_agent.cli.config import get_hermes_home
+from hermes_cli.config import get_hermes_home

 logger = logging.getLogger(__name__)

@@ -118,7 +118,7 @@ def _append_to_sqlite(session_id: str, message: dict) -> None:
    """Append a message to the SQLite session database."""
    db = None
    try:
-        from hermes_agent.state import SessionDB
+        from hermes_state import SessionDB
        db = SessionDB()
        db.append_message(
            session_id=session_id,
--- a/hermes_agent/gateway/pairing.py
+++ b/hermes_agent/gateway/pairing.py
@@ -27,7 +27,7 @@ import time
 from pathlib import Path
 from typing import Optional

-from hermes_agent.constants import get_hermes_dir
+from hermes_constants import get_hermes_dir


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
--- a/hermes_agent/gateway/platforms/ADDING_A_PLATFORM.md
+++ b/hermes_agent/gateway/platforms/ADDING_A_PLATFORM.md
--- a/hermes_agent/gateway/platforms/init.py
+++ b/hermes_agent/gateway/platforms/init.py
--- a/hermes_agent/gateway/platforms/api_server.py
+++ b/hermes_agent/gateway/platforms/api_server.py
@@ -32,16 +32,16 @@ import sqlite3
 import time
 import uuid
 from typing import Any, Dict, List, Optional
+
 try:
    from aiohttp import web
-
    AIOHTTP_AVAILABLE = True
 except ImportError:
    AIOHTTP_AVAILABLE = False
    web = None  # type: ignore[assignment]

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    SendResult,
    is_network_accessible,
@@ -59,11 +59,6 @@ MAX_NORMALIZED_TEXT_LENGTH = 65_536  # 64 KB cap for normalized content parts
 MAX_CONTENT_LIST_SIZE = 1_000  # Max items when content is an array


-def check_api_server_requirements() -> bool:
-    """Check if API server adapter dependencies are available."""
-    return AIOHTTP_AVAILABLE
-
-
 def _normalize_chat_content(
    content: Any, *, _max_depth: int = 10, _depth: int = 0,
 ) -> str:
@@ -122,159 +117,11 @@ def _normalize_chat_content(
        return ""


-# Content part type aliases used by the OpenAI Chat Completions and Responses
-# APIs.  We accept both spellings on input and emit a single canonical internal
-# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
-# rest of the agent pipeline already understands.
-_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
-_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
-_FILE_PART_TYPES = frozenset({"file", "input_file"})
+def check_api_server_requirements() -> bool:
+    """Check if API server dependencies are available."""
+    return AIOHTTP_AVAILABLE


-def _normalize_multimodal_content(content: Any) -> Any:
-    """Validate and normalize multimodal content for the API server.
-
-    Returns a plain string when the content is text-only, or a list of
-    ``{"type": "text"|"image_url", ...}`` parts when images are present.
-    The output shape is the native OpenAI Chat Completions vision format,
-    which the agent pipeline accepts verbatim (OpenAI-wire providers) or
-    converts (``_preprocess_anthropic_content`` for Anthropic).
-
-    Raises ``ValueError`` with an OpenAI-style code on invalid input:
-      * ``unsupported_content_type`` — file/input_file/file_id parts, or
-        non-image ``data:`` URLs.
-      * ``invalid_image_url`` — missing URL or unsupported scheme.
-      * ``invalid_content_part`` — malformed text/image objects.
-
-    Callers translate the ValueError into a 400 response.
-    """
-    # Scalar passthrough mirrors ``_normalize_chat_content``.
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
-    if not isinstance(content, list):
-        # Mirror the legacy text-normalizer's fallback so callers that
-        # pre-existed image support still get a string back.
-        return _normalize_chat_content(content)
-
-    items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
-    normalized_parts: List[Dict[str, Any]] = []
-    text_accum_len = 0
-
-    for part in items:
-        if isinstance(part, str):
-            if part:
-                trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
-                normalized_parts.append({"type": "text", "text": trimmed})
-                text_accum_len += len(trimmed)
-            continue
-
-        if not isinstance(part, dict):
-            # Ignore unknown scalars for forward compatibility with future
-            # Responses API additions (e.g. ``refusal``).  The same policy
-            # the text normalizer applies.
-            continue
-
-        raw_type = part.get("type")
-        part_type = str(raw_type or "").strip().lower()
-
-        if part_type in _TEXT_PART_TYPES:
-            text = part.get("text")
-            if text is None:
-                continue
-            if not isinstance(text, str):
-                text = str(text)
-            if text:
-                trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
-                normalized_parts.append({"type": "text", "text": trimmed})
-                text_accum_len += len(trimmed)
-            continue
-
-        if part_type in _IMAGE_PART_TYPES:
-            detail = part.get("detail")
-            image_ref = part.get("image_url")
-            # OpenAI Responses sends ``input_image`` with a top-level
-            # ``image_url`` string; Chat Completions sends ``image_url`` as
-            # ``{"url": "...", "detail": "..."}``.  Support both.
-            if isinstance(image_ref, dict):
-                url_value = image_ref.get("url")
-                detail = image_ref.get("detail", detail)
-            else:
-                url_value = image_ref
-            if not isinstance(url_value, str) or not url_value.strip():
-                raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
-            url_value = url_value.strip()
-            lowered = url_value.lower()
-            if lowered.startswith("data:"):
-                if not lowered.startswith("data:image/") or "," not in url_value:
-                    raise ValueError(
-                        "unsupported_content_type:Only image data URLs are supported. "
-                        "Non-image data payloads are not supported."
-                    )
-            elif not (lowered.startswith("http://") or lowered.startswith("https://")):
-                raise ValueError(
-                    "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
-                )
-            image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
-            if detail is not None:
-                if not isinstance(detail, str) or not detail.strip():
-                    raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
-                image_part["image_url"]["detail"] = detail.strip()
-            normalized_parts.append(image_part)
-            continue
-
-        if part_type in _FILE_PART_TYPES:
-            raise ValueError(
-                "unsupported_content_type:Inline image inputs are supported, "
-                "but uploaded files and document inputs are not supported on this endpoint."
-            )
-
-        # Unknown part type — reject explicitly so clients get a clear error
-        # instead of a silently dropped turn.
-        raise ValueError(
-            f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
-            "Only text and image_url/input_image parts are supported."
-        )
-
-    if not normalized_parts:
-        return ""
-
-    # Text-only: collapse to a plain string so downstream logging/trajectory
-    # code sees the native shape and prompt caching on text-only turns is
-    # unaffected.
-    if all(p.get("type") == "text" for p in normalized_parts):
-        return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
-
-    return normalized_parts
-
-
-def _content_has_visible_payload(content: Any) -> bool:
-    """True when content has any text or image attachment.  Used to reject empty turns."""
-    if isinstance(content, str):
-        return bool(content.strip())
-    if isinstance(content, list):
-        for part in content:
-            if isinstance(part, dict):
-                ptype = str(part.get("type") or "").strip().lower()
-                if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
-                    return True
-                if ptype in _IMAGE_PART_TYPES:
-                    return True
-    return False
-
-
-def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
-    """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
-    raw = str(exc)
-    code, _, message = raw.partition(":")
-    if not message:
-        code, message = "invalid_content_part", raw
-    return web.json_response(
-        _openai_error(message, code=code, param=param),
-        status=400,
-    )
-
 class ResponseStore:
    """
    SQLite-backed LRU store for Responses API state.
@@ -291,7 +138,7 @@ class ResponseStore:
        self._max_size = max_size
        if db_path is None:
            try:
-                from hermes_agent.cli.config import get_hermes_home
+                from hermes_cli.config import get_hermes_home
                db_path = str(get_hermes_home() / "response_store.db")
            except Exception:
                db_path = ":memory:"
@@ -322,6 +169,7 @@ class ResponseStore:
        ).fetchone()
        if row is None:
            return None
+        import time
        self._conn.execute(
            "UPDATE responses SET accessed_at = ? WHERE response_id = ?",
            (time.time(), response_id),
@@ -331,6 +179,7 @@ class ResponseStore:

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Store a response, evicting the oldest if at capacity."""
+        import time
        self._conn.execute(
            "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
            (response_id, json.dumps(data, default=str), time.time()),
@@ -390,26 +239,30 @@ _CORS_HEADERS = {
 }


-@web.middleware
-async def cors_middleware(request, handler):
-    """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
-    adapter = request.app.get("api_server_adapter")
-    origin = request.headers.get("Origin", "")
-    cors_headers = None
-    if adapter is not None:
-        if not adapter._origin_allowed(origin):
-            return web.Response(status=403)
-        cors_headers = adapter._cors_headers_for_origin(origin)
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def cors_middleware(request, handler):
+        """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
+        adapter = request.app.get("api_server_adapter")
+        origin = request.headers.get("Origin", "")
+        cors_headers = None
+        if adapter is not None:
+            if not adapter._origin_allowed(origin):
+                return web.Response(status=403)
+            cors_headers = adapter._cors_headers_for_origin(origin)

-    if request.method == "OPTIONS":
-        if cors_headers is None:
-            return web.Response(status=403)
-        return web.Response(status=200, headers=cors_headers)
+        if request.method == "OPTIONS":
+            if cors_headers is None:
+                return web.Response(status=403)
+            return web.Response(status=200, headers=cors_headers)
+
+        response = await handler(request)
+        if cors_headers is not None:
+            response.headers.update(cors_headers)
+        return response
+else:
+    cors_middleware = None  # type: ignore[assignment]

-    response = await handler(request)
-    if cors_headers is not None:
-        response.headers.update(cors_headers)
-    return response

 def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
    """OpenAI-style error envelope."""
@@ -423,18 +276,21 @@ def _openai_error(message: str, err_type: str = "invalid_request_error", param:
    }


-@web.middleware
-async def body_limit_middleware(request, handler):
-    """Reject overly large request bodies early based on Content-Length."""
-    if request.method in ("POST", "PUT", "PATCH"):
-        cl = request.headers.get("Content-Length")
-        if cl is not None:
-            try:
-                if int(cl) > MAX_REQUEST_BYTES:
-                    return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
-            except ValueError:
-                return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
-    return await handler(request)
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def body_limit_middleware(request, handler):
+        """Reject overly large request bodies early based on Content-Length."""
+        if request.method in ("POST", "PUT", "PATCH"):
+            cl = request.headers.get("Content-Length")
+            if cl is not None:
+                try:
+                    if int(cl) > MAX_REQUEST_BYTES:
+                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
+                except ValueError:
+                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
+        return await handler(request)
+else:
+    body_limit_middleware = None  # type: ignore[assignment]

 _SECURITY_HEADERS = {
    "X-Content-Type-Options": "nosniff",
@@ -442,13 +298,16 @@ _SECURITY_HEADERS = {
 }


-@web.middleware
-async def security_headers_middleware(request, handler):
-    """Add security headers to all responses (including errors)."""
-    response = await handler(request)
-    for k, v in _SECURITY_HEADERS.items():
-        response.headers.setdefault(k, v)
-    return response
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def security_headers_middleware(request, handler):
+        """Add security headers to all responses (including errors)."""
+        response = await handler(request)
+        for k, v in _SECURITY_HEADERS.items():
+            response.headers.setdefault(k, v)
+        return response
+else:
+    security_headers_middleware = None  # type: ignore[assignment]


 class _IdempotencyCache:
@@ -456,12 +315,12 @@ class _IdempotencyCache:
    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
        from collections import OrderedDict
        self._store = OrderedDict()
-        self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
        self._ttl = ttl_seconds
        self._max = max_items

    def _purge(self):
-        now = time.time()
+        import time as _t
+        now = _t.time()
        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
        for k in expired:
            self._store.pop(k, None)
@@ -473,27 +332,11 @@ class _IdempotencyCache:
        item = self._store.get(key)
        if item and item["fp"] == fingerprint:
            return item["resp"]
-
-        inflight_key = (key, fingerprint)
-        task = self._inflight.get(inflight_key)
-        if task is None:
-            async def _compute_and_store():
-                resp = await compute_coro()
-                import time as _t
-                self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-                self._purge()
-                return resp
-
-            task = asyncio.create_task(_compute_and_store())
-            self._inflight[inflight_key] = task
-
-            def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
-                if self._inflight.get(inflight_key) is done_task:
-                    self._inflight.pop(inflight_key, None)
-
-            task.add_done_callback(_clear_inflight)
-
-        return await asyncio.shield(task)
+        resp = await compute_coro()
+        import time as _t
+        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+        self._purge()
+        return resp


 _idem_cache = _IdempotencyCache()
@@ -523,30 +366,6 @@ def _derive_chat_session_id(
    return f"api-{digest}"


-_CRON_AVAILABLE = False
-try:
-    from hermes_agent.cron.jobs import (
-        list_jobs as _cron_list,
-        get_job as _cron_get,
-        create_job as _cron_create,
-        update_job as _cron_update,
-        remove_job as _cron_remove,
-        pause_job as _cron_pause,
-        resume_job as _cron_resume,
-        trigger_job as _cron_trigger,
-    )
-    _CRON_AVAILABLE = True
-except ImportError:
-    _cron_list = None
-    _cron_get = None
-    _cron_create = None
-    _cron_update = None
-    _cron_remove = None
-    _cron_pause = None
-    _cron_resume = None
-    _cron_trigger = None
-
-
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -604,7 +423,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if explicit and explicit.strip():
            return explicit.strip()
        try:
-            from hermes_agent.cli.profiles import get_active_profile_name
+            from hermes_cli.profiles import get_active_profile_name
            profile = get_active_profile_name()
            if profile and profile not in ("default", "custom"):
                return profile
@@ -680,7 +499,7 @@ class APIServerAdapter(BasePlatformAdapter):
        """
        if self._session_db is None:
            try:
-                from hermes_agent.state import SessionDB
+                from hermes_state import SessionDB
                self._session_db = SessionDB()
            except Exception as e:
                logger.debug("SessionDB unavailable for API server: %s", e)
@@ -707,9 +526,9 @@ class APIServerAdapter(BasePlatformAdapter):
        from config.yaml platform_toolsets.api_server (same as all other
        gateway platforms), falling back to the hermes-api-server default.
        """
-        from hermes_agent.agent.loop import AIAgent
-        from hermes_agent.gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config
-        from hermes_agent.cli.tools_config import _get_platform_tools
+        from run_agent import AIAgent
+        from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config
+        from hermes_cli.tools_config import _get_platform_tools

        runtime_kwargs = _resolve_runtime_agent_kwargs()
        model = _resolve_gateway_model()
@@ -721,7 +540,7 @@ class APIServerAdapter(BasePlatformAdapter):

        # Load fallback provider chain so the API server platform has the
        # same fallback behaviour as Telegram/Discord/Slack (fixes #4954).
-        from hermes_agent.gateway.run import GatewayRunner
+        from gateway.run import GatewayRunner
        fallback_model = GatewayRunner._load_fallback_model()

        agent = AIAgent(
@@ -758,7 +577,7 @@ class APIServerAdapter(BasePlatformAdapter):
        dashboard can display full status without needing a shared PID file or
        /proc access.  No authentication required.
        """
-        from hermes_agent.gateway.status import read_runtime_status
+        from gateway.status import read_runtime_status

        runtime = read_runtime_status() or {}
        return web.json_response({
@@ -793,7 +612,7 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

-    async def _handle_chat_completions(self, request: "web.Request") -> "web.StreamResponse":
+    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -818,32 +637,26 @@ class APIServerAdapter(BasePlatformAdapter):
        system_prompt = None
        conversation_messages: List[Dict[str, str]] = []

-        for idx, msg in enumerate(messages):
+        for msg in messages:
            role = msg.get("role", "")
-            raw_content = msg.get("content", "")
+            content = _normalize_chat_content(msg.get("content", ""))
            if role == "system":
-                # System messages don't support images (Anthropic rejects, OpenAI
-                # text-model systems don't render them).  Flatten to text.
-                content = _normalize_chat_content(raw_content)
+                # Accumulate system messages
                if system_prompt is None:
                    system_prompt = content
                else:
                    system_prompt = system_prompt + "\n" + content
            elif role in ("user", "assistant"):
-                try:
-                    content = _normalize_multimodal_content(raw_content)
-                except ValueError as exc:
-                    return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
                conversation_messages.append({"role": role, "content": content})

        # Extract the last user message as the primary input
-        user_message: Any = ""
+        user_message = ""
        history = []
        if conversation_messages:
            user_message = conversation_messages[-1].get("content", "")
            history = conversation_messages[:-1]

-        if not _content_has_visible_payload(user_message):
+        if not user_message:
            return web.json_response(
                {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
                status=400,
@@ -939,7 +752,7 @@ class APIServerAdapter(BasePlatformAdapter):
                    return
                if name.startswith("_"):
                    return
-                from hermes_agent.agent.display import get_tool_emoji
+                from agent.display import get_tool_emoji
                emoji = get_tool_emoji(name)
                label = preview or name
                _stream_q.put(("__tool_progress__", {
@@ -1577,7 +1390,7 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_responses(self, request: "web.Request") -> "web.StreamResponse":
+    async def _handle_responses(self, request: "web.Request") -> "web.Response":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -1611,19 +1424,16 @@ class APIServerAdapter(BasePlatformAdapter):
            # No error if conversation doesn't exist yet — it's a new conversation

        # Normalize input to message list
-        input_messages: List[Dict[str, Any]] = []
+        input_messages: List[Dict[str, str]] = []
        if isinstance(raw_input, str):
            input_messages = [{"role": "user", "content": raw_input}]
        elif isinstance(raw_input, list):
-            for idx, item in enumerate(raw_input):
+            for item in raw_input:
                if isinstance(item, str):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    try:
-                        content = _normalize_multimodal_content(item.get("content", ""))
-                    except ValueError as exc:
-                        return _multimodal_validation_error(exc, param=f"input[{idx}].content")
+                    content = _normalize_chat_content(item.get("content", ""))
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1632,7 +1442,7 @@ class APIServerAdapter(BasePlatformAdapter):
        # This lets stateless clients supply their own history instead of
        # relying on server-side response chaining via previous_response_id.
        # Precedence: explicit conversation_history > previous_response_id.
-        conversation_history: List[Dict[str, Any]] = []
+        conversation_history: List[Dict[str, str]] = []
        raw_history = body.get("conversation_history")
        if raw_history:
            if not isinstance(raw_history, list):
@@ -1646,11 +1456,7 @@ class APIServerAdapter(BasePlatformAdapter):
                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
                        status=400,
                    )
-                try:
-                    entry_content = _normalize_multimodal_content(entry["content"])
-                except ValueError as exc:
-                    return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
-                conversation_history.append({"role": str(entry["role"]), "content": entry_content})
+                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

@@ -1670,8 +1476,8 @@ class APIServerAdapter(BasePlatformAdapter):
            conversation_history.append(msg)

        # Last input message is the user_message
-        user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
-        if not _content_has_visible_payload(user_message):
+        user_message = input_messages[-1].get("content", "") if input_messages else ""
+        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

        # Truncation support
@@ -1876,16 +1682,44 @@ class APIServerAdapter(BasePlatformAdapter):
    # Cron jobs API
    # ------------------------------------------------------------------

+    # Check cron module availability once (not per-request)
+    _CRON_AVAILABLE = False
+    try:
+        from cron.jobs import (
+            list_jobs as _cron_list,
+            get_job as _cron_get,
+            create_job as _cron_create,
+            update_job as _cron_update,
+            remove_job as _cron_remove,
+            pause_job as _cron_pause,
+            resume_job as _cron_resume,
+            trigger_job as _cron_trigger,
+        )
+        # Wrap as staticmethod to prevent descriptor binding — these are plain
+        # module functions, not instance methods.  Without this, self._cron_*()
+        # injects ``self`` as the first positional argument and every call
+        # raises TypeError.
+        _cron_list = staticmethod(_cron_list)
+        _cron_get = staticmethod(_cron_get)
+        _cron_create = staticmethod(_cron_create)
+        _cron_update = staticmethod(_cron_update)
+        _cron_remove = staticmethod(_cron_remove)
+        _cron_pause = staticmethod(_cron_pause)
+        _cron_resume = staticmethod(_cron_resume)
+        _cron_trigger = staticmethod(_cron_trigger)
+        _CRON_AVAILABLE = True
+    except ImportError:
+        pass
+
    _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
    # Allowed fields for update — prevents clients injecting arbitrary keys
    _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
    _MAX_NAME_LENGTH = 200
    _MAX_PROMPT_LENGTH = 5000

-    @staticmethod
-    def _check_jobs_available() -> Optional["web.Response"]:
+    def _check_jobs_available(self) -> Optional["web.Response"]:
        """Return error response if cron module isn't available."""
-        if not _CRON_AVAILABLE:
+        if not self._CRON_AVAILABLE:
            return web.json_response(
                {"error": "Cron module not available"}, status=501,
            )
@@ -1910,7 +1744,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return cron_err
        try:
            include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
-            jobs = _cron_list(include_disabled=include_disabled)
+            jobs = self._cron_list(include_disabled=include_disabled)
            return web.json_response({"jobs": jobs})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1958,7 +1792,7 @@ class APIServerAdapter(BasePlatformAdapter):
            if repeat is not None:
                kwargs["repeat"] = repeat

-            job = _cron_create(**kwargs)
+            job = self._cron_create(**kwargs)
            return web.json_response({"job": job})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1975,7 +1809,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_get(job_id)
+            job = self._cron_get(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2008,7 +1842,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return web.json_response(
                    {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
                )
-            job = _cron_update(job_id, sanitized)
+            job = self._cron_update(job_id, sanitized)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2027,7 +1861,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            success = _cron_remove(job_id)
+            success = self._cron_remove(job_id)
            if not success:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"ok": True})
@@ -2046,7 +1880,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_pause(job_id)
+            job = self._cron_pause(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2065,7 +1899,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_resume(job_id)
+            job = self._cron_resume(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2084,7 +1918,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_trigger(job_id)
+            job = self._cron_trigger(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2471,6 +2305,10 @@ class APIServerAdapter(BasePlatformAdapter):

    async def connect(self) -> bool:
        """Start the aiohttp web server."""
+        if not AIOHTTP_AVAILABLE:
+            logger.warning("[%s] aiohttp not installed", self.name)
+            return False
+
        try:
            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
            self._app = web.Application(middlewares=mws)
@@ -2517,7 +2355,7 @@ class APIServerAdapter(BasePlatformAdapter):
            # Ported from openclaw/openclaw#64586.
            if is_network_accessible(self._host) and self._api_key:
                try:
-                    from hermes_agent.cli.auth.auth import has_usable_secret
+                    from hermes_cli.auth import has_usable_secret
                    if not has_usable_secret(self._api_key, min_length=8):
                        logger.error(
                            "[%s] Refusing to start: API_SERVER_KEY is set to a "
--- a/hermes_agent/gateway/platforms/base.py
+++ b/hermes_agent/gateway/platforms/base.py
@@ -19,8 +19,6 @@ import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit

-from hermes_agent.utils import normalize_proxy_url
-
 logger = logging.getLogger(__name__)


@@ -161,13 +159,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
-            return normalize_proxy_url(value)
+            return value
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
-            return normalize_proxy_url(value)
-    return normalize_proxy_url(_detect_macos_system_proxy())
+            return value
+    return _detect_macos_system_proxy()


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -187,14 +185,16 @@ def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-        except ImportError:
-            raise ImportError(
-                "aiohttp-socks is required for SOCKS proxy support. "
-                "Install with: pip install hermes-agent[messaging]"
-            ) from None

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return {}
    return {"proxy": proxy_url}


@@ -218,14 +218,16 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-        except ImportError:
-            raise ImportError(
-                "aiohttp-socks is required for SOCKS proxy support. "
-                "Install with: pip install hermes-agent[messaging]"
-            ) from None

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}, {}
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}, {}
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return {}, {}
    return {}, {"proxy": proxy_url}


@@ -235,9 +237,12 @@ from pathlib import Path
 from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
 from enum import Enum

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.session import SessionSource, build_session_key
-from hermes_agent.constants import get_hermes_dir
+from pathlib import Path as _Path
+sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
+
+from gateway.config import Platform, PlatformConfig
+from gateway.session import SessionSource, build_session_key
+from hermes_constants import get_hermes_dir


 GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
@@ -293,7 +298,7 @@ async def _ssrf_redirect_guard(response):
    """
    if response.is_redirect and response.next_request:
        redirect_url = str(response.next_request.url)
-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url
        if not is_safe_url(redirect_url):
            raise ValueError(
                f"Blocked redirect to private/internal address: {safe_url_for_log(redirect_url)}"
@@ -382,13 +387,16 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    Raises:
        ValueError: If the URL targets a private/internal network (SSRF protection).
    """
-    from hermes_agent.tools.security.urls import is_safe_url
+    from tools.url_safety import is_safe_url
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

+    import asyncio
    import httpx
-    _log = logging.getLogger(__name__)
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -406,6 +414,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -421,7 +430,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise AssertionError("unreachable: retry loop exhausted")
+    raise last_exc


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -497,13 +506,16 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    Raises:
        ValueError: If the URL targets a private/internal network (SSRF protection).
    """
-    from hermes_agent.tools.security.urls import is_safe_url
+    from tools.url_safety import is_safe_url
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

+    import asyncio
    import httpx
-    _log = logging.getLogger(__name__)
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -521,6 +533,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -536,40 +549,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise AssertionError("unreachable: retry loop exhausted")
-
-
-# ---------------------------------------------------------------------------
-# Video cache utilities
-#
-# Same pattern as image/audio cache -- videos from platforms are downloaded
-# here so the agent can reference them by local file path.
-# ---------------------------------------------------------------------------
-
-VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
-
-SUPPORTED_VIDEO_TYPES = {
-    ".mp4": "video/mp4",
-    ".mov": "video/quicktime",
-    ".webm": "video/webm",
-    ".mkv": "video/x-matroska",
-    ".avi": "video/x-msvideo",
-}
-
-
-def get_video_cache_dir() -> Path:
-    """Return the video cache directory, creating it if it doesn't exist."""
-    VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
-    return VIDEO_CACHE_DIR
-
-
-def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
-    """Save raw video bytes to the cache and return the absolute file path."""
-    cache_dir = get_video_cache_dir()
-    filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
-    filepath = cache_dir / filename
-    filepath.write_bytes(data)
-    return str(filepath)
+    raise last_exc


 # ---------------------------------------------------------------------------
@@ -939,7 +919,7 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_message = None
        self._fatal_error_retryable = True
        try:
-            from hermes_agent.gateway.status import write_runtime_status
+            from gateway.status import write_runtime_status
            write_runtime_status(platform=self.platform.value, platform_state="connected", error_code=None, error_message=None)
        except Exception:
            pass
@@ -949,7 +929,7 @@ class BasePlatformAdapter(ABC):
        if self.has_fatal_error:
            return
        try:
-            from hermes_agent.gateway.status import write_runtime_status
+            from gateway.status import write_runtime_status
            write_runtime_status(platform=self.platform.value, platform_state="disconnected", error_code=None, error_message=None)
        except Exception:
            pass
@@ -960,7 +940,7 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_message = message
        self._fatal_error_retryable = retryable
        try:
-            from hermes_agent.gateway.status import write_runtime_status
+            from gateway.status import write_runtime_status
            write_runtime_status(
                platform=self.platform.value,
                platform_state="fatal",
@@ -980,7 +960,7 @@ class BasePlatformAdapter(ABC):

    def _acquire_platform_lock(self, scope: str, identity: str, resource_desc: str) -> bool:
        """Acquire a scoped lock for this adapter. Returns True on success."""
-        from hermes_agent.gateway.status import acquire_scoped_lock
+        from gateway.status import acquire_scoped_lock
        self._platform_lock_scope = scope
        self._platform_lock_identity = identity
        acquired, existing = acquire_scoped_lock(
@@ -1003,7 +983,7 @@ class BasePlatformAdapter(ABC):
        identity = getattr(self, '_platform_lock_identity', None)
        if not identity:
            return
-        from hermes_agent.gateway.status import release_scoped_lock
+        from gateway.status import release_scoped_lock
        release_scoped_lock(self._platform_lock_scope, identity)
        self._platform_lock_identity = None

@@ -1338,7 +1318,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1702,7 +1682,7 @@ class BasePlatformAdapter(ABC):
            # session lifecycle and its cleanup races with the running task
            # (see PR #4926).
            cmd = event.get_command()
-            from hermes_agent.cli.commands import should_bypass_active_session
+            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
                logger.debug(
@@ -1774,6 +1754,8 @@ class BasePlatformAdapter(ABC):
          HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
          HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
        """
+        import random
+
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
@@ -1823,11 +1805,8 @@ class BasePlatformAdapter(ABC):
        try:
            await self._run_processing_hook("on_processing_start", event)

-            handler = self._message_handler
-            if handler is None:
-                return
-
-            response = await handler(event)
+            # Call the handler (this can take a while with tool calls)
+            response = await self._message_handler(event)
            
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
@@ -1876,7 +1855,7 @@ class BasePlatformAdapter(ABC):
                        and not media_files
                        and event.source.chat_id not in self._auto_tts_disabled_chats):
                    try:
-                        from hermes_agent.tools.media.tts import text_to_speech_tool, check_tts_requirements
+                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
                            import json as _json
                            speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
--- a/hermes_agent/gateway/platforms/bluebubbles.py
+++ b/hermes_agent/gateway/platforms/bluebubbles.py
@@ -14,14 +14,14 @@ import logging
 import os
 import re
 import uuid
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Any, Dict, List, Optional
 from urllib.parse import quote

 import httpx

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -30,7 +30,7 @@ from hermes_agent.gateway.platforms.base import (
    cache_audio_from_bytes,
    cache_document_from_bytes,
 )
-from hermes_agent.gateway.platforms.helpers import strip_markdown
+from gateway.platforms.helpers import strip_markdown

 logger = logging.getLogger(__name__)

@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
 def check_bluebubbles_requirements() -> bool:
    try:
        import aiohttp  # noqa: F401
-        import httpx  # noqa: F401
+        import httpx as _httpx  # noqa: F401
    except ImportError:
        return False
    return True
@@ -377,7 +377,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        payload = {
            "addresses": [address],
            "message": message,
-            "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
+            "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
        }
        try:
            res = await self._api_post("/api/v1/chat/new", payload)
@@ -417,7 +417,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
                )
            payload: Dict[str, Any] = {
                "chatGuid": guid,
-                "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
+                "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
                "message": chunk,
            }
            if reply_to and self._private_api_enabled and self._helper_connected:
@@ -502,7 +502,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        try:
-            from hermes_agent.gateway.platforms.base import cache_image_from_url
+            from gateway.platforms.base import cache_image_from_url

            local_path = await cache_image_from_url(image_url)
            return await self._send_attachment(chat_id, local_path, caption=caption)
--- a/hermes_agent/gateway/platforms/dingtalk.py
+++ b/hermes_agent/gateway/platforms/dingtalk.py
@@ -87,9 +87,9 @@ except ImportError:
    open_api_models = None
    tea_util_models = None

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.helpers import MessageDeduplicator
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.helpers import MessageDeduplicator
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
--- a/hermes_agent/gateway/platforms/discord.py
+++ b/hermes_agent/gateway/platforms/discord.py
@@ -36,11 +36,15 @@ except ImportError:
    Intents = Any
    commands = None

-from hermes_agent.gateway.config import Platform, PlatformConfig
+import sys
+from pathlib import Path as _Path
+sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
+
+from gateway.config import Platform, PlatformConfig
 import re

-from hermes_agent.gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
-from hermes_agent.gateway.platforms.base import (
+from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -53,7 +57,7 @@ from hermes_agent.gateway.platforms.base import (
    cache_document_from_bytes,
    SUPPORTED_DOCUMENT_TYPES,
 )
-from hermes_agent.tools.security.urls import is_safe_url
+from tools.url_safety import is_safe_url


 def _clean_discord_id(entry: str) -> str:
@@ -537,6 +541,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # ctypes.util.find_library fails on macOS with Homebrew-installed libs,
            # so fall back to known Homebrew paths if needed.
            if not opus_path:
+                import sys
                _homebrew_paths = (
                    "/opt/homebrew/lib/libopus.dylib",  # Apple Silicon
                    "/usr/local/lib/libopus.dylib",     # Intel Mac
@@ -597,7 +602,7 @@ class DiscordAdapter(BasePlatformAdapter):
            intents.voice_states = True

            # Resolve proxy (DISCORD_PROXY > generic env vars > macOS system proxy)
-            from hermes_agent.gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_bot
+            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_bot
            proxy_url = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
            if proxy_url:
                logger.info("[%s] Using proxy for Discord: %s", self.name, proxy_url)
@@ -966,7 +971,7 @@ class DiscordAdapter(BasePlatformAdapter):
        reported in ``raw_response['warnings']`` so the caller can surface
        partial-send issues.
        """
-        from hermes_agent.tools.send_message import _derive_forum_thread_name
+        from tools.send_message_tool import _derive_forum_thread_name

        formatted = self.format_message(content)
        chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
@@ -1028,7 +1033,7 @@ class DiscordAdapter(BasePlatformAdapter):
        ForumChannel accepts the same file/files/content kwargs as
        ``channel.send``, creating the thread and starter message atomically.
        """
-        from hermes_agent.tools.send_message import _derive_forum_thread_name
+        from tools.send_message_tool import _derive_forum_thread_name

        if not thread_name:
            # Prefer the text content, fall back to the first attached
@@ -1190,16 +1195,9 @@ class DiscordAdapter(BasePlatformAdapter):
            try:
                import base64

-                try:
-                    from mutagen.oggopus import OggOpus
-                except ImportError:
-                    raise ImportError(
-                        "mutagen is required for Discord voice messages. "
-                        "Install with: pip install hermes-agent[messaging]"
-                    ) from None
-
                duration_secs = 5.0
                try:
+                    from mutagen.oggopus import OggOpus
                    info = OggOpus(audio_path)
                    duration_secs = info.info.length
                except Exception:
@@ -1424,7 +1422,8 @@ class DiscordAdapter(BasePlatformAdapter):
        speaking_user_ids: set = set()
        receiver = self._voice_receivers.get(guild_id)
        if receiver:
-            now = time.monotonic()
+            import time as _time
+            now = _time.monotonic()
            with receiver._lock:
                for ssrc, last_t in receiver._last_packet_time.items():
                    # Consider "speaking" if audio received within last 2 seconds
@@ -1503,7 +1502,7 @@ class DiscordAdapter(BasePlatformAdapter):

    async def _process_voice_input(self, guild_id: int, user_id: int, pcm_data: bytes):
        """Convert PCM -> WAV -> STT -> callback."""
-        from hermes_agent.tools.media.voice import is_whisper_hallucination
+        from tools.voice_mode import is_whisper_hallucination

        tmp_f = tempfile.NamedTemporaryFile(suffix=".wav", prefix="vc_listen_", delete=False)
        wav_path = tmp_f.name
@@ -1511,7 +1510,7 @@ class DiscordAdapter(BasePlatformAdapter):
        try:
            await asyncio.to_thread(VoiceReceiver.pcm_to_wav, pcm_data, wav_path)

-            from hermes_agent.tools.media.transcription import transcribe_audio
+            from tools.transcription_tools import transcribe_audio
            result = await asyncio.to_thread(transcribe_audio, wav_path)

            if not result.get("success"):
@@ -1623,7 +1622,7 @@ class DiscordAdapter(BasePlatformAdapter):

            # Download the image and send as a Discord file attachment
            # (Discord renders attachments inline, unlike plain URLs)
-            from hermes_agent.gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
            async with aiohttp.ClientSession(**_sess_kw) as session:
@@ -1702,7 +1701,7 @@ class DiscordAdapter(BasePlatformAdapter):

            # Download the GIF and send as a Discord file attachment
            # (Discord renders .gif attachments as auto-playing animations inline)
-            from hermes_agent.gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
            async with aiohttp.ClientSession(**_sess_kw) as session:
@@ -1892,7 +1891,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # Fetch full member list (requires members intent)
            try:
                members = guild.members
-                if guild.member_count is not None and len(members) < guild.member_count:
+                if len(members) < guild.member_count:
                    members = [m async for m in guild.fetch_members(limit=None)]
            except Exception as e:
                logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
@@ -2133,7 +2132,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
        try:
-            from hermes_agent.cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates
+            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

            already_registered = set()
            try:
@@ -2223,7 +2222,7 @@ class DiscordAdapter(BasePlatformAdapter):
        skill name and its description.
        """
        try:
-            from hermes_agent.cli.commands import discord_skill_commands_by_category
+            from hermes_cli.commands import discord_skill_commands_by_category

            existing_names = set()
            try:
@@ -2472,12 +2471,12 @@ class DiscordAdapter(BasePlatformAdapter):
                if isinstance(skills, str):
                    return [skills]
                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # ty: ignore[invalid-return-type]  # dedup, preserve order
+                    return list(dict.fromkeys(skills))  # dedup, preserve order
        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
        """Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
-        from hermes_agent.gateway.platforms.base import resolve_channel_prompt
+        from gateway.platforms.base import resolve_channel_prompt
        return resolve_channel_prompt(self.config.extra, channel_id, parent_id)

    def _discord_require_mention(self) -> bool:
@@ -2743,7 +2742,7 @@ class DiscordAdapter(BasePlatformAdapter):
                channel = await self._client.fetch_channel(int(target_id))

            try:
-                from hermes_agent.cli.providers import get_label
+                from hermes_cli.providers import get_label
                provider_label = get_label(current_provider)
            except Exception:
                provider_label = current_provider
@@ -2928,7 +2927,7 @@ class DiscordAdapter(BasePlatformAdapter):
                f"Blocked unsafe attachment URL (SSRF protection): {att.url}"
            )
        import aiohttp
-        from hermes_agent.gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+        from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
        _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
        _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
        async with aiohttp.ClientSession(**_sess_kw) as session:
@@ -2963,17 +2962,6 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        is_voice_linked_channel = False
-
-        # Save mention-stripped text before auto-threading since create_thread()
-        # can clobber message.content, breaking /command detection in channels.
-        raw_content = message.content.strip()
-        normalized_content = raw_content
-        mention_prefix = False
-        if self._client.user and self._client.user in message.mentions:
-            mention_prefix = True
-            normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
-            normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
-            message.content = normalized_content
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -3008,11 +2996,16 @@ class DiscordAdapter(BasePlatformAdapter):

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
-            in_bot_thread = is_thread and thread_id is not None and thread_id in self._threads
+            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
-                if self._client.user not in message.mentions and not mention_prefix:
+                if self._client.user not in message.mentions:
                    return
+
+            if self._client.user and self._client.user in message.mentions:
+                message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
+                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
+
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
@@ -3034,7 +3027,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Determine message type
        msg_type = MessageType.TEXT
-        if normalized_content.startswith("/"):
+        if message.content.startswith("/"):
            msg_type = MessageType.COMMAND
        elif message.attachments:
            # Check attachment types
@@ -3174,9 +3167,7 @@ class DiscordAdapter(BasePlatformAdapter):
                                att.filename, e, exc_info=True,
                            )

-        # Use normalized_content (saved before auto-threading) instead of message.content,
-        # to detect /slash commands in channel messages.
-        event_text = normalized_content
+        event_text = message.content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection

@@ -3231,7 +3222,7 @@ class DiscordAdapter(BasePlatformAdapter):

    def _text_batch_key(self, event: MessageEvent) -> str:
        """Session-scoped key for text message batching."""
-        from hermes_agent.gateway.session import build_session_key
+        from gateway.session import build_session_key
        return build_session_key(
            event.source,
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
@@ -3368,7 +3359,7 @@ if DISCORD_AVAILABLE:

            # Unblock the waiting agent thread via the gateway approval queue
            try:
-                from hermes_agent.tools.security.approval import resolve_gateway_approval
+                from tools.approval import resolve_gateway_approval
                count = resolve_gateway_approval(self.session_key, choice)
                logger.info(
                    "Discord button resolved %d approval(s) for session %s (choice=%s, user=%s)",
@@ -3456,7 +3447,7 @@ if DISCORD_AVAILABLE:

            # Write response file
            try:
-                from hermes_agent.constants import get_hermes_home
+                from hermes_constants import get_hermes_home
                home = get_hermes_home()
                response_path = home / ".update_response"
                tmp = response_path.with_suffix(".tmp")
@@ -3601,9 +3592,7 @@ if DISCORD_AVAILABLE:
                )
                return

-            if interaction.data is None:
-                return
-            provider_slug = interaction.data["values"][0]  # ty: ignore[invalid-key]
+            provider_slug = interaction.data["values"][0]
            self._selected_provider = provider_slug
            provider = next(
                (p for p in self.providers if p["slug"] == provider_slug), None
@@ -3637,10 +3626,8 @@ if DISCORD_AVAILABLE:
                )
                return

-            if interaction.data is None:
-                return
            self.resolved = True
-            model_id = interaction.data["values"][0]  # ty: ignore[invalid-key]
+            model_id = interaction.data["values"][0]

            try:
                result_text = await self.on_model_selected(
@@ -3671,7 +3658,7 @@ if DISCORD_AVAILABLE:
            self._build_provider_select()

            try:
-                from hermes_agent.cli.providers import get_label
+                from hermes_cli.providers import get_label
                provider_label = get_label(self.current_provider)
            except Exception:
                provider_label = self.current_provider
--- a/hermes_agent/gateway/platforms/email.py
+++ b/hermes_agent/gateway/platforms/email.py
@@ -32,7 +32,7 @@ from email import encoders
 from pathlib import Path
 from typing import Any, Dict, List, Optional

-from hermes_agent.gateway.platforms.base import (
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -40,7 +40,7 @@ from hermes_agent.gateway.platforms.base import (
    cache_document_from_bytes,
    cache_image_from_bytes,
 )
-from hermes_agent.gateway.config import Platform, PlatformConfig
+from gateway.config import Platform, PlatformConfig

 logger = logging.getLogger(__name__)
 # Automated sender patterns — emails from these are silently ignored
@@ -532,7 +532,6 @@ class EmailAdapter(BasePlatformAdapter):
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an image URL as part of an email body."""
        text = caption or ""
@@ -546,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
-        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
--- a/hermes_agent/gateway/platforms/feishu.py
+++ b/hermes_agent/gateway/platforms/feishu.py
@@ -95,8 +95,8 @@ except ImportError:
 FEISHU_WEBSOCKET_AVAILABLE = websockets is not None
 FEISHU_WEBHOOK_AVAILABLE = aiohttp is not None

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -108,8 +108,8 @@ from hermes_agent.gateway.platforms.base import (
    cache_audio_from_bytes,
    cache_image_from_bytes,
 )
-from hermes_agent.gateway.status import acquire_scoped_lock, release_scoped_lock
-from hermes_agent.constants import get_hermes_home
+from gateway.status import acquire_scoped_lock, release_scoped_lock
+from hermes_constants import get_hermes_home

 logger = logging.getLogger(__name__)

@@ -414,7 +414,7 @@ def _strip_markdown_to_plain_text(text: str) -> str:
    Feishu-specific patterns (blockquotes, strikethrough, underline tags,
    horizontal rules, \\r\\n normalisation).
    """
-    from hermes_agent.gateway.platforms.helpers import strip_markdown
+    from gateway.platforms.helpers import strip_markdown
    plain = text.replace("\r\n", "\n")
    plain = _MARKDOWN_LINK_RE.sub(lambda m: f"{m.group(1)} ({m.group(2).strip()})", plain)
    plain = re.sub(r"^>\s?", "", plain, flags=re.MULTILINE)
@@ -2039,7 +2039,7 @@ class FeishuAdapter(BasePlatformAdapter):
        logging, and reaction.  Scheduling follows the same
        ``run_coroutine_threadsafe`` pattern used by ``_on_message_event``.
        """
-        from hermes_agent.gateway.platforms.feishu_comment import handle_drive_comment_event
+        from gateway.platforms.feishu_comment import handle_drive_comment_event

        loop = self._loop
        if not self._loop_accepts_callbacks(loop):
@@ -2151,7 +2151,7 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
            return
        try:
-            from hermes_agent.tools.security.approval import resolve_gateway_approval
+            from tools.approval import resolve_gateway_approval
            count = resolve_gateway_approval(state["session_key"], choice)
            logger.info(
                "Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)",
@@ -2542,7 +2542,7 @@ class FeishuAdapter(BasePlatformAdapter):
        )

    def _media_batch_key(self, event: MessageEvent) -> str:
-        from hermes_agent.gateway.session import build_session_key
+        from gateway.session import build_session_key

        session_key = build_session_key(
            event.source,
@@ -2619,7 +2619,7 @@ class FeishuAdapter(BasePlatformAdapter):
        default_ext: str,
        preferred_name: str,
    ) -> tuple[str, str]:
-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url
        if not is_safe_url(file_url):
            raise ValueError(f"Blocked unsafe URL (SSRF protection): {file_url[:80]}")

@@ -2822,7 +2822,7 @@ class FeishuAdapter(BasePlatformAdapter):

    def _text_batch_key(self, event: MessageEvent) -> str:
        """Return the session-scoped key used for Feishu text aggregation."""
-        from hermes_agent.gateway.session import build_session_key
+        from gateway.session import build_session_key

        return build_session_key(
            event.source,
--- a/hermes_agent/gateway/platforms/feishu_comment.py
+++ b/hermes_agent/gateway/platforms/feishu_comment.py
@@ -975,18 +975,18 @@ def build_whole_comment_prompt(
 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
    import os
-    from hermes_agent.gateway.run import _load_gateway_config, _resolve_gateway_model
+    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
    model = _resolve_gateway_model(user_config)

-    from hermes_agent.gateway.run import _resolve_runtime_agent_kwargs
+    from gateway.run import _resolve_runtime_agent_kwargs
    runtime_kwargs = _resolve_runtime_agent_kwargs()

    # Fall back to provider's default model if none configured
    if not model and runtime_kwargs.get("provider"):
        try:
-            from hermes_agent.cli.models.models import get_default_model_for_provider
+            from hermes_cli.models import get_default_model_for_provider
            model = get_default_model_for_provider(runtime_kwargs["provider"])
        except Exception:
            pass
@@ -1053,11 +1053,11 @@ def _run_comment_agent(prompt: str, client: Any, session_key: str = "") -> str:

    Returns the agent's final response text, or empty string on failure.
    """
-    from hermes_agent.agent.loop import AIAgent
+    from run_agent import AIAgent

    logger.info("[Feishu-Comment] _run_comment_agent: injecting lark client into tool thread-locals")
-    from hermes_agent.tools.feishu_doc import set_client as set_doc_client
-    from hermes_agent.tools.feishu_drive import set_client as set_drive_client
+    from tools.feishu_doc_tool import set_client as set_doc_client
+    from tools.feishu_drive_tool import set_client as set_drive_client
    set_doc_client(client)
    set_drive_client(client)

@@ -1165,7 +1165,7 @@ async def handle_drive_comment_event(
    )

    # Access control
-    from hermes_agent.gateway.platforms.feishu_comment_rules import load_config, resolve_rule, is_user_allowed, has_wiki_keys
+    from gateway.platforms.feishu_comment_rules import load_config, resolve_rule, is_user_allowed, has_wiki_keys

    comments_cfg = load_config()
    rule = resolve_rule(comments_cfg, file_type, file_token)
--- a/hermes_agent/gateway/platforms/feishu_comment_rules.py
+++ b/hermes_agent/gateway/platforms/feishu_comment_rules.py
@@ -16,7 +16,7 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, Optional

-from hermes_agent.constants import get_hermes_home
+from hermes_constants import get_hermes_home

 logger = logging.getLogger(__name__)

@@ -351,7 +351,7 @@ def _main() -> int:
    import sys

    try:
-        from hermes_agent.cli.env_loader import load_hermes_dotenv
+        from hermes_cli.env_loader import load_hermes_dotenv
        load_hermes_dotenv()
    except Exception:
        pass
--- a/hermes_agent/gateway/platforms/helpers.py
+++ b/hermes_agent/gateway/platforms/helpers.py
@@ -14,7 +14,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Dict, Optional

 if TYPE_CHECKING:
-    from hermes_agent.gateway.platforms.base import BasePlatformAdapter, MessageEvent
+    from gateway.platforms.base import BasePlatformAdapter, MessageEvent

 logger = logging.getLogger(__name__)

@@ -214,7 +214,7 @@ class ThreadParticipationTracker:
        self._threads: set = self._load()

    def _state_path(self) -> Path:
-        from hermes_agent.constants import get_hermes_home
+        from hermes_constants import get_hermes_home
        return get_hermes_home() / f"{self._platform}_threads.json"

    def _load(self) -> set:
--- a/hermes_agent/gateway/platforms/homeassistant.py
+++ b/hermes_agent/gateway/platforms/homeassistant.py
@@ -28,8 +28,8 @@ except ImportError:
    AIOHTTP_AVAILABLE = False
    aiohttp = None  # type: ignore[assignment]

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
--- a/hermes_agent/gateway/platforms/matrix.py
+++ b/hermes_agent/gateway/platforms/matrix.py
@@ -88,15 +88,15 @@ except ImportError:

    TrustState = _TrustStateStub  # type: ignore[misc,assignment]

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
    ProcessingOutcome,
    SendResult,
 )
-from hermes_agent.gateway.platforms.helpers import ThreadParticipationTracker
+from gateway.platforms.helpers import ThreadParticipationTracker

 logger = logging.getLogger(__name__)

@@ -106,7 +106,7 @@ MAX_MESSAGE_LENGTH = 4000

 # Store directory for E2EE keys and sync state.
 # Uses get_hermes_home() so each profile gets its own Matrix store.
-from hermes_agent.constants import get_hermes_dir as _get_hermes_dir
+from hermes_constants import get_hermes_dir as _get_hermes_dir

 _STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store")
 _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
@@ -869,7 +869,7 @@ class MatrixAdapter(BasePlatformAdapter):
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Download an image URL and upload it to Matrix."""
-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url

        if not is_safe_url(image_url):
            logger.warning("Matrix: blocked unsafe image URL (SSRF protection)")
@@ -1469,7 +1469,7 @@ class MatrixAdapter(BasePlatformAdapter):
                            file_bytes = None

                    if file_bytes is not None:
-                        from hermes_agent.gateway.platforms.base import (
+                        from gateway.platforms.base import (
                            cache_audio_from_bytes,
                            cache_document_from_bytes,
                            cache_image_from_bytes,
@@ -1676,7 +1676,7 @@ class MatrixAdapter(BasePlatformAdapter):

    def _text_batch_key(self, event: MessageEvent) -> str:
        """Session-scoped key for text message batching."""
-        from hermes_agent.gateway.session import build_session_key
+        from gateway.session import build_session_key

        return build_session_key(
            event.source,
@@ -2170,8 +2170,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ul_match = re.match(r"^[\s]*[-*+]\s+(.+)$", line)
            if ul_match:
                items = []
-                while i < len(lines) and (m := re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i])):
-                    items.append(m.group(1))
+                while i < len(lines) and re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]):
+                    items.append(re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]).group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ul>{li}</ul>")
@@ -2181,8 +2181,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ol_match = re.match(r"^[\s]*\d+[.)]\s+(.+)$", line)
            if ol_match:
                items = []
-                while i < len(lines) and (m := re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i])):
-                    items.append(m.group(1))
+                while i < len(lines) and re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]):
+                    items.append(re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]).group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ol>{li}</ol>")
--- a/hermes_agent/gateway/platforms/mattermost.py
+++ b/hermes_agent/gateway/platforms/mattermost.py
@@ -21,9 +21,9 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.helpers import MessageDeduplicator
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.helpers import MessageDeduplicator
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -405,11 +405,12 @@ class MattermostAdapter(BasePlatformAdapter):
        kind: str = "file",
    ) -> SendResult:
        """Download a URL and upload it as a file attachment."""
-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url
        if not is_safe_url(url):
            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)

+        import asyncio
        import aiohttp

        last_exc = None
@@ -681,13 +682,13 @@ class MattermostAdapter(BasePlatformAdapter):
                ) as resp:
                    if resp.status < 400:
                        file_data = await resp.read()
-                        from hermes_agent.gateway.platforms.base import cache_image_from_bytes, cache_document_from_bytes
+                        from gateway.platforms.base import cache_image_from_bytes, cache_document_from_bytes
                        if mime.startswith("image/"):
                            local_path = cache_image_from_bytes(file_data, ext or ".png")
                            media_urls.append(local_path)
                            media_types.append(mime)
                        elif mime.startswith("audio/"):
-                            from hermes_agent.gateway.platforms.base import cache_audio_from_bytes
+                            from gateway.platforms.base import cache_audio_from_bytes
                            local_path = cache_audio_from_bytes(file_data, ext or ".ogg")
                            media_urls.append(local_path)
                            media_types.append(mime)
@@ -718,7 +719,7 @@ class MattermostAdapter(BasePlatformAdapter):
        )

        # Per-channel ephemeral prompt
-        from hermes_agent.gateway.platforms.base import resolve_channel_prompt
+        from gateway.platforms.base import resolve_channel_prompt
        _channel_prompt = resolve_channel_prompt(
            self.config.extra, channel_id, None,
        )
--- a/hermes_agent/gateway/platforms/qqbot/init.py
+++ b/hermes_agent/gateway/platforms/qqbot/init.py
@@ -4,8 +4,8 @@ QQBot platform package.
 Re-exports the main adapter symbols from ``adapter.py`` (the original
 ``qqbot.py``) so that **all existing import paths remain unchanged**::

-    from hermes_agent.gateway.platforms.qqbot import QQAdapter          # works
-    from hermes_agent.gateway.platforms.qqbot import check_qq_requirements  # works
+    from gateway.platforms.qqbot import QQAdapter          # works
+    from gateway.platforms.qqbot import check_qq_requirements  # works

 New modules:
    - ``constants`` — shared constants (API URLs, timeouts, message types)
--- a/hermes_agent/gateway/platforms/qqbot/adapter.py
+++ b/hermes_agent/gateway/platforms/qqbot/adapter.py
@@ -60,8 +60,8 @@ except ImportError:
    HTTPX_AVAILABLE = False
    httpx = None  # type: ignore[assignment]

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -70,7 +70,7 @@ from hermes_agent.gateway.platforms.base import (
    cache_document_from_bytes,
    cache_image_from_bytes,
 )
-from hermes_agent.gateway.platforms.helpers import strip_markdown
+from gateway.platforms.helpers import strip_markdown

 logger = logging.getLogger(__name__)

@@ -91,7 +91,7 @@ class QQCloseError(Exception):
 # Constants — imported from the shared constants module.
 # ---------------------------------------------------------------------------

-from hermes_agent.gateway.platforms.qqbot.constants import (
+from gateway.platforms.qqbot.constants import (
    API_BASE,
    TOKEN_URL,
    GATEWAY_URL_PATH,
@@ -115,7 +115,7 @@ from hermes_agent.gateway.platforms.qqbot.constants import (
    MEDIA_TYPE_VOICE,
    MEDIA_TYPE_FILE,
 )
-from hermes_agent.gateway.platforms.qqbot.utils import (
+from gateway.platforms.qqbot.utils import (
    coerce_list as _coerce_list_impl,
    build_user_agent,
 )
@@ -1086,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
            return MessageType.VIDEO
        if "image" in first_type or "photo" in first_type:
            return MessageType.PHOTO
+        # Unknown content type with an attachment — don't assume PHOTO
+        # to prevent non-image files from being sent to vision analysis.
        logger.debug(
-            "Unknown media content_type '%s', defaulting to TEXT",
+            "[%s] Unknown media content_type '%s', defaulting to TEXT",
+            self._log_tag,
            first_type,
        )
        return MessageType.TEXT
@@ -1203,7 +1206,7 @@ class QQAdapter(BasePlatformAdapter):

    async def _download_and_cache(self, url: str, content_type: str) -> Optional[str]:
        """Download a URL and cache it locally."""
-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url

        if not is_safe_url(url):
            raise ValueError(f"Blocked unsafe URL: {url[:80]}")
@@ -1304,7 +1307,7 @@ class QQAdapter(BasePlatformAdapter):
            is_pre_wav = True
            logger.debug("[%s] STT: using voice_wav_url (pre-converted WAV)", self._log_tag)

-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url
        if not is_safe_url(download_url):
            logger.warning("[QQ] STT blocked unsafe URL: %s", download_url[:80])
            return None
@@ -1823,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
            body["file_name"] = file_name

        # Retry transient upload failures
+        last_exc = None
        for attempt in range(3):
            try:
                return await self._api_request(
                    "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
                )
            except RuntimeError as exc:
+                last_exc = exc
                err_msg = str(exc)
                if any(
                        kw in err_msg
@@ -1837,9 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
                    raise
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
-                else:
-                    raise
-        raise AssertionError("unreachable: retry loop exhausted")
+
+        raise last_exc  # type: ignore[misc]

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
--- a/hermes_agent/gateway/platforms/qqbot/constants.py
+++ b/hermes_agent/gateway/platforms/qqbot/constants.py
--- a/hermes_agent/gateway/platforms/qqbot/crypto.py
+++ b/hermes_agent/gateway/platforms/qqbot/crypto.py
--- a/hermes_agent/gateway/platforms/qqbot/onboard.py
+++ b/hermes_agent/gateway/platforms/qqbot/onboard.py
--- a/hermes_agent/gateway/platforms/qqbot/utils.py
+++ b/hermes_agent/gateway/platforms/qqbot/utils.py
--- a/hermes_agent/gateway/platforms/signal.py
+++ b/hermes_agent/gateway/platforms/signal.py
@@ -26,8 +26,8 @@ from urllib.parse import quote, unquote

 import httpx

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -37,7 +37,7 @@ from hermes_agent.gateway.platforms.base import (
    cache_document_from_bytes,
    cache_image_from_url,
 )
-from hermes_agent.gateway.platforms.helpers import redact_phone
+from gateway.platforms.helpers import redact_phone

 logger = logging.getLogger(__name__)

--- a/hermes_agent/gateway/platforms/slack.py
+++ b/hermes_agent/gateway/platforms/slack.py
@@ -28,9 +28,13 @@ except ImportError:
    AsyncSocketModeHandler = Any
    AsyncWebClient = Any

-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.helpers import MessageDeduplicator
-from hermes_agent.gateway.platforms.base import (
+import sys
+from pathlib import Path as _Path
+sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.helpers import MessageDeduplicator
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
@@ -132,7 +136,7 @@ class SlackAdapter(BasePlatformAdapter):
        bot_tokens = [t.strip() for t in raw_token.split(",") if t.strip()]

        # Also load tokens from OAuth token file
-        from hermes_agent.constants import get_hermes_home
+        from hermes_constants import get_hermes_home
        tokens_file = get_hermes_home() / "slack_tokens.json"
        if tokens_file.exists():
            try:
@@ -650,7 +654,7 @@ class SlackAdapter(BasePlatformAdapter):
        if not self._app:
            return SendResult(success=False, error="Not connected")

-        from hermes_agent.tools.security.urls import is_safe_url
+        from tools.url_safety import is_safe_url
        if not is_safe_url(image_url):
            logger.warning("[Slack] Blocked unsafe image URL (SSRF protection)")
            return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata)
@@ -1189,7 +1193,7 @@ class SlackAdapter(BasePlatformAdapter):
        )

        # Per-channel ephemeral prompt
-        from hermes_agent.gateway.platforms.base import resolve_channel_prompt
+        from gateway.platforms.base import resolve_channel_prompt
        _channel_prompt = resolve_channel_prompt(
            self.config.extra, channel_id, None,
        )
@@ -1384,7 +1388,7 @@ class SlackAdapter(BasePlatformAdapter):

        # Resolve the approval — this unblocks the agent thread
        try:
-            from hermes_agent.tools.security.approval import resolve_gateway_approval
+            from tools.approval import resolve_gateway_approval
            count = resolve_gateway_approval(session_key, choice)
            logger.info(
                "Slack button resolved %d approval(s) for session %s (choice=%s, user=%s)",
@@ -1519,7 +1523,7 @@ class SlackAdapter(BasePlatformAdapter):

        # Map subcommands to gateway commands — derived from central registry.
        # Also keep "compact" as a Slack-specific alias for /compress.
-        from hermes_agent.cli.commands import slack_subcommand_map
+        from hermes_cli.commands import slack_subcommand_map
        subcommand_map = slack_subcommand_map()
        subcommand_map["compact"] = "/compress"
        first_word = text.split()[0] if text else ""
@@ -1568,7 +1572,7 @@ class SlackAdapter(BasePlatformAdapter):
            return False

        try:
-            from hermes_agent.gateway.session import SessionSource, build_session_key
+            from gateway.session import SessionSource, build_session_key

            source = SessionSource(
                platform=Platform.SLACK,
@@ -1596,9 +1600,11 @@ class SlackAdapter(BasePlatformAdapter):

    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
+        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
+        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1622,12 +1628,13 @@ class SlackAdapter(BasePlatformAdapter):
                        )

                    if audio:
-                        from hermes_agent.gateway.platforms.base import cache_audio_from_bytes
+                        from gateway.platforms.base import cache_audio_from_bytes
                        return cache_audio_from_bytes(response.content, ext)
                    else:
-                        from hermes_agent.gateway.platforms.base import cache_image_from_bytes
+                        from gateway.platforms.base import cache_image_from_bytes
                        return cache_image_from_bytes(response.content, ext)
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1636,13 +1643,15 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")
+        raise last_exc

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
+        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
+        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1654,6 +1663,7 @@ class SlackAdapter(BasePlatformAdapter):
                    response.raise_for_status()
                    return response.content
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1662,7 +1672,7 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")
+        raise last_exc

    # ── Channel mention gating ─────────────────────────────────────────────

--- a/hermes_agent/gateway/platforms/sms.py
+++ b/hermes_agent/gateway/platforms/sms.py
@@ -25,19 +25,16 @@ import hmac
 import logging
 import os
 import urllib.parse
-from typing import Any, Dict, Optional, TYPE_CHECKING
+from typing import Any, Dict, Optional

-if TYPE_CHECKING:
-    import aiohttp
-
-from hermes_agent.gateway.config import Platform, PlatformConfig
-from hermes_agent.gateway.platforms.base import (
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
    SendResult,
 )
-from hermes_agent.gateway.platforms.helpers import redact_phone, strip_markdown
+from gateway.platforms.helpers import redact_phone, strip_markdown

 logger = logging.getLogger(__name__)

--- a/Show More
+++ b/Show More