ci: split Tests workflow into 4 parallel shards via pytest-split

Reduces CI wall time by running the test suite as 4 parallel matrix jobs instead of a single job. Each shard runs ~3,000 tests in parallel, so total wall time drops from ~4min to ~60-90s. Changes: - Add pytest-split to dev extras (deterministic test splitting, composes with pytest-xdist's -n auto inside each shard). - Matrix-split tests.yml 'test' job into 4 groups. Each shard runs 'pytest ... --splits 4 --group N' and parallelizes inside with the -n auto already in pyproject.toml's addopts. - fail-fast: false so all shards finish even if one fails (consistent with current behavior when there's no matrix). Expected CI timing: Before: 243s single-job (4m03s) After: ~60-90s per shard in parallel + ~25s install overhead \u2192 total CI ~90-115s No test-file changes. Deterministic hash-based distribution (no .test_durations file yet; can add one later for better balance). The e2e job is unchanged — it's already small (20s) and runs separately.
test: remove 169 change-detector tests across 21 files (#11472 )
2026-04-17 04:21:02 -07:00 · 2026-04-17 01:05:09 -07:00 · 2026-04-17 01:02:06 -07:00 · 2026-04-17 00:52:42 -07:00 · 2026-04-17 00:52:35 -07:00 · 2026-04-17 00:52:35 -07:00
480 changed files with 54252 additions and 9644 deletions
@@ -24,6 +24,15 @@
 # Optional base URL override (default: Google's OpenAI-compatible endpoint)
 # GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai

+# =============================================================================
+# LLM PROVIDER (Ollama Cloud)
+# =============================================================================
+# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
+# Get your key at: https://ollama.com/settings
+# OLLAMA_API_KEY=your_ollama_key_here
+# Optional base URL override (default: https://ollama.com/v1)
+# OLLAMA_BASE_URL=https://ollama.com/v1
+
 # =============================================================================
 # LLM PROVIDER (z.ai / GLM)
 # =============================================================================
@@ -145,6 +154,10 @@
 # Only override here if you need to force a backend without touching config.yaml:
 # TERMINAL_ENV=local

+# Override the container runtime binary (e.g. to use Podman instead of Docker).
+# Useful on systems where Docker's storage driver is broken or unavailable.
+# HERMES_DOCKER_BINARY=/usr/local/bin/podman
+
 # Container images (for singularity/docker/modal backends)
 # TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
 # TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
@@ -0,0 +1,73 @@
+name: Contributor Attribution Check
+
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      # Only run when code files change (not docs-only PRs)
+      - '*.py'
+      - '**/*.py'
+      - '.github/workflows/contributor-check.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  check-attribution:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          fetch-depth: 0  # Full history needed for git log
+
+      - name: Check for unmapped contributor emails
+        run: |
+          # Get the merge base between this PR and main
+          MERGE_BASE=$(git merge-base origin/main HEAD)
+
+          # Find any new author emails in this PR's commits
+          NEW_EMAILS=$(git log ${MERGE_BASE}..HEAD --format='%ae' --no-merges | sort -u)
+
+          if [ -z "$NEW_EMAILS" ]; then
+            echo "No new commits to check."
+            exit 0
+          fi
+
+          # Check each email against AUTHOR_MAP in release.py
+          MISSING=""
+          while IFS= read -r email; do
+            # Skip teknium and bot emails
+            case "$email" in
+              *teknium*|*noreply@github.com*|*dependabot*|*github-actions*|*anthropic.com*|*cursor.com*)
+                continue ;;
+            esac
+
+            # Check if email is in AUTHOR_MAP (either as a key or matches noreply pattern)
+            if echo "$email" | grep -qP '\+.*@users\.noreply\.github\.com'; then
+              continue  # GitHub noreply emails auto-resolve
+            fi
+
+            if ! grep -qF "\"${email}\"" scripts/release.py 2>/dev/null; then
+              AUTHOR=$(git log --author="$email" --format='%an' -1)
+              MISSING="${MISSING}\n  ${email} (${AUTHOR})"
+            fi
+          done <<< "$NEW_EMAILS"
+
+          if [ -n "$MISSING" ]; then
+            echo ""
+            echo "⚠️  New contributor email(s) not in AUTHOR_MAP:"
+            echo -e "$MISSING"
+            echo ""
+            echo "Please add mappings to scripts/release.py AUTHOR_MAP:"
+            echo -e "$MISSING" | while read -r line; do
+              email=$(echo "$line" | sed 's/^ *//' | cut -d' ' -f1)
+              [ -z "$email" ] && continue
+              echo "    \"${email}\": \"<github-username>\","
+            done
+            echo ""
+            echo "To find the GitHub username for an email:"
+            echo "  gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'"
+            exit 1
+          else
+            echo "✅ All contributor emails are mapped in AUTHOR_MAP."
+          fi
@@ -1,11 +1,12 @@
 name: Deploy Site

 on:
+  release:
+    types: [published]
  push:
    branches: [main]
    paths:
      - 'website/**'
-      - 'landingpage/**'
      - 'skills/**'
      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
@@ -20,28 +21,34 @@ concurrency:
  cancel-in-progress: false

 jobs:
-  build-and-deploy:
-    # Only run on the upstream repository, not on forks
+  deploy-vercel:
+    if: github.event_name == 'release'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Trigger Vercel Deploy
+        run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
+
+  deploy-docs:
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deploy.outputs.page_url }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml httpx
+        run: pip install pyyaml==6.0.2 httpx==0.28.1

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
@@ -65,18 +72,13 @@ jobs:
      - name: Stage deployment
        run: |
          mkdir -p _site/docs
-          # Landing page at root
-          cp -r landingpage/* _site/
-          # Docusaurus at /docs/
          cp -r website/build/* _site/docs/
-          # CNAME so GitHub Pages keeps the custom domain between deploys
-          echo "hermes-agent.nousresearch.com" > _site/CNAME

      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
+        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa  # v3
        with:
          path: _site

      - name: Deploy to GitHub Pages
        id: deploy
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e  # v4
@@ -23,21 +23,21 @@ jobs:
    timeout-minutes: 60
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          submodules: recursive

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130  # v3

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      # Build amd64 only so we can `load` the image for smoke testing.
      # `load: true` cannot export a multi-arch manifest to the local daemon.
      # The multi-arch build follows on push to main / release.
      - name: Build image (amd64, smoke test)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: .
          file: Dockerfile
@@ -56,14 +56,14 @@ jobs:

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9  # v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Push multi-arch image (main branch)
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: .
          file: Dockerfile
@@ -75,7 +75,7 @@ jobs:

      - name: Push multi-arch image (release)
        if: github.event_name == 'release'
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: .
          file: Dockerfile
@@ -7,13 +7,16 @@ on:
      - '.github/workflows/docs-site-checks.yml'
  workflow_dispatch:

+permissions:
+  contents: read
+
 jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 20
          cache: npm
@@ -23,7 +26,7 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

@@ -14,6 +14,9 @@ on:
      - 'run_agent.py'
      - 'acp_adapter/**'

+permissions:
+  contents: read
+
 concurrency:
  group: nix-${{ github.ref }}
  cancel-in-progress: true
@@ -26,7 +29,7 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
      - name: Check flake
@@ -20,14 +20,14 @@ jobs:
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

      - name: Install dependencies
-        run: pip install httpx pyyaml
+        run: pip install httpx==0.28.1 pyyaml==6.0.2

      - name: Build skills index
        env:
@@ -35,7 +35,7 @@ jobs:
        run: python scripts/build_skills_index.py

      - name: Upload index artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: skills-index
          path: website/static/api/skills-index.json
@@ -53,25 +53,25 @@ jobs:
    # Only deploy on schedule or manual trigger (not on every push to the script)
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: skills-index
          path: website/static/api/

-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml
+        run: pip install pyyaml==6.0.2

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
@@ -92,10 +92,10 @@ jobs:
          echo "hermes-agent.nousresearch.com" > _site/CNAME

      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
+        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa  # v3
        with:
          path: _site

      - name: Deploy to GitHub Pages
        id: deploy
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e  # v4
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          fetch-depth: 0

@@ -149,6 +149,62 @@ jobs:
          "
          fi

+          # --- CI/CD workflow files modified ---
+          WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
+          if [ -n "$WORKFLOW_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: CI/CD workflow files modified
+          Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
+
+          **Files:**
+          \`\`\`
+          ${WORKFLOW_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Dockerfile / container build files modified ---
+          DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
+          if [ -n "$DOCKER_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: Container build files modified
+          Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
+
+          **Files:**
+          \`\`\`
+          ${DOCKER_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Dependency manifest files modified ---
+          DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
+          if [ -n "$DEP_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: Dependency manifest files modified
+          Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
+
+          **Files:**
+          \`\`\`
+          ${DEP_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
+          ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
+          if [ -n "$ACTIONS_UNPIN" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: GitHub Actions with mutable version tags
+          Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
+
+          **Matches:**
+          \`\`\`
+          ${ACTIONS_UNPIN}
+          \`\`\`
+          "
+          fi
+
          # --- Output results ---
          if [ -n "$FINDINGS" ]; then
            echo "found=true" >> "$GITHUB_OUTPUT"
@@ -6,6 +6,9 @@ on:
  pull_request:
    branches: [main]

+permissions:
+  contents: read
+
 # Cancel in-progress runs for the same PR/branch
 concurrency:
  group: tests-${{ github.ref }}
@@ -13,17 +16,22 @@ concurrency:

 jobs:
  test:
+    name: test (${{ matrix.group }}/4)
    runs-on: ubuntu-latest
    timeout-minutes: 10
+    strategy:
+      fail-fast: false
+      matrix:
+        group: [1, 2, 3, 4]
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

      - name: Install system dependencies
        run: sudo apt-get update && sudo apt-get install -y ripgrep

      - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11
        run: uv python install 3.11
@@ -34,10 +42,11 @@ jobs:
          source .venv/bin/activate
          uv pip install -e ".[all,dev]"

-      - name: Run tests
+      - name: Run tests (shard ${{ matrix.group }}/4)
        run: |
          source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
+          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short \
+            --splits 4 --group ${{ matrix.group }}
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
@@ -49,10 +58,10 @@ jobs:
    timeout-minutes: 10
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

      - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11
        run: uv python install 3.11
@@ -0,0 +1,108 @@
+# .mailmap — canonical author mapping for git shortlog / git log / GitHub
+# Format: Canonical Name <canonical@email> <commit@email>
+# See: https://git-scm.com/docs/gitmailmap
+#
+# This maps commit emails to GitHub noreply addresses so that:
+# 1. `git shortlog -sn` shows deduplicated contributor counts
+# 2. GitHub's contributor graph can attribute commits correctly
+# 3. Contributors with personal/work emails get proper credit
+#
+# When adding entries: use the contributor's GitHub noreply email as canonical
+# so GitHub can link commits to their profile.
+
+# === Teknium (multiple emails) ===
+Teknium <127238744+teknium1@users.noreply.github.com> <teknium1@gmail.com>
+Teknium <127238744+teknium1@users.noreply.github.com> <teknium@nousresearch.com>
+
+# === Contributors — personal/work emails mapped to GitHub noreply ===
+# Format: Canonical Name <GH-noreply> <commit-email>
+
+# Verified via GH API email search
+luyao618 <364939526@qq.com> <364939526@qq.com>
+ethernet8023 <arilotter@gmail.com> <arilotter@gmail.com>
+nicoloboschi <boschi1997@gmail.com> <boschi1997@gmail.com>
+cherifya <chef.ya@gmail.com> <chef.ya@gmail.com>
+BongSuCHOI <chlqhdtn98@gmail.com> <chlqhdtn98@gmail.com>
+dsocolobsky <dsocolobsky@gmail.com> <dsocolobsky@gmail.com>
+pefontana <fontana.pedro93@gmail.com> <fontana.pedro93@gmail.com>
+Helmi <frank@helmschrott.de> <frank@helmschrott.de>
+hata1234 <hata1234@gmail.com> <hata1234@gmail.com>
+
+# Verified via PR investigation / salvage PR bodies
+DeployFaith <agents@kylefrench.dev> <agents@kylefrench.dev>
+flobo3 <floptopbot33@gmail.com> <floptopbot33@gmail.com>
+gaixianggeng <gaixg94@gmail.com> <gaixg94@gmail.com>
+KUSH42 <xush@xush.org> <xush@xush.org>
+konsisumer <der@konsi.org> <der@konsi.org>
+WorldInnovationsDepartment <vorvul.danylo@gmail.com> <vorvul.danylo@gmail.com>
+m0n5t3r <iacobs@m0n5t3r.info> <iacobs@m0n5t3r.info>
+sprmn24 <oncuevtv@gmail.com> <oncuevtv@gmail.com>
+fancydirty <fancydirty@gmail.com> <fancydirty@gmail.com>
+fxfitz <francis.x.fitzpatrick@gmail.com> <francis.x.fitzpatrick@gmail.com>
+limars874 <limars874@gmail.com> <limars874@gmail.com>
+AaronWong1999 <aaronwong1999@icloud.com> <aaronwong1999@icloud.com>
+dippwho <dipp.who@gmail.com> <dipp.who@gmail.com>
+duerzy <duerzy@gmail.com> <duerzy@gmail.com>
+geoffwellman <geoff.wellman@gmail.com> <geoff.wellman@gmail.com>
+hcshen0111 <shenhaocheng19990111@gmail.com> <shenhaocheng19990111@gmail.com>
+jamesarch <han.shan@live.cn> <han.shan@live.cn>
+stephenschoettler <stephenschoettler@gmail.com> <stephenschoettler@gmail.com>
+Tranquil-Flow <tranquil_flow@protonmail.com> <tranquil_flow@protonmail.com>
+Dusk1e <yusufalweshdemir@gmail.com> <yusufalweshdemir@gmail.com>
+Awsh1 <ysfalweshcan@gmail.com> <ysfalweshcan@gmail.com>
+WAXLYY <ysfwaxlycan@gmail.com> <ysfwaxlycan@gmail.com>
+donrhmexe <don.rhm@gmail.com> <don.rhm@gmail.com>
+hqhq1025 <1506751656@qq.com> <1506751656@qq.com>
+BlackishGreen33 <s5460703@gmail.com> <s5460703@gmail.com>
+tomqiaozc <zqiao@microsoft.com> <zqiao@microsoft.com>
+MagicRay1217 <mingjwan@microsoft.com> <mingjwan@microsoft.com>
+aaronagent <1115117931@qq.com> <1115117931@qq.com>
+YoungYang963 <young@YoungdeMacBook-Pro.local> <young@YoungdeMacBook-Pro.local>
+LongOddCode <haolong@microsoft.com> <haolong@microsoft.com>
+Cafexss <coffeemjj@gmail.com> <coffeemjj@gmail.com>
+Cygra <sjtuwbh@gmail.com> <sjtuwbh@gmail.com>
+DomGrieco <dgrieco@redhat.com> <dgrieco@redhat.com>
+
+# Duplicate email mapping (same person, multiple emails)
+Sertug17 <104278804+Sertug17@users.noreply.github.com> <srhtsrht17@gmail.com>
+yyovil <birdiegyal@gmail.com> <tanishq231003@gmail.com>
+DomGrieco <dgrieco@redhat.com> <dgrieco@redhat.com>
+dsocolobsky <dsocolobsky@gmail.com> <dylan.socolobsky@lambdaclass.com>
+olafthiele <programming@olafthiele.com> <olafthiele@gmail.com>
+
+# Verified via git display name matching GH contributor username
+cokemine <aptx4561@gmail.com> <aptx4561@gmail.com>
+dalianmao000 <dalianmao0107@gmail.com> <dalianmao0107@gmail.com>
+emozilla <emozilla@nousresearch.com> <emozilla@nousresearch.com>
+jjovalle99 <juan.ovalle@mistral.ai> <juan.ovalle@mistral.ai>
+kagura-agent <kagura.chen28@gmail.com> <kagura.chen28@gmail.com>
+spniyant <niyant@spicefi.xyz> <niyant@spicefi.xyz>
+olafthiele <programming@olafthiele.com> <programming@olafthiele.com>
+r266-tech <r2668940489@gmail.com> <r2668940489@gmail.com>
+xingkongliang <tianliangjay@gmail.com> <tianliangjay@gmail.com>
+win4r <win4r@outlook.com> <win4r@outlook.com>
+zhouboli <zhouboli@gmail.com> <zhouboli@gmail.com>
+yongtenglei <yongtenglei@gmail.com> <yongtenglei@gmail.com>
+
+# Nous Research team
+benbarclay <ben@nousresearch.com> <ben@nousresearch.com>
+jquesnelle <jonny@nousresearch.com> <jonny@nousresearch.com>
+
+# GH contributor list verified
+spideystreet <dhicham.pro@gmail.com> <dhicham.pro@gmail.com>
+dorukardahan <dorukardahan@hotmail.com> <dorukardahan@hotmail.com>
+MustafaKara7 <karamusti912@gmail.com> <karamusti912@gmail.com>
+Hmbown <hmbown@gmail.com> <hmbown@gmail.com>
+kamil-gwozdz <kamil@gwozdz.me> <kamil@gwozdz.me>
+kira-ariaki <kira@ariaki.me> <kira@ariaki.me>
+knopki <knopki@duck.com> <knopki@duck.com>
+Unayung <unayung@gmail.com> <unayung@gmail.com>
+SeeYangZhi <yangzhi.see@gmail.com> <yangzhi.see@gmail.com>
+Julientalbot <julien.talbot@ergonomia.re> <julien.talbot@ergonomia.re>
+lesterli <lisicheng168@gmail.com> <lisicheng168@gmail.com>
+JiayuuWang <jiayuw794@gmail.com> <jiayuw794@gmail.com>
+tesseracttars-creator <tesseracttars@gmail.com> <tesseracttars@gmail.com>
+xinbenlv <zzn+pa@zzn.im> <zzn+pa@zzn.im>
+SaulJWu <saul.jj.wu@gmail.com> <saul.jj.wu@gmail.com>
+angelos <angelos@oikos.lan.home.malaiwah.com> <angelos@oikos.lan.home.malaiwah.com>
+MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> <MestreY0d4-Uninter@users.noreply.github.com>
@@ -13,7 +13,7 @@ source venv/bin/activate  # ALWAYS activate before running Python
 ```
 hermes-agent/
 ├── run_agent.py          # AIAgent class — core conversation loop
-├── model_tools.py        # Tool orchestration, _discover_tools(), handle_function_call()
+├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
 ├── cli.py                # HermesCLI class — interactive CLI orchestrator
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
@@ -55,7 +55,7 @@ hermes-agent/
 ├── gateway/              # Messaging platform gateway
 │   ├── run.py            # Main loop, slash commands, message dispatch
 │   ├── session.py        # SessionStore — conversation persistence
-│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal
+│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
 ├── cron/                 # Scheduler (jobs.py, scheduler.py)
 ├── environments/         # RL training environments (Atropos)
@@ -181,7 +181,7 @@ if canonical == "mycommand":

 ## Adding New Tools

-Requires changes in **3 files**:
+Requires changes in **2 files**:

 **1. Create `tools/your_tool.py`:**
 ```python
@@ -204,9 +204,9 @@ registry.register(
 )
 ```

-**2. Add import** in `model_tools.py` `_discover_tools()` list.
+**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.

-**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
+Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.

 The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.

@@ -13,7 +13,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -0,0 +1,27 @@
+# Hermes Agent v0.10.0 (v2026.4.16)
+
+**Release Date:** April 16, 2026
+
+> The Tool Gateway release — paid Nous Portal subscribers can now use web search, image generation, text-to-speech, and browser automation through their existing subscription with zero additional API keys.
+
+---
+
+## ✨ Highlights
+
+- **Nous Tool Gateway** — Paid [Nous Portal](https://portal.nousresearch.com) subscribers now get automatic access to **web search** (Firecrawl), **image generation** (FAL / FLUX 2 Pro), **text-to-speech** (OpenAI TTS), and **browser automation** (Browser Use) through their existing subscription. No separate API keys needed — just run `hermes model`, select Nous Portal, and pick which tools to enable. Per-tool opt-in via `use_gateway` config, full integration with `hermes tools` and `hermes status`, and the runtime correctly prefers the gateway even when direct API keys exist. Replaces the old hidden `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env var with clean subscription-based detection. ([#11206](https://github.com/NousResearch/hermes-agent/pull/11206), based on work by @jquesnelle; docs: [#11208](https://github.com/NousResearch/hermes-agent/pull/11208))
+
+---
+
+## 🐛 Bug Fixes & Improvements
+
+This release includes 180+ commits with numerous bug fixes, platform improvements, and reliability enhancements across the agent core, gateway, CLI, and tool system. Full details will be published in the v0.11.0 changelog.
+
+---
+
+## 👥 Contributors
+
+- **@jquesnelle** (emozilla) — Original Tool Gateway implementation ([#10799](https://github.com/NousResearch/hermes-agent/pull/10799)), salvaged and shipped in this release
+
+---
+
+**Full Changelog**: [v2026.4.13...v2026.4.16](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.16)
@@ -0,0 +1,84 @@
+# Hermes Agent Security Policy
+
+This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project.
+
+## 1. Vulnerability Reporting
+
+Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities.
+
+### Required Submission Details
+- **Title & Severity:** Concise description and CVSS score/rating.
+- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`).
+- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version.
+- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release.
+- **Impact:** Explanation of what trust boundary was crossed.
+
+---
+
+## 2. Trust Model
+
+The core assumption is that Hermes is a **personal agent** with one trusted operator.
+
+### Operator & Session Trust
+- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level.
+- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries.
+- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing.
+
+### Dangerous Command Approval
+The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`:
+- `"on"` (default) — prompts the user to approve dangerous commands.
+- `"auto"` — auto-approves after a configurable delay.
+- `"off"` — disables the gate entirely (break-glass; see Section 3).
+
+### Output Redaction
+`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations.
+
+### Skills vs. MCP Servers
+- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands.
+- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning.
+
+### Code Execution Sandbox
+The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls.
+
+### Subagents
+- **No recursive delegation:** The `delegate_task` tool is disabled for child agents.
+- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected.
+- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation.
+
+---
+
+## 3. Out of Scope (Non-Vulnerabilities)
+
+The following scenarios are **not** considered security breaches:
+- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox.
+- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection.
+- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files).
+- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability.
+- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production.
+- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system).
+
+---
+
+## 4. Deployment Hardening & Best Practices
+
+### Filesystem & Network
+- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads.
+- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs.
+- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment.
+
+### Skills & Supply Chain
+- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal.
+- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned.
+- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns.
+
+### Credential Storage
+- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control.
+- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases.
+
+---
+
+## 5. Disclosure Process
+
+- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first.
+- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com.
+- **Credits:** Reporters are credited in release notes unless anonymity is requested.
@@ -28,19 +28,45 @@ except ImportError:
 logger = logging.getLogger(__name__)

 THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
+# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
+# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
+# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
+# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
+# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
+# is the strongest level they accept).  "minimal" is a legacy alias that
+# maps to low on every model.  See:
+# https://platform.claude.com/docs/en/about-claude/models/migration-guide
 ADAPTIVE_EFFORT_MAP = {
-    "xhigh": "max",
-    "high": "high",
-    "medium": "medium",
-    "low": "low",
+    "max":     "max",
+    "xhigh":   "xhigh",
+    "high":    "high",
+    "medium":  "medium",
+    "low":     "low",
    "minimal": "low",
 }

+# Models that accept the "xhigh" output_config.effort level.  Opus 4.7 added
+# xhigh as a distinct level between high and max; older adaptive-thinking
+# models (4.6) reject it with a 400.  Keep this substring list in sync with
+# the Anthropic migration guide as new model families ship.
+_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
+
+# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
+# is the only supported mode; 4.7 additionally forbids manual thinking entirely
+# and drops temperature/top_p/top_k).
+_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
+
+# Models where temperature/top_p/top_k return 400 if set to non-default values.
+# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
+_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
+
 # ── Max output token limits per Anthropic model ───────────────────────
 # Source: Anthropic docs + Cline model catalog.  Anthropic's API requires
 # max_tokens as a mandatory field.  Previously we hardcoded 16384, which
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.7
+    "claude-opus-4-7":   128_000,
    # Claude 4.6
    "claude-opus-4-6":   128_000,
    "claude-sonnet-4-6":  64_000,
@@ -91,11 +117,37 @@ def _get_anthropic_max_output(model: str) -> int:


 def _supports_adaptive_thinking(model: str) -> bool:
-    """Return True for Claude 4.6 models that support adaptive thinking."""
-    return any(v in model for v in ("4-6", "4.6"))
+    """Return True for Claude 4.6+ models that support adaptive thinking."""
+    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)


-# Beta headers for enhanced features (sent with ALL auth types)
+def _supports_xhigh_effort(model: str) -> bool:
+    """Return True for models that accept the 'xhigh' adaptive effort level.
+
+    Opus 4.7 introduced xhigh as a distinct level between high and max.
+    Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
+    and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
+    when this returns False.
+    """
+    return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
+
+
+def _forbids_sampling_params(model: str) -> bool:
+    """Return True for models that 400 on any non-default temperature/top_p/top_k.
+
+    Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
+    expected to follow suit.  Callers should omit these fields entirely rather
+    than passing zero/default values (the API rejects anything non-null).
+    """
+    return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
+
+
+# Beta headers for enhanced features (sent with ALL auth types).
+# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
+# beta headers are still accepted (harmless no-op) but not required. Kept
+# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
+# that still gate on the headers continue to get the enhanced features.
+# Migration guide: remove these if you no longer support ≤4.5 models.
 _COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
@@ -298,6 +350,33 @@ def build_anthropic_client(api_key: str, base_url: str = None):
    return _anthropic_sdk.Anthropic(**kwargs)


+def build_anthropic_bedrock_client(region: str):
+    """Create an AnthropicBedrock client for Bedrock Claude models.
+
+    Uses the Anthropic SDK's native Bedrock adapter, which provides full
+    Claude feature parity: prompt caching, thinking budgets, adaptive
+    thinking, fast mode — features not available via the Converse API.
+
+    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
+    """
+    if _anthropic_sdk is None:
+        raise ImportError(
+            "The 'anthropic' package is required for the Bedrock provider. "
+            "Install it with: pip install 'anthropic>=0.39.0'"
+        )
+    if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
+        raise ImportError(
+            "anthropic.AnthropicBedrock not available. "
+            "Upgrade with: pip install 'anthropic>=0.39.0'"
+        )
+    from httpx import Timeout
+
+    return _anthropic_sdk.AnthropicBedrock(
+        aws_region=region,
+        timeout=Timeout(timeout=900.0, connect=10.0),
+    )
+
+
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.

@@ -1230,9 +1309,10 @@ def build_anthropic_kwargs(
    When *base_url* points to a third-party Anthropic-compatible endpoint,
    thinking block signatures are stripped (they are Anthropic-proprietary).

-    When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
-    header for ~2.5x faster output throughput on Opus 4.6.  Currently only
-    supported on native Anthropic endpoints (not third-party compatible ones).
+    When *fast_mode* is True, adds ``extra_body["speed"] = "fast"`` and the
+    fast-mode beta header for ~2.5x faster output throughput on Opus 4.6.
+    Currently only supported on native Anthropic endpoints (not third-party
+    compatible ones).
    """
    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
@@ -1313,18 +1393,31 @@ def build_anthropic_kwargs(
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}

    # Map reasoning_config to Anthropic's thinking parameter.
-    # Claude 4.6 models use adaptive thinking + output_config.effort.
+    # Claude 4.6+ models use adaptive thinking + output_config.effort.
    # Older models use manual thinking with budget_tokens.
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
+    #
+    # On 4.7+ the `thinking.display` field defaults to "omitted", which
+    # silently hides reasoning text that Hermes surfaces in its CLI. We
+    # request "summarized" so the reasoning blocks stay populated — matching
+    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
            if _supports_adaptive_thinking(model):
-                kwargs["thinking"] = {"type": "adaptive"}
+                kwargs["thinking"] = {
+                    "type": "adaptive",
+                    "display": "summarized",
+                }
+                adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
+                # Downgrade xhigh→max on models that don't list xhigh as a
+                # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
+                if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
+                    adaptive_effort = "max"
                kwargs["output_config"] = {
-                    "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium")
+                    "effort": adaptive_effort,
                }
            else:
                kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
@@ -1332,12 +1425,21 @@ def build_anthropic_kwargs(
                kwargs["temperature"] = 1
                kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)

+    # ── Strip sampling params on 4.7+ ─────────────────────────────────
+    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
+    # Callers (auxiliary_client, flush_memories, etc.) may set these for
+    # older models; drop them here as a safety net so upstream 4.6 → 4.7
+    # migrations don't require coordinated edits everywhere.
+    if _forbids_sampling_params(model):
+        for _sampling_key in ("temperature", "top_p", "top_k"):
+            kwargs.pop(_sampling_key, None)
+
    # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
-    # Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
-    # Only for native Anthropic endpoints — third-party providers would
-    # reject the unknown beta header and speed parameter.
+    # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
+    # output speed. Only for native Anthropic endpoints — third-party
+    # providers would reject the unknown beta header and speed parameter.
    if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
-        kwargs["speed"] = "fast"
+        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(base_url))
@@ -1389,12 +1491,20 @@ def normalize_anthropic_response(
                )
            )

-    # Map Anthropic stop_reason to OpenAI finish_reason
+    # Map Anthropic stop_reason to OpenAI finish_reason.
+    # Newer stop reasons added in Claude 4.5+ / 4.7:
+    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
+    #   - model_context_window_exceeded: hit context limit (not max_tokens)
+    # Both need distinct handling upstream — a refusal should surface to the
+    # user with a clear message, and a context-window overflow should trigger
+    # compression/truncation rather than be treated as normal end-of-turn.
    stop_reason_map = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
    }
    finish_reason = stop_reason_map.get(response.stop_reason, "stop")

@@ -58,6 +58,9 @@ _PROVIDER_ALIASES = {
    "google": "gemini",
    "google-gemini": "gemini",
    "google-ai-studio": "gemini",
+    "x-ai": "xai",
+    "x.ai": "xai",
+    "grok": "xai",
    "glm": "zai",
    "z-ai": "zai",
    "z.ai": "zai",
@@ -104,6 +107,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "opencode-zen": "gemini-3-flash",
    "opencode-go": "glm-5",
    "kilocode": "google/gemini-3-flash-preview",
+    "ollama-cloud": "nemotron-3-nano:30b",
 }

 # Vision-specific model overrides for direct providers.
@@ -112,6 +116,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "xiaomi": "mimo-v2-omni",
+    "zai": "glm-5v-turbo",
 }

 # OpenRouter app attribution headers
@@ -513,8 +518,13 @@ class _AnthropicCompletionsAdapter:
            tool_choice=normalized_tool_choice,
            is_oauth=self._is_oauth,
        )
+        # Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
+        # temperature for models that still accept it. build_anthropic_kwargs
+        # additionally strips these keys as a safety net — keep both layers.
        if temperature is not None:
-            anthropic_kwargs["temperature"] = temperature
+            from agent.anthropic_adapter import _forbids_sampling_params
+            if not _forbids_sampling_params(model):
+                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
        assistant_message, finish_reason = normalize_anthropic_response(response)
@@ -774,6 +784,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:


 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
+    # Check cross-session rate limit guard before attempting Nous —
+    # if another session already recorded a 429, skip Nous entirely
+    # to avoid piling more requests onto the tapped RPH bucket.
+    try:
+        from agent.nous_rate_guard import nous_rate_limit_remaining
+        _remaining = nous_rate_limit_remaining()
+        if _remaining is not None and _remaining > 0:
+            logger.debug(
+                "Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
+                _remaining,
+            )
+            return None, None
+    except Exception:
+        pass
+
    nous = _read_nous_auth()
    if not nous:
        return None, None
@@ -898,6 +923,51 @@ def _current_custom_base_url() -> str:
    return custom_base or ""


+def _validate_proxy_env_urls() -> None:
+    """Fail fast with a clear error when proxy env vars have malformed URLs.
+
+    Common cause: shell config (e.g. .zshrc) with a typo like
+    ``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...``
+    which concatenates 'export' into the port number.  Without this
+    check the OpenAI/httpx client raises a cryptic ``Invalid port``
+    error that doesn't name the offending env var.
+    """
+    from urllib.parse import urlparse
+
+    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
+                "https_proxy", "http_proxy", "all_proxy"):
+        value = str(os.environ.get(key) or "").strip()
+        if not value:
+            continue
+        try:
+            parsed = urlparse(value)
+            if parsed.scheme:
+                _ = parsed.port          # raises ValueError for e.g. '6153export'
+        except ValueError as exc:
+            raise RuntimeError(
+                f"Malformed proxy environment variable {key}={value!r}. "
+                "Fix or unset your proxy settings and try again."
+            ) from exc
+
+
+def _validate_base_url(base_url: str) -> None:
+    """Reject obviously broken custom endpoint URLs before they reach httpx."""
+    from urllib.parse import urlparse
+
+    candidate = str(base_url or "").strip()
+    if not candidate or candidate.startswith("acp://"):
+        return
+    try:
+        parsed = urlparse(candidate)
+        if parsed.scheme in {"http", "https"}:
+            _ = parsed.port              # raises ValueError for malformed ports
+    except ValueError as exc:
+        raise RuntimeError(
+            f"Malformed custom endpoint URL: {candidate!r}. "
+            "Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
+        ) from exc
+
+
 def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
    runtime = _resolve_custom_runtime()
    if len(runtime) == 2:
@@ -1298,6 +1368,7 @@ def resolve_provider_client(
    Returns:
        (client, resolved_model) or (None, None) if auth is unavailable.
    """
+    _validate_proxy_env_urls()
    # Normalise aliases
    provider = _normalize_aux_provider(provider)

@@ -1834,9 +1905,15 @@ def auxiliary_max_tokens_param(value: int) -> dict:
 # Every auxiliary LLM consumer should use these instead of manually
 # constructing clients and calling .chat.completions.create().

-# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
+# Client cache: (provider, async_mode, base_url, api_key, api_mode, runtime_key) -> (client, default_model, loop)
+# NOTE: loop identity is NOT part of the key.  On async cache hits we check
+# whether the cached loop is the *current* loop; if not, the stale entry is
+# replaced in-place.  This bounds cache growth to one entry per unique
+# provider config rather than one per (config × event-loop), which previously
+# caused unbounded fd accumulation in long-running gateway processes (#10200).
 _client_cache: Dict[tuple, tuple] = {}
 _client_cache_lock = threading.Lock()
+_CLIENT_CACHE_MAX_SIZE = 64  # safety belt — evict oldest when exceeded


 def neuter_async_httpx_del() -> None:
@@ -1969,39 +2046,49 @@ def _get_cached_client(
    Async clients (AsyncOpenAI) use httpx.AsyncClient internally, which
    binds to the event loop that was current when the client was created.
    Using such a client on a *different* loop causes deadlocks or
-    RuntimeError.  To prevent cross-loop issues (especially in gateway
-    mode where _run_async() may spawn fresh loops in worker threads), the
-    cache key for async clients includes the current event loop's identity
-    so each loop gets its own client instance.
+    RuntimeError.  To prevent cross-loop issues, the cache validates on
+    every async hit that the cached loop is the *current, open* loop.
+    If the loop changed (e.g. a new gateway worker-thread loop), the stale
+    entry is replaced in-place rather than creating an additional entry.
+
+    This keeps cache size bounded to one entry per unique provider config,
+    preventing the fd-exhaustion that previously occurred in long-running
+    gateways where recycled worker threads created unbounded entries (#10200).
    """
-    # Include loop identity for async clients to prevent cross-loop reuse.
-    # httpx.AsyncClient (inside AsyncOpenAI) is bound to the loop where it
-    # was created — reusing it on a different loop causes deadlocks (#2681).
-    loop_id = 0
+    # Resolve the current event loop for async clients so we can validate
+    # cached entries.  Loop identity is NOT in the cache key — instead we
+    # check at hit time whether the cached loop is still current and open.
+    # This prevents unbounded cache growth from recycled worker-thread loops
+    # while still guaranteeing we never reuse a client on the wrong loop
+    # (which causes deadlocks, see #2681).
    current_loop = None
    if async_mode:
        try:
            import asyncio as _aio
            current_loop = _aio.get_event_loop()
-            loop_id = id(current_loop)
        except RuntimeError:
            pass
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
            if async_mode:
-                # A cached async client whose loop has been closed will raise
-                # "Event loop is closed" when httpx tries to clean up its
-                # transport.  Discard the stale client and create a fresh one.
-                if cached_loop is not None and cached_loop.is_closed():
-                    _force_close_async_httpx(cached_client)
-                    del _client_cache[cache_key]
-                else:
+                # Validate: the cached client must be bound to the CURRENT,
+                # OPEN loop.  If the loop changed or was closed, the httpx
+                # transport inside is dead — force-close and replace.
+                loop_ok = (
+                    cached_loop is not None
+                    and cached_loop is current_loop
+                    and not cached_loop.is_closed()
+                )
+                if loop_ok:
                    effective = _compat_model(cached_client, model, cached_default)
                    return cached_client, effective
+                # Stale — evict and fall through to create a new client.
+                _force_close_async_httpx(cached_client)
+                del _client_cache[cache_key]
            else:
                effective = _compat_model(cached_client, model, cached_default)
                return cached_client, effective
@@ -2021,6 +2108,12 @@ def _get_cached_client(
        bound_loop = current_loop
        with _client_cache_lock:
            if cache_key not in _client_cache:
+                # Safety belt: if the cache has grown beyond the max, evict
+                # the oldest entries (FIFO — dict preserves insertion order).
+                while len(_client_cache) >= _CLIENT_CACHE_MAX_SIZE:
+                    evict_key, evict_entry = next(iter(_client_cache.items()))
+                    _force_close_async_httpx(evict_entry[0])
+                    del _client_cache[evict_key]
                _client_cache[cache_key] = (client, default_model, bound_loop)
            else:
                client, default_model, _ = _client_cache[cache_key]
@@ -2200,6 +2293,15 @@ def _build_call_kwargs(
        "timeout": timeout,
    }

+    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
+    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
+    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
+    # the aux model is flipped to 4.7.
+    if temperature is not None:
+        from agent.anthropic_adapter import _forbids_sampling_params
+        if _forbids_sampling_params(model):
+            temperature = None
+
    if temperature is not None:
        kwargs["temperature"] = temperature

@@ -2303,10 +2405,10 @@ def call_llm(

    if task == "vision":
        effective_provider, client, final_model = resolve_vision_provider_client(
-            provider=provider,
-            model=model,
-            base_url=base_url,
-            api_key=api_key,
+            provider=resolved_provider if resolved_provider != "auto" else provider,
+            model=resolved_model or model,
+            base_url=resolved_base_url or base_url,
+            api_key=resolved_api_key or api_key,
            async_mode=False,
        )
        if client is None and resolved_provider != "auto" and not resolved_base_url:
@@ -2511,10 +2613,10 @@ async def async_call_llm(

    if task == "vision":
        effective_provider, client, final_model = resolve_vision_provider_client(
-            provider=provider,
-            model=model,
-            base_url=base_url,
-            api_key=api_key,
+            provider=resolved_provider if resolved_provider != "auto" else provider,
+            model=resolved_model or model,
+            base_url=resolved_base_url or base_url,
+            api_key=resolved_api_key or api_key,
            async_mode=True,
        )
        if client is None and resolved_provider != "auto" and not resolved_base_url:
@@ -17,7 +17,10 @@ Improvements over v2:
  - Richer tool call/result detail in summarizer input
 """

+import hashlib
+import json
 import logging
+import re
 import time
 from typing import Any, Dict, List, Optional

@@ -36,7 +39,10 @@ SUMMARY_PREFIX = (
    "into the summary below. This is a handoff from a previous context "
    "window — treat it as background reference, NOT as active instructions. "
    "Do NOT answer questions or fulfill requests mentioned in this summary; "
-    "they were already addressed. Respond ONLY to the latest user message "
+    "they were already addressed. "
+    "Your current task is identified in the '## Active Task' section of the "
+    "summary — resume exactly from there. "
+    "Respond ONLY to the latest user message "
    "that appears AFTER this summary. The current session state (files, "
    "config, etc.) may reflect work described here — avoid repeating it:"
 )
@@ -57,6 +63,128 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
+    """Create an informative 1-line summary of a tool call + result.
+
+    Used during the pre-compression pruning pass to replace large tool
+    outputs with a short but useful description of what the tool did,
+    rather than a generic placeholder that carries zero information.
+
+    Returns strings like::
+
+        [terminal] ran `npm test` -> exit 0, 47 lines output
+        [read_file] read config.py from line 1 (1,200 chars)
+        [search_files] content search for 'compress' in agent/ -> 12 matches
+    """
+    try:
+        args = json.loads(tool_args) if tool_args else {}
+    except (json.JSONDecodeError, TypeError):
+        args = {}
+
+    content = tool_content or ""
+    content_len = len(content)
+    line_count = content.count("\n") + 1 if content.strip() else 0
+
+    if tool_name == "terminal":
+        cmd = args.get("command", "")
+        if len(cmd) > 80:
+            cmd = cmd[:77] + "..."
+        exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
+        exit_code = exit_match.group(1) if exit_match else "?"
+        return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
+
+    if tool_name == "read_file":
+        path = args.get("path", "?")
+        offset = args.get("offset", 1)
+        return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
+
+    if tool_name == "write_file":
+        path = args.get("path", "?")
+        written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
+        return f"[write_file] wrote to {path} ({written_lines} lines)"
+
+    if tool_name == "search_files":
+        pattern = args.get("pattern", "?")
+        path = args.get("path", ".")
+        target = args.get("target", "content")
+        match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
+        count = match_count.group(1) if match_count else "?"
+        return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
+
+    if tool_name == "patch":
+        path = args.get("path", "?")
+        mode = args.get("mode", "replace")
+        return f"[patch] {mode} in {path} ({content_len:,} chars result)"
+
+    if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
+                     "browser_type", "browser_scroll", "browser_vision"):
+        url = args.get("url", "")
+        ref = args.get("ref", "")
+        detail = f" {url}" if url else (f" ref={ref}" if ref else "")
+        return f"[{tool_name}]{detail} ({content_len:,} chars)"
+
+    if tool_name == "web_search":
+        query = args.get("query", "?")
+        return f"[web_search] query='{query}' ({content_len:,} chars result)"
+
+    if tool_name == "web_extract":
+        urls = args.get("urls", [])
+        url_desc = urls[0] if isinstance(urls, list) and urls else "?"
+        if isinstance(urls, list) and len(urls) > 1:
+            url_desc += f" (+{len(urls) - 1} more)"
+        return f"[web_extract] {url_desc} ({content_len:,} chars)"
+
+    if tool_name == "delegate_task":
+        goal = args.get("goal", "")
+        if len(goal) > 60:
+            goal = goal[:57] + "..."
+        return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
+
+    if tool_name == "execute_code":
+        code_preview = (args.get("code") or "")[:60].replace("\n", " ")
+        if len(args.get("code", "")) > 60:
+            code_preview += "..."
+        return f"[execute_code] `{code_preview}` ({line_count} lines output)"
+
+    if tool_name in ("skill_view", "skills_list", "skill_manage"):
+        name = args.get("name", "?")
+        return f"[{tool_name}] name={name} ({content_len:,} chars)"
+
+    if tool_name == "vision_analyze":
+        question = args.get("question", "")[:50]
+        return f"[vision_analyze] '{question}' ({content_len:,} chars)"
+
+    if tool_name == "memory":
+        action = args.get("action", "?")
+        target = args.get("target", "?")
+        return f"[memory] {action} on {target}"
+
+    if tool_name == "todo":
+        return "[todo] updated task list"
+
+    if tool_name == "clarify":
+        return "[clarify] asked user a question"
+
+    if tool_name == "text_to_speech":
+        return f"[text_to_speech] generated audio ({content_len:,} chars)"
+
+    if tool_name == "cronjob":
+        action = args.get("action", "?")
+        return f"[cronjob] {action}"
+
+    if tool_name == "process":
+        action = args.get("action", "?")
+        sid = args.get("session_id", "?")
+        return f"[process] {action} session={sid}"
+
+    # Generic fallback
+    first_arg = ""
+    for k, v in list(args.items())[:2]:
+        sv = str(v)[:40]
+        first_arg += f" {k}={sv}"
+    return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
+
+
 class ContextCompressor(ContextEngine):
    """Default context engine — compresses conversation context via lossy summarization.

@@ -78,6 +206,8 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_compression_savings_pct = 100.0
+        self._ineffective_compression_count = 0

    def update_model(
        self,
@@ -167,6 +297,9 @@ class ContextCompressor(ContextEngine):

        # Stores the previous compaction summary for iterative updates
        self._previous_summary: Optional[str] = None
+        # Anti-thrashing: track whether last compression was effective
+        self._last_compression_savings_pct: float = 100.0
+        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0

    def update_from_response(self, usage: Dict[str, Any]):
@@ -175,9 +308,26 @@ class ContextCompressor(ContextEngine):
        self.last_completion_tokens = usage.get("completion_tokens", 0)

    def should_compress(self, prompt_tokens: int = None) -> bool:
-        """Check if context exceeds the compression threshold."""
+        """Check if context exceeds the compression threshold.
+
+        Includes anti-thrashing protection: if the last two compressions
+        each saved less than 10%, skip compression to avoid infinite loops
+        where each pass removes only 1-2 messages.
+        """
        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
-        return tokens >= self.threshold_tokens
+        if tokens < self.threshold_tokens:
+            return False
+        # Anti-thrashing: back off if recent compressions were ineffective
+        if self._ineffective_compression_count >= 2:
+            if not self.quiet_mode:
+                logger.warning(
+                    "Compression skipped — last %d compressions saved <10%% each. "
+                    "Consider /new to start a fresh session, or /compress <topic> "
+                    "for focused compression.",
+                    self._ineffective_compression_count,
+                )
+            return False
+        return True

    # ------------------------------------------------------------------
    # Tool output pruning (cheap pre-pass, no LLM call)
@@ -187,7 +337,16 @@ class ContextCompressor(ContextEngine):
        self, messages: List[Dict[str, Any]], protect_tail_count: int,
        protect_tail_tokens: int | None = None,
    ) -> tuple[List[Dict[str, Any]], int]:
-        """Replace old tool result contents with a short placeholder.
+        """Replace old tool result contents with informative 1-line summaries.
+
+        Instead of a generic placeholder, generates a summary like::
+
+            [terminal] ran `npm test` -> exit 0, 47 lines output
+            [read_file] read config.py from line 1 (3,400 chars)
+
+        Also deduplicates identical tool results (e.g. reading the same file
+        5x keeps only the newest full copy) and truncates large tool_call
+        arguments in assistant messages outside the protected tail.

        Walks backward from the end, protecting the most recent messages that
        fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -203,6 +362,22 @@ class ContextCompressor(ContextEngine):
        result = [m.copy() for m in messages]
        pruned = 0

+        # Build index: tool_call_id -> (tool_name, arguments_json)
+        call_id_to_tool: Dict[str, tuple] = {}
+        for msg in result:
+            if msg.get("role") == "assistant":
+                for tc in msg.get("tool_calls") or []:
+                    if isinstance(tc, dict):
+                        cid = tc.get("id", "")
+                        fn = tc.get("function", {})
+                        call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
+                    else:
+                        cid = getattr(tc, "id", "") or ""
+                        fn = getattr(tc, "function", None)
+                        name = getattr(fn, "name", "unknown") if fn else "unknown"
+                        args_str = getattr(fn, "arguments", "") if fn else ""
+                        call_id_to_tool[cid] = (name, args_str)
+
        # Determine the prune boundary
        if protect_tail_tokens is not None and protect_tail_tokens > 0:
            # Token-budget approach: walk backward accumulating tokens
@@ -211,7 +386,8 @@ class ContextCompressor(ContextEngine):
            min_protect = min(protect_tail_count, len(result) - 1)
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
-                content_len = len(msg.get("content") or "")
+                raw_content = msg.get("content") or ""
+                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -226,18 +402,69 @@ class ContextCompressor(ContextEngine):
        else:
            prune_boundary = len(result) - protect_tail_count

+        # Pass 1: Deduplicate identical tool results.
+        # When the same file is read multiple times, keep only the most recent
+        # full copy and replace older duplicates with a back-reference.
+        content_hashes: dict = {}  # hash -> (index, tool_call_id)
+        for i in range(len(result) - 1, -1, -1):
+            msg = result[i]
+            if msg.get("role") != "tool":
+                continue
+            content = msg.get("content") or ""
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
+            if len(content) < 200:
+                continue
+            h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
+            if h in content_hashes:
+                # This is an older duplicate — replace with back-reference
+                result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
+                pruned += 1
+            else:
+                content_hashes[h] = (i, msg.get("tool_call_id", "?"))
+
+        # Pass 2: Replace old tool results with informative summaries
        for i in range(prune_boundary):
            msg = result[i]
            if msg.get("role") != "tool":
                continue
            content = msg.get("content", "")
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
            if not content or content == _PRUNED_TOOL_PLACEHOLDER:
                continue
+            # Skip already-deduplicated or previously-summarized results
+            if content.startswith("[Duplicate tool output"):
+                continue
            # Only prune if the content is substantial (>200 chars)
            if len(content) > 200:
-                result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
+                call_id = msg.get("tool_call_id", "")
+                tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
+                summary = _summarize_tool_result(tool_name, tool_args, content)
+                result[i] = {**msg, "content": summary}
                pruned += 1

+        # Pass 3: Truncate large tool_call arguments in assistant messages
+        # outside the protected tail. write_file with 50KB content, for
+        # example, survives pruning entirely without this.
+        for i in range(prune_boundary):
+            msg = result[i]
+            if msg.get("role") != "assistant" or not msg.get("tool_calls"):
+                continue
+            new_tcs = []
+            modified = False
+            for tc in msg["tool_calls"]:
+                if isinstance(tc, dict):
+                    args = tc.get("function", {}).get("arguments", "")
+                    if len(args) > 500:
+                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
+                        modified = True
+                new_tcs.append(tc)
+            if modified:
+                result[i] = {**msg, "tool_calls": new_tcs}
+
        return result, pruned

    # ------------------------------------------------------------------
@@ -357,29 +584,45 @@ class ContextCompressor(ContextEngine):
        )

        # Shared structured template (used by both paths).
-        # Key changes vs v1:
-        #   - "Pending User Asks" section (from Claude Code) explicitly tracks
-        #     unanswered questions so the model knows what's resolved vs open
-        #   - "Remaining Work" replaces "Next Steps" to avoid reading as active
-        #     instructions
-        #   - "Resolved Questions" makes it clear which questions were already
-        #     answered (prevents model from re-answering them)
-        _template_sections = f"""## Goal
-[What the user is trying to accomplish]
+        _template_sections = f"""## Active Task
+[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
+task assignment verbatim — the exact words they used. If multiple tasks
+were requested and only some are done, list only the ones NOT yet completed.
+The next assistant must pick up exactly here. Example:
+"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
+If no outstanding task exists, write "None."]
+
+## Goal
+[What the user is trying to accomplish overall]

 ## Constraints & Preferences
 [User preferences, coding style, constraints, important decisions]

-## Progress
-### Done
-[Completed work — include specific file paths, commands run, results obtained]
-### In Progress
-[Work currently underway]
-### Blocked
-[Any blockers or issues encountered]
+## Completed Actions
+[Numbered list of concrete actions taken — include tool used, target, and outcome.
+Format each as: N. ACTION target — outcome [tool: name]
+Example:
+1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
+2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
+3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
+Be specific with file paths, commands, line numbers, and results.]
+
+## Active State
+[Current working state — include:
+- Working directory and branch (if applicable)
+- Modified/created files with brief note on each
+- Test status (X/Y passing)
+- Any running processes or servers
+- Environment details that matter]
+
+## In Progress
+[Work currently underway — what was being done when compaction fired]
+
+## Blocked
+[Any blockers, errors, or issues not yet resolved. Include exact error messages.]

 ## Key Decisions
-[Important technical decisions and why they were made]
+[Important technical decisions and WHY they were made]

 ## Resolved Questions
 [Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -396,10 +639,7 @@ class ContextCompressor(ContextEngine):
 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

-## Tools & Patterns
-[Which tools were used, how they were used effectively, and any tool-specific discoveries]
-
-Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
+Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

 Write only the summary body. Do not include any preamble or prefix."""

@@ -415,7 +655,7 @@ PREVIOUS SUMMARY:
 NEW TURNS TO INCORPORATE:
 {content_to_summarize}

-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.

 {_template_sections}"""
        else:
@@ -450,7 +690,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "api_mode": self.api_mode,
                },
                "messages": [{"role": "user", "content": prompt}],
-                "max_tokens": summary_budget * 2,
+                "max_tokens": int(summary_budget * 1.3),
                # timeout resolved from auxiliary.compression.timeout config by call_llm
            }
            if self.summary_model:
@@ -464,8 +704,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
+            self._summary_model_fallen_back = False
            return self._with_summary_prefix(summary)
        except RuntimeError:
+            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
@@ -473,12 +715,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
            return None
        except Exception as e:
-            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            # If the summary model is different from the main model and the
+            # error looks permanent (model not found, 503, 404), fall back to
+            # using the main model instead of entering cooldown that leaves
+            # context growing unbounded.  (#8620 sub-issue 4)
+            _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
+            _err_str = str(e).lower()
+            _is_model_not_found = (
+                _status in (404, 503)
+                or "model_not_found" in _err_str
+                or "does not exist" in _err_str
+                or "no available channel" in _err_str
+            )
+            if (
+                _is_model_not_found
+                and self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' not available (%s). "
+                    "Falling back to main model '%s' for compression.",
+                    self.summary_model, e, self.model,
+                )
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0  # no cooldown
+                return self._generate_summary(messages, summary_budget)  # retry immediately
+
+            # Transient errors (timeout, rate limit, network) — shorter cooldown
+            _transient_cooldown = 60
+            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
                e,
-                _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+                _transient_cooldown,
            )
            return None

@@ -601,6 +873,62 @@ The user has requested that this compaction PRIORITISE preserving all informatio
    # Tail protection by token budget
    # ------------------------------------------------------------------

+    def _find_last_user_message_idx(
+        self, messages: List[Dict[str, Any]], head_end: int
+    ) -> int:
+        """Return the index of the last user-role message at or after *head_end*, or -1."""
+        for i in range(len(messages) - 1, head_end - 1, -1):
+            if messages[i].get("role") == "user":
+                return i
+        return -1
+
+    def _ensure_last_user_message_in_tail(
+        self,
+        messages: List[Dict[str, Any]],
+        cut_idx: int,
+        head_end: int,
+    ) -> int:
+        """Guarantee the most recent user message is in the protected tail.
+
+        Context compressor bug (#10896): ``_align_boundary_backward`` can pull
+        ``cut_idx`` past a user message when it tries to keep tool_call/result
+        groups together.  If the last user message ends up in the *compressed*
+        middle region the LLM summariser writes it into "Pending User Asks",
+        but ``SUMMARY_PREFIX`` tells the next model to respond only to user
+        messages *after* the summary — so the task effectively disappears from
+        the active context, causing the agent to stall, repeat completed work,
+        or silently drop the user's latest request.
+
+        Fix: if the last user-role message is not already in the tail
+        (``messages[cut_idx:]``), walk ``cut_idx`` back to include it.  We
+        then re-align backward one more time to avoid splitting any
+        tool_call/result group that immediately precedes the user message.
+        """
+        last_user_idx = self._find_last_user_message_idx(messages, head_end)
+        if last_user_idx < 0:
+            # No user message found beyond head — nothing to anchor.
+            return cut_idx
+
+        if last_user_idx >= cut_idx:
+            # Already in the tail; nothing to do.
+            return cut_idx
+
+        # The last user message is in the middle (compressed) region.
+        # Pull cut_idx back to it directly — a user message is already a
+        # clean boundary (no tool_call/result splitting risk), so there is no
+        # need to call _align_boundary_backward here; doing so would
+        # unnecessarily pull the cut further back into the preceding
+        # assistant + tool_calls group.
+        if not self.quiet_mode:
+            logger.debug(
+                "Anchoring tail cut to last user message at index %d "
+                "(was %d) to prevent active-task loss after compression",
+                last_user_idx,
+                cut_idx,
+            )
+        # Safety: never go back into the head region.
+        return max(last_user_idx, head_end + 1)
+
    def _find_tail_cut_by_tokens(
        self, messages: List[Dict[str, Any]], head_end: int,
        token_budget: int | None = None,
@@ -618,7 +946,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        read, etc.).  If even the minimum 3 messages exceed 1.5x the budget
        the cut is placed right after the head so compression still runs.

-        Never cuts inside a tool_call/result group.
+        Never cuts inside a tool_call/result group.  Always ensures the most
+        recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
        """
        if token_budget is None:
            token_budget = self.tail_token_budget
@@ -657,6 +986,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        # Align to avoid splitting tool groups
        cut_idx = self._align_boundary_backward(messages, cut_idx)

+        # Ensure the most recent user message is always in the tail so the
+        # active task is never lost to compression (fixes #10896).
+        cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)
+
        return max(cut_idx, head_end + 1)

    # ------------------------------------------------------------------
@@ -744,11 +1077,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        compressed = []
        for i in range(compress_start):
            msg = messages[i].copy()
-            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = (
-                    (msg.get("content") or "")
-                    + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                )
+            if i == 0 and msg.get("role") == "system":
+                existing = msg.get("content") or ""
+                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -806,14 +1139,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        compressed = self._sanitize_tool_pairs(compressed)

+        new_estimate = estimate_messages_tokens_rough(compressed)
+        saved_estimate = display_tokens - new_estimate
+
+        # Anti-thrashing: track compression effectiveness
+        savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
+        self._last_compression_savings_pct = savings_pct
+        if savings_pct < 10:
+            self._ineffective_compression_count += 1
+        else:
+            self._ineffective_compression_count = 0
+
        if not self.quiet_mode:
-            new_estimate = estimate_messages_tokens_rough(compressed)
-            saved_estimate = display_tokens - new_estimate
            logger.info(
-                "Compressed: %d -> %d messages (~%d tokens saved)",
+                "Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
                n_messages,
                len(compressed),
                saved_estimate,
+                savings_pct,
            )
            logger.info("Compression #%d complete", self.compression_count)

@@ -313,9 +313,25 @@ class CopilotACPClient:
            tools=tools,
            tool_choice=tool_choice,
        )
+        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
+        # (used natively by the OpenAI SDK) rather than a plain float.
+        if timeout is None:
+            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
+        elif isinstance(timeout, (int, float)):
+            _effective_timeout = float(timeout)
+        else:
+            # httpx.Timeout or similar — pick the largest component so the
+            # subprocess has enough wall-clock time for the full response.
+            _candidates = [
+                getattr(timeout, attr, None)
+                for attr in ("read", "write", "connect", "pool", "timeout")
+            ]
+            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
+            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
+
        response_text, reasoning_text = self._run_prompt(
            prompt_text,
-            timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS),
+            timeout_seconds=_effective_timeout,
        )

        tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
@@ -1152,6 +1152,61 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                },
            )

+    elif provider == "copilot":
+        # Copilot tokens are resolved dynamically via `gh auth token` or
+        # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN).  They don't live in
+        # the auth store or credential pool, so we resolve them here.
+        try:
+            from hermes_cli.copilot_auth import resolve_copilot_token
+            token, source = resolve_copilot_token()
+            if token:
+                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
+                active_sources.add(source_name)
+                pconfig = PROVIDER_REGISTRY.get(provider)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": token,
+                        "base_url": pconfig.inference_base_url if pconfig else "",
+                        "label": source,
+                    },
+                )
+        except Exception as exc:
+            logger.debug("Copilot token seed failed: %s", exc)
+
+    elif provider == "qwen-oauth":
+        # Qwen OAuth tokens live in ~/.qwen/oauth_creds.json, written by
+        # the Qwen CLI (`qwen auth qwen-oauth`).  They aren't in the
+        # Hermes auth store or env vars, so resolve them here.
+        # Use refresh_if_expiring=False to avoid network calls during
+        # pool loading / provider discovery.
+        try:
+            from hermes_cli.auth import resolve_qwen_runtime_credentials
+            creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False)
+            token = creds.get("api_key", "")
+            if token:
+                source_name = creds.get("source", "qwen-cli")
+                active_sources.add(source_name)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_OAUTH,
+                        "access_token": token,
+                        "expires_at_ms": creds.get("expires_at_ms"),
+                        "base_url": creds.get("base_url", ""),
+                        "label": creds.get("auth_file", source_name),
+                    },
+                )
+        except Exception as exc:
+            logger.debug("Qwen OAuth token seed failed: %s", exc)
+
    elif provider == "openai-codex":
        state = _load_provider_state(auth_store, "openai-codex")
        tokens = state.get("tokens") if isinstance(state, dict) else None
@@ -600,6 +600,45 @@ class KawaiiSpinner:
        "analyzing", "computing", "synthesizing", "formulating", "brainstorming",
    ]

+    @classmethod
+    def get_waiting_faces(cls) -> list:
+        """Return waiting faces from the active skin, falling back to KAWAII_WAITING."""
+        try:
+            skin = _get_skin()
+            if skin:
+                faces = skin.spinner.get("waiting_faces", [])
+                if faces:
+                    return faces
+        except Exception:
+            pass
+        return cls.KAWAII_WAITING
+
+    @classmethod
+    def get_thinking_faces(cls) -> list:
+        """Return thinking faces from the active skin, falling back to KAWAII_THINKING."""
+        try:
+            skin = _get_skin()
+            if skin:
+                faces = skin.spinner.get("thinking_faces", [])
+                if faces:
+                    return faces
+        except Exception:
+            pass
+        return cls.KAWAII_THINKING
+
+    @classmethod
+    def get_thinking_verbs(cls) -> list:
+        """Return thinking verbs from the active skin, falling back to THINKING_VERBS."""
+        try:
+            skin = _get_skin()
+            if skin:
+                verbs = skin.spinner.get("thinking_verbs", [])
+                if verbs:
+                    return verbs
+        except Exception:
+            pass
+        return cls.THINKING_VERBS
+
    def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
        self.message = message
        self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
@@ -954,84 +993,4 @@ def get_cute_tool_message(
 # Honcho session line (one-liner with clickable OSC 8 hyperlink)
 # =========================================================================

-_DIM = "\033[2m"
-_SKY_BLUE = "\033[38;5;117m"
-_ANSI_RESET = "\033[0m"

-
-# =========================================================================
-# Context pressure display (CLI user-facing warnings)
-# =========================================================================
-
-# ANSI color codes for context pressure tiers
-_CYAN = "\033[36m"
-_YELLOW = "\033[33m"
-_BOLD = "\033[1m"
-_DIM_ANSI = "\033[2m"
-
-# Bar characters
-_BAR_FILLED = "▰"
-_BAR_EMPTY = "▱"
-_BAR_WIDTH = 20
-
-
-def format_context_pressure(
-    compaction_progress: float,
-    threshold_tokens: int,
-    threshold_percent: float,
-    compression_enabled: bool = True,
-) -> str:
-    """Build a formatted context pressure line for CLI display.
-
-    The bar and percentage show progress toward the compaction threshold,
-    NOT the raw context window.  100% = compaction fires.
-
-    Args:
-        compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires).
-        threshold_tokens: Compaction threshold in tokens.
-        threshold_percent: Compaction threshold as a fraction of context window.
-        compression_enabled: Whether auto-compression is active.
-    """
-    pct_int = min(int(compaction_progress * 100), 100)
-    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
-    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
-
-    threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
-    threshold_pct_int = int(threshold_percent * 100)
-
-    color = f"{_BOLD}{_YELLOW}"
-    icon = "⚠"
-    if compression_enabled:
-        hint = "compaction approaching"
-    else:
-        hint = "no auto-compaction"
-
-    return (
-        f"  {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
-        f"  {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
-    )
-
-
-def format_context_pressure_gateway(
-    compaction_progress: float,
-    threshold_percent: float,
-    compression_enabled: bool = True,
-) -> str:
-    """Build a plain-text context pressure notification for messaging platforms.
-
-    No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
-    The percentage shows progress toward the compaction threshold.
-    """
-    pct_int = min(int(compaction_progress * 100), 100)
-    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
-    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
-
-    threshold_pct_int = int(threshold_percent * 100)
-
-    icon = "⚠️"
-    if compression_enabled:
-        hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
-    else:
-        hint = "Auto-compaction is disabled — context may be truncated."
-
-    return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"
@@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [
    "please retry after",
    "resource_exhausted",
    "rate increased too quickly",  # Alibaba/DashScope throttling
+    # AWS Bedrock throttling
+    "throttlingexception",
+    "too many concurrent requests",
+    "servicequotaexceededexception",
 ]

 # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [
    # Chinese error messages (some providers return these)
    "超过最大长度",
    "上下文长度",
+    # AWS Bedrock Converse API error patterns
+    "input is too long",
+    "max input token",
+    "input token",
+    "exceeds the maximum number of input tokens",
 ]

 # Model not found patterns
@@ -0,0 +1,764 @@
+"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend.
+
+This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were
+a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP
+traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent,
+streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE.
+
+Architecture
+------------
+- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)``
+  mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses.
+- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated
+  to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` /
+  ``toolConfig`` / ``systemInstruction`` shape.
+- The request body is wrapped ``{project, model, user_prompt_id, request}``
+  per Code Assist API expectations.
+- Responses (``candidates[].content.parts[]``) are converted back to
+  OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``.
+- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks.
+
+Attribution
+-----------
+Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public
+Gemini API docs. Request envelope shape
+(``{project, model, user_prompt_id, request}``) is documented nowhere; it is
+reverse-engineered from the opencode-gemini-auth and clawdbot implementations.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+import uuid
+from types import SimpleNamespace
+from typing import Any, Dict, Iterator, List, Optional
+
+import httpx
+
+from agent import google_oauth
+from agent.google_code_assist import (
+    CODE_ASSIST_ENDPOINT,
+    FREE_TIER_ID,
+    CodeAssistError,
+    ProjectContext,
+    resolve_project_context,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Request translation: OpenAI → Gemini
+# =============================================================================
+
+_ROLE_MAP_OPENAI_TO_GEMINI = {
+    "user": "user",
+    "assistant": "model",
+    "system": "user",   # handled separately via systemInstruction
+    "tool": "user",     # functionResponse is wrapped in a user-role turn
+    "function": "user",
+}
+
+
+def _coerce_content_to_text(content: Any) -> str:
+    """OpenAI content may be str or a list of parts; reduce to plain text."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        pieces: List[str] = []
+        for p in content:
+            if isinstance(p, str):
+                pieces.append(p)
+            elif isinstance(p, dict):
+                if p.get("type") == "text" and isinstance(p.get("text"), str):
+                    pieces.append(p["text"])
+                # Multimodal (image_url, etc.) — stub for now; log and skip
+                elif p.get("type") in ("image_url", "input_audio"):
+                    logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
+        return "\n".join(pieces)
+    return str(content)
+
+
+def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
+    """OpenAI tool_call -> Gemini functionCall part."""
+    fn = tool_call.get("function") or {}
+    args_raw = fn.get("arguments", "")
+    try:
+        args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
+    except json.JSONDecodeError:
+        args = {"_raw": args_raw}
+    if not isinstance(args, dict):
+        args = {"_value": args}
+    return {
+        "functionCall": {
+            "name": fn.get("name") or "",
+            "args": args,
+        },
+        # Sentinel signature — matches opencode-gemini-auth's approach.
+        # Without this, Code Assist rejects function calls that originated
+        # outside its own chain.
+        "thoughtSignature": "skip_thought_signature_validator",
+    }
+
+
+def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]:
+    """OpenAI tool-role message -> Gemini functionResponse part.
+
+    The function name isn't in the OpenAI tool message directly; it must be
+    passed via the assistant message that issued the call. For simplicity we
+    look up ``name`` on the message (OpenAI SDK copies it there) or on the
+    ``tool_call_id`` cross-reference.
+    """
+    name = str(message.get("name") or message.get("tool_call_id") or "tool")
+    content = _coerce_content_to_text(message.get("content"))
+    # Gemini expects the response as a dict under `response`. We wrap plain
+    # text in {"output": "..."}.
+    try:
+        parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
+    except json.JSONDecodeError:
+        parsed = None
+    response = parsed if isinstance(parsed, dict) else {"output": content}
+    return {
+        "functionResponse": {
+            "name": name,
+            "response": response,
+        },
+    }
+
+
+def _build_gemini_contents(
+    messages: List[Dict[str, Any]],
+) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
+    """Convert OpenAI messages[] to Gemini contents[] + systemInstruction."""
+    system_text_parts: List[str] = []
+    contents: List[Dict[str, Any]] = []
+
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        role = str(msg.get("role") or "user")
+
+        if role == "system":
+            system_text_parts.append(_coerce_content_to_text(msg.get("content")))
+            continue
+
+        # Tool result message — emit a user-role turn with functionResponse
+        if role == "tool" or role == "function":
+            contents.append({
+                "role": "user",
+                "parts": [_translate_tool_result_to_gemini(msg)],
+            })
+            continue
+
+        gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user")
+        parts: List[Dict[str, Any]] = []
+
+        text = _coerce_content_to_text(msg.get("content"))
+        if text:
+            parts.append({"text": text})
+
+        # Assistant messages can carry tool_calls
+        tool_calls = msg.get("tool_calls") or []
+        if isinstance(tool_calls, list):
+            for tc in tool_calls:
+                if isinstance(tc, dict):
+                    parts.append(_translate_tool_call_to_gemini(tc))
+
+        if not parts:
+            # Gemini rejects empty parts; skip the turn entirely
+            continue
+
+        contents.append({"role": gemini_role, "parts": parts})
+
+    system_instruction: Optional[Dict[str, Any]] = None
+    joined_system = "\n".join(p for p in system_text_parts if p).strip()
+    if joined_system:
+        system_instruction = {
+            "role": "system",
+            "parts": [{"text": joined_system}],
+        }
+
+    return contents, system_instruction
+
+
+def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
+    """OpenAI tools[] -> Gemini tools[].functionDeclarations[]."""
+    if not isinstance(tools, list) or not tools:
+        return []
+    declarations: List[Dict[str, Any]] = []
+    for t in tools:
+        if not isinstance(t, dict):
+            continue
+        fn = t.get("function") or {}
+        if not isinstance(fn, dict):
+            continue
+        name = fn.get("name")
+        if not name:
+            continue
+        decl = {"name": str(name)}
+        if fn.get("description"):
+            decl["description"] = str(fn["description"])
+        params = fn.get("parameters")
+        if isinstance(params, dict):
+            decl["parameters"] = params
+        declarations.append(decl)
+    if not declarations:
+        return []
+    return [{"functionDeclarations": declarations}]
+
+
+def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
+    """OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig."""
+    if tool_choice is None:
+        return None
+    if isinstance(tool_choice, str):
+        if tool_choice == "auto":
+            return {"functionCallingConfig": {"mode": "AUTO"}}
+        if tool_choice == "required":
+            return {"functionCallingConfig": {"mode": "ANY"}}
+        if tool_choice == "none":
+            return {"functionCallingConfig": {"mode": "NONE"}}
+    if isinstance(tool_choice, dict):
+        fn = tool_choice.get("function") or {}
+        name = fn.get("name")
+        if name:
+            return {
+                "functionCallingConfig": {
+                    "mode": "ANY",
+                    "allowedFunctionNames": [str(name)],
+                },
+            }
+    return None
+
+
+def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
+    """Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case)."""
+    if not isinstance(config, dict) or not config:
+        return None
+    budget = config.get("thinkingBudget", config.get("thinking_budget"))
+    level = config.get("thinkingLevel", config.get("thinking_level"))
+    include = config.get("includeThoughts", config.get("include_thoughts"))
+    normalized: Dict[str, Any] = {}
+    if isinstance(budget, (int, float)):
+        normalized["thinkingBudget"] = int(budget)
+    if isinstance(level, str) and level.strip():
+        normalized["thinkingLevel"] = level.strip().lower()
+    if isinstance(include, bool):
+        normalized["includeThoughts"] = include
+    return normalized or None
+
+
+def build_gemini_request(
+    *,
+    messages: List[Dict[str, Any]],
+    tools: Any = None,
+    tool_choice: Any = None,
+    temperature: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+    top_p: Optional[float] = None,
+    stop: Any = None,
+    thinking_config: Any = None,
+) -> Dict[str, Any]:
+    """Build the inner Gemini request body (goes inside ``request`` wrapper)."""
+    contents, system_instruction = _build_gemini_contents(messages)
+
+    body: Dict[str, Any] = {"contents": contents}
+    if system_instruction is not None:
+        body["systemInstruction"] = system_instruction
+
+    gemini_tools = _translate_tools_to_gemini(tools)
+    if gemini_tools:
+        body["tools"] = gemini_tools
+    tool_cfg = _translate_tool_choice_to_gemini(tool_choice)
+    if tool_cfg is not None:
+        body["toolConfig"] = tool_cfg
+
+    generation_config: Dict[str, Any] = {}
+    if isinstance(temperature, (int, float)):
+        generation_config["temperature"] = float(temperature)
+    if isinstance(max_tokens, int) and max_tokens > 0:
+        generation_config["maxOutputTokens"] = max_tokens
+    if isinstance(top_p, (int, float)):
+        generation_config["topP"] = float(top_p)
+    if isinstance(stop, str) and stop:
+        generation_config["stopSequences"] = [stop]
+    elif isinstance(stop, list) and stop:
+        generation_config["stopSequences"] = [str(s) for s in stop if s]
+    normalized_thinking = _normalize_thinking_config(thinking_config)
+    if normalized_thinking:
+        generation_config["thinkingConfig"] = normalized_thinking
+    if generation_config:
+        body["generationConfig"] = generation_config
+
+    return body
+
+
+def wrap_code_assist_request(
+    *,
+    project_id: str,
+    model: str,
+    inner_request: Dict[str, Any],
+    user_prompt_id: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Wrap the inner Gemini request in the Code Assist envelope."""
+    return {
+        "project": project_id,
+        "model": model,
+        "user_prompt_id": user_prompt_id or str(uuid.uuid4()),
+        "request": inner_request,
+    }
+
+
+# =============================================================================
+# Response translation: Gemini → OpenAI
+# =============================================================================
+
+def _translate_gemini_response(
+    resp: Dict[str, Any],
+    model: str,
+) -> SimpleNamespace:
+    """Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace.
+
+    Code Assist wraps the actual Gemini response inside ``response``, so we
+    unwrap it first if present.
+    """
+    inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp
+
+    candidates = inner.get("candidates") or []
+    if not isinstance(candidates, list) or not candidates:
+        return _empty_response(model)
+
+    cand = candidates[0]
+    content_obj = cand.get("content") if isinstance(cand, dict) else {}
+    parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
+
+    text_pieces: List[str] = []
+    reasoning_pieces: List[str] = []
+    tool_calls: List[SimpleNamespace] = []
+
+    for i, part in enumerate(parts or []):
+        if not isinstance(part, dict):
+            continue
+        # Thought parts are model's internal reasoning — surface as reasoning,
+        # don't mix into content.
+        if part.get("thought") is True:
+            if isinstance(part.get("text"), str):
+                reasoning_pieces.append(part["text"])
+            continue
+        if isinstance(part.get("text"), str):
+            text_pieces.append(part["text"])
+            continue
+        fc = part.get("functionCall")
+        if isinstance(fc, dict) and fc.get("name"):
+            try:
+                args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
+            except (TypeError, ValueError):
+                args_str = "{}"
+            tool_calls.append(SimpleNamespace(
+                id=f"call_{uuid.uuid4().hex[:12]}",
+                type="function",
+                index=i,
+                function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
+            ))
+
+    finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(
+        str(cand.get("finishReason") or "")
+    )
+
+    usage_meta = inner.get("usageMetadata") or {}
+    usage = SimpleNamespace(
+        prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
+        completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
+        total_tokens=int(usage_meta.get("totalTokenCount") or 0),
+        prompt_tokens_details=SimpleNamespace(
+            cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
+        ),
+    )
+
+    message = SimpleNamespace(
+        role="assistant",
+        content="".join(text_pieces) if text_pieces else None,
+        tool_calls=tool_calls or None,
+        reasoning="".join(reasoning_pieces) or None,
+        reasoning_content="".join(reasoning_pieces) or None,
+        reasoning_details=None,
+    )
+    choice = SimpleNamespace(
+        index=0,
+        message=message,
+        finish_reason=finish_reason,
+    )
+    return SimpleNamespace(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        object="chat.completion",
+        created=int(time.time()),
+        model=model,
+        choices=[choice],
+        usage=usage,
+    )
+
+
+def _empty_response(model: str) -> SimpleNamespace:
+    message = SimpleNamespace(
+        role="assistant", content="", tool_calls=None,
+        reasoning=None, reasoning_content=None, reasoning_details=None,
+    )
+    choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
+    usage = SimpleNamespace(
+        prompt_tokens=0, completion_tokens=0, total_tokens=0,
+        prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+    )
+    return SimpleNamespace(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        object="chat.completion",
+        created=int(time.time()),
+        model=model,
+        choices=[choice],
+        usage=usage,
+    )
+
+
+def _map_gemini_finish_reason(reason: str) -> str:
+    mapping = {
+        "STOP": "stop",
+        "MAX_TOKENS": "length",
+        "SAFETY": "content_filter",
+        "RECITATION": "content_filter",
+        "OTHER": "stop",
+    }
+    return mapping.get(reason.upper(), "stop")
+
+
+# =============================================================================
+# Streaming SSE iterator
+# =============================================================================
+
+class _GeminiStreamChunk(SimpleNamespace):
+    """Mimics an OpenAI ChatCompletionChunk with .choices[0].delta."""
+    pass
+
+
+def _make_stream_chunk(
+    *,
+    model: str,
+    content: str = "",
+    tool_call_delta: Optional[Dict[str, Any]] = None,
+    finish_reason: Optional[str] = None,
+    reasoning: str = "",
+) -> _GeminiStreamChunk:
+    delta_kwargs: Dict[str, Any] = {"role": "assistant"}
+    if content:
+        delta_kwargs["content"] = content
+    if tool_call_delta is not None:
+        delta_kwargs["tool_calls"] = [SimpleNamespace(
+            index=tool_call_delta.get("index", 0),
+            id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
+            type="function",
+            function=SimpleNamespace(
+                name=tool_call_delta.get("name") or "",
+                arguments=tool_call_delta.get("arguments") or "",
+            ),
+        )]
+    if reasoning:
+        delta_kwargs["reasoning"] = reasoning
+        delta_kwargs["reasoning_content"] = reasoning
+    delta = SimpleNamespace(**delta_kwargs)
+    choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
+    return _GeminiStreamChunk(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        object="chat.completion.chunk",
+        created=int(time.time()),
+        model=model,
+        choices=[choice],
+        usage=None,
+    )
+
+
+def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
+    """Parse Server-Sent Events from an httpx streaming response."""
+    buffer = ""
+    for chunk in response.iter_text():
+        if not chunk:
+            continue
+        buffer += chunk
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.rstrip("\r")
+            if not line:
+                continue
+            if line.startswith("data: "):
+                data = line[6:]
+                if data == "[DONE]":
+                    return
+                try:
+                    yield json.loads(data)
+                except json.JSONDecodeError:
+                    logger.debug("Non-JSON SSE line: %s", data[:200])
+
+
+def _translate_stream_event(
+    event: Dict[str, Any],
+    model: str,
+    tool_call_indices: Dict[str, int],
+) -> List[_GeminiStreamChunk]:
+    """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s)."""
+    inner = event.get("response") if isinstance(event.get("response"), dict) else event
+    candidates = inner.get("candidates") or []
+    if not candidates:
+        return []
+    cand = candidates[0]
+    if not isinstance(cand, dict):
+        return []
+
+    chunks: List[_GeminiStreamChunk] = []
+
+    content = cand.get("content") or {}
+    parts = content.get("parts") if isinstance(content, dict) else []
+    for part in parts or []:
+        if not isinstance(part, dict):
+            continue
+        if part.get("thought") is True and isinstance(part.get("text"), str):
+            chunks.append(_make_stream_chunk(
+                model=model, reasoning=part["text"],
+            ))
+            continue
+        if isinstance(part.get("text"), str) and part["text"]:
+            chunks.append(_make_stream_chunk(model=model, content=part["text"]))
+        fc = part.get("functionCall")
+        if isinstance(fc, dict) and fc.get("name"):
+            name = str(fc["name"])
+            idx = tool_call_indices.setdefault(name, len(tool_call_indices))
+            try:
+                args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
+            except (TypeError, ValueError):
+                args_str = "{}"
+            chunks.append(_make_stream_chunk(
+                model=model,
+                tool_call_delta={
+                    "index": idx,
+                    "name": name,
+                    "arguments": args_str,
+                },
+            ))
+
+    finish_reason_raw = str(cand.get("finishReason") or "")
+    if finish_reason_raw:
+        mapped = _map_gemini_finish_reason(finish_reason_raw)
+        if tool_call_indices:
+            mapped = "tool_calls"
+        chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
+    return chunks
+
+
+# =============================================================================
+# GeminiCloudCodeClient — OpenAI-compatible facade
+# =============================================================================
+
+MARKER_BASE_URL = "cloudcode-pa://google"
+
+
+class _GeminiChatCompletions:
+    def __init__(self, client: "GeminiCloudCodeClient"):
+        self._client = client
+
+    def create(self, **kwargs: Any) -> Any:
+        return self._client._create_chat_completion(**kwargs)
+
+
+class _GeminiChatNamespace:
+    def __init__(self, client: "GeminiCloudCodeClient"):
+        self.completions = _GeminiChatCompletions(client)
+
+
+class GeminiCloudCodeClient:
+    """Minimal OpenAI-SDK-compatible facade over Code Assist v1internal."""
+
+    def __init__(
+        self,
+        *,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        default_headers: Optional[Dict[str, str]] = None,
+        project_id: str = "",
+        **_: Any,
+    ):
+        # `api_key` here is a dummy — real auth is the OAuth access token
+        # fetched on every call via agent.google_oauth.get_valid_access_token().
+        # We accept the kwarg for openai.OpenAI interface parity.
+        self.api_key = api_key or "google-oauth"
+        self.base_url = base_url or MARKER_BASE_URL
+        self._default_headers = dict(default_headers or {})
+        self._configured_project_id = project_id
+        self._project_context: Optional[ProjectContext] = None
+        self._project_context_lock = False  # simple single-thread guard
+        self.chat = _GeminiChatNamespace(self)
+        self.is_closed = False
+        self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0))
+
+    def close(self) -> None:
+        self.is_closed = True
+        try:
+            self._http.close()
+        except Exception:
+            pass
+
+    # Implement the OpenAI SDK's context-manager-ish closure check
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext:
+        """Lazily resolve and cache the project context for this client."""
+        if self._project_context is not None:
+            return self._project_context
+
+        env_project = google_oauth.resolve_project_id_from_env()
+        creds = google_oauth.load_credentials()
+        stored_project = creds.project_id if creds else ""
+
+        # Prefer what's already baked into the creds
+        if stored_project:
+            self._project_context = ProjectContext(
+                project_id=stored_project,
+                managed_project_id=creds.managed_project_id if creds else "",
+                tier_id="",
+                source="stored",
+            )
+            return self._project_context
+
+        ctx = resolve_project_context(
+            access_token,
+            configured_project_id=self._configured_project_id,
+            env_project_id=env_project,
+            user_agent_model=model,
+        )
+        # Persist discovered project back to the creds file so the next
+        # session doesn't re-run the discovery.
+        if ctx.project_id or ctx.managed_project_id:
+            google_oauth.update_project_ids(
+                project_id=ctx.project_id,
+                managed_project_id=ctx.managed_project_id,
+            )
+        self._project_context = ctx
+        return ctx
+
+    def _create_chat_completion(
+        self,
+        *,
+        model: str = "gemini-2.5-flash",
+        messages: Optional[List[Dict[str, Any]]] = None,
+        stream: bool = False,
+        tools: Any = None,
+        tool_choice: Any = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        stop: Any = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Any = None,
+        **_: Any,
+    ) -> Any:
+        access_token = google_oauth.get_valid_access_token()
+        ctx = self._ensure_project_context(access_token, model)
+
+        thinking_config = None
+        if isinstance(extra_body, dict):
+            thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
+
+        inner = build_gemini_request(
+            messages=messages or [],
+            tools=tools,
+            tool_choice=tool_choice,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            stop=stop,
+            thinking_config=thinking_config,
+        )
+        wrapped = wrap_code_assist_request(
+            project_id=ctx.project_id,
+            model=model,
+            inner_request=inner,
+        )
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "Authorization": f"Bearer {access_token}",
+            "User-Agent": "hermes-agent (gemini-cli-compat)",
+            "X-Goog-Api-Client": "gl-python/hermes",
+            "x-activity-request-id": str(uuid.uuid4()),
+        }
+        headers.update(self._default_headers)
+
+        if stream:
+            return self._stream_completion(model=model, wrapped=wrapped, headers=headers)
+
+        url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent"
+        response = self._http.post(url, json=wrapped, headers=headers)
+        if response.status_code != 200:
+            raise _gemini_http_error(response)
+        try:
+            payload = response.json()
+        except ValueError as exc:
+            raise CodeAssistError(
+                f"Invalid JSON from Code Assist: {exc}",
+                code="code_assist_invalid_json",
+            ) from exc
+        return _translate_gemini_response(payload, model=model)
+
+    def _stream_completion(
+        self,
+        *,
+        model: str,
+        wrapped: Dict[str, Any],
+        headers: Dict[str, str],
+    ) -> Iterator[_GeminiStreamChunk]:
+        """Generator that yields OpenAI-shaped streaming chunks."""
+        url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse"
+        stream_headers = dict(headers)
+        stream_headers["Accept"] = "text/event-stream"
+
+        def _generator() -> Iterator[_GeminiStreamChunk]:
+            try:
+                with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response:
+                    if response.status_code != 200:
+                        # Materialize error body for better diagnostics
+                        response.read()
+                        raise _gemini_http_error(response)
+                    tool_call_indices: Dict[str, int] = {}
+                    for event in _iter_sse_events(response):
+                        for chunk in _translate_stream_event(event, model, tool_call_indices):
+                            yield chunk
+            except httpx.HTTPError as exc:
+                raise CodeAssistError(
+                    f"Streaming request failed: {exc}",
+                    code="code_assist_stream_error",
+                ) from exc
+
+        return _generator()
+
+
+def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
+    status = response.status_code
+    try:
+        body = response.text[:500]
+    except Exception:
+        body = ""
+    # Let run_agent's retry logic see auth errors as rotatable via `api_key`
+    code = f"code_assist_http_{status}"
+    if status == 401:
+        code = "code_assist_unauthorized"
+    elif status == 429:
+        code = "code_assist_rate_limited"
+    return CodeAssistError(
+        f"Code Assist returned HTTP {status}: {body}",
+        code=code,
+    )
@@ -0,0 +1,417 @@
+"""Google Code Assist API client — project discovery, onboarding, quota.
+
+The Code Assist API powers Google's official gemini-cli. It sits at
+``cloudcode-pa.googleapis.com`` and provides:
+
+- Free tier access (generous daily quota) for personal Google accounts
+- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise
+
+This module handles the control-plane dance needed before inference:
+
+1. ``load_code_assist()`` — probe the user's account to learn what tier they're on
+   and whether a ``cloudaicompanionProject`` is already assigned.
+2. ``onboard_user()`` — if the user hasn't been onboarded yet (new account, fresh
+   free tier, etc.), call this with the chosen tier + project id. Supports LRO
+   polling for slow provisioning.
+3. ``retrieve_user_quota()`` — fetch the ``buckets[]`` array showing remaining
+   quota per model, used by the ``/gquota`` slash command.
+
+VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter
+will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this
+and force the account to ``standard-tier`` so the call chain still succeeds.
+
+Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The
+request/response shapes are specific to Google's internal Code Assist API,
+documented nowhere public — we copy them from the reference implementations.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Constants
+# =============================================================================
+
+CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
+
+# Fallback endpoints tried when prod returns an error during project discovery
+FALLBACK_ENDPOINTS = [
+    "https://daily-cloudcode-pa.sandbox.googleapis.com",
+    "https://autopush-cloudcode-pa.sandbox.googleapis.com",
+]
+
+# Tier identifiers that Google's API uses
+FREE_TIER_ID = "free-tier"
+LEGACY_TIER_ID = "legacy-tier"
+STANDARD_TIER_ID = "standard-tier"
+
+# Default HTTP headers matching gemini-cli's fingerprint.
+# Google may reject unrecognized User-Agents on these internal endpoints.
+_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)"
+_X_GOOG_API_CLIENT = "gl-node/24.0.0"
+_DEFAULT_REQUEST_TIMEOUT = 30.0
+_ONBOARDING_POLL_ATTEMPTS = 12
+_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
+
+
+class CodeAssistError(RuntimeError):
+    def __init__(self, message: str, *, code: str = "code_assist_error") -> None:
+        super().__init__(message)
+        self.code = code
+
+
+class ProjectIdRequiredError(CodeAssistError):
+    def __init__(self, message: str = "GCP project id required for this tier") -> None:
+        super().__init__(message, code="code_assist_project_id_required")
+
+
+# =============================================================================
+# HTTP primitive (auth via Bearer token passed per-call)
+# =============================================================================
+
+def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]:
+    ua = _GEMINI_CLI_USER_AGENT
+    if user_agent_model:
+        ua = f"{ua} model/{user_agent_model}"
+    return {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+        "Authorization": f"Bearer {access_token}",
+        "User-Agent": ua,
+        "X-Goog-Api-Client": _X_GOOG_API_CLIENT,
+        "x-activity-request-id": str(uuid.uuid4()),
+    }
+
+
+def _client_metadata() -> Dict[str, str]:
+    """Match Google's gemini-cli exactly — unrecognized metadata may be rejected."""
+    return {
+        "ideType": "IDE_UNSPECIFIED",
+        "platform": "PLATFORM_UNSPECIFIED",
+        "pluginType": "GEMINI",
+    }
+
+
+def _post_json(
+    url: str,
+    body: Dict[str, Any],
+    access_token: str,
+    *,
+    timeout: float = _DEFAULT_REQUEST_TIMEOUT,
+    user_agent_model: str = "",
+) -> Dict[str, Any]:
+    data = json.dumps(body).encode("utf-8")
+    request = urllib.request.Request(
+        url, data=data, method="POST",
+        headers=_build_headers(access_token, user_agent_model=user_agent_model),
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            raw = response.read().decode("utf-8", errors="replace")
+            return json.loads(raw) if raw else {}
+    except urllib.error.HTTPError as exc:
+        detail = ""
+        try:
+            detail = exc.read().decode("utf-8", errors="replace")
+        except Exception:
+            pass
+        # Special case: VPC-SC violation should be distinguishable
+        if _is_vpc_sc_violation(detail):
+            raise CodeAssistError(
+                f"VPC-SC policy violation: {detail}",
+                code="code_assist_vpc_sc",
+            ) from exc
+        raise CodeAssistError(
+            f"Code Assist HTTP {exc.code}: {detail or exc.reason}",
+            code=f"code_assist_http_{exc.code}",
+        ) from exc
+    except urllib.error.URLError as exc:
+        raise CodeAssistError(
+            f"Code Assist request failed: {exc}",
+            code="code_assist_network_error",
+        ) from exc
+
+
+def _is_vpc_sc_violation(body: str) -> bool:
+    """Detect a VPC Service Controls violation from a response body."""
+    if not body:
+        return False
+    try:
+        parsed = json.loads(body)
+    except (json.JSONDecodeError, ValueError):
+        return "SECURITY_POLICY_VIOLATED" in body
+    # Walk the nested error structure Google uses
+    error = parsed.get("error") if isinstance(parsed, dict) else None
+    if not isinstance(error, dict):
+        return False
+    details = error.get("details") or []
+    if isinstance(details, list):
+        for item in details:
+            if isinstance(item, dict):
+                reason = item.get("reason") or ""
+                if reason == "SECURITY_POLICY_VIOLATED":
+                    return True
+    msg = str(error.get("message", ""))
+    return "SECURITY_POLICY_VIOLATED" in msg
+
+
+# =============================================================================
+# load_code_assist — discovers current tier + assigned project
+# =============================================================================
+
+@dataclass
+class CodeAssistProjectInfo:
+    """Result from ``load_code_assist``."""
+    current_tier_id: str = ""
+    cloudaicompanion_project: str = ""   # Google-managed project (free tier)
+    allowed_tiers: List[str] = field(default_factory=list)
+    raw: Dict[str, Any] = field(default_factory=dict)
+
+
+def load_code_assist(
+    access_token: str,
+    *,
+    project_id: str = "",
+    user_agent_model: str = "",
+) -> CodeAssistProjectInfo:
+    """Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback.
+
+    Returns whatever tier + project info Google reports. On VPC-SC violations,
+    returns a synthetic ``standard-tier`` result so the chain can continue.
+    """
+    body: Dict[str, Any] = {
+        "metadata": {
+            "duetProject": project_id,
+            **_client_metadata(),
+        },
+    }
+    if project_id:
+        body["cloudaicompanionProject"] = project_id
+
+    endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS
+    last_err: Optional[Exception] = None
+    for endpoint in endpoints:
+        url = f"{endpoint}/v1internal:loadCodeAssist"
+        try:
+            resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
+            return _parse_load_response(resp)
+        except CodeAssistError as exc:
+            if exc.code == "code_assist_vpc_sc":
+                logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint)
+                return CodeAssistProjectInfo(
+                    current_tier_id=STANDARD_TIER_ID,
+                    cloudaicompanion_project=project_id,
+                )
+            last_err = exc
+            logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc)
+            continue
+    if last_err:
+        raise last_err
+    return CodeAssistProjectInfo()
+
+
+def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo:
+    current_tier = resp.get("currentTier") or {}
+    tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else ""
+    project = str(resp.get("cloudaicompanionProject") or "")
+    allowed = resp.get("allowedTiers") or []
+    allowed_ids: List[str] = []
+    if isinstance(allowed, list):
+        for t in allowed:
+            if isinstance(t, dict):
+                tid = str(t.get("id") or "")
+                if tid:
+                    allowed_ids.append(tid)
+    return CodeAssistProjectInfo(
+        current_tier_id=tier_id,
+        cloudaicompanion_project=project,
+        allowed_tiers=allowed_ids,
+        raw=resp,
+    )
+
+
+# =============================================================================
+# onboard_user — provisions a new user on a tier (with LRO polling)
+# =============================================================================
+
+def onboard_user(
+    access_token: str,
+    *,
+    tier_id: str,
+    project_id: str = "",
+    user_agent_model: str = "",
+) -> Dict[str, Any]:
+    """Call ``POST /v1internal:onboardUser`` to provision the user.
+
+    For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError).
+    For free tiers, ``project_id`` is optional — Google will assign one.
+
+    Returns the final operation response. Polls ``/v1internal/<name>`` for up
+    to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS``
+    (default: 12 × 5s = 1 min).
+    """
+    if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id:
+        raise ProjectIdRequiredError(
+            f"Tier {tier_id!r} requires a GCP project id. "
+            "Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT."
+        )
+
+    body: Dict[str, Any] = {
+        "tierId": tier_id,
+        "metadata": _client_metadata(),
+    }
+    if project_id:
+        body["cloudaicompanionProject"] = project_id
+
+    endpoint = CODE_ASSIST_ENDPOINT
+    url = f"{endpoint}/v1internal:onboardUser"
+    resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
+
+    # Poll if LRO (long-running operation)
+    if not resp.get("done"):
+        op_name = resp.get("name", "")
+        if not op_name:
+            return resp
+        for attempt in range(_ONBOARDING_POLL_ATTEMPTS):
+            time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS)
+            poll_url = f"{endpoint}/v1internal/{op_name}"
+            try:
+                poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model)
+            except CodeAssistError as exc:
+                logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc)
+                continue
+            if poll_resp.get("done"):
+                return poll_resp
+        logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS)
+    return resp
+
+
+# =============================================================================
+# retrieve_user_quota — for /gquota
+# =============================================================================
+
+@dataclass
+class QuotaBucket:
+    model_id: str
+    token_type: str = ""
+    remaining_fraction: float = 0.0
+    reset_time_iso: str = ""
+    raw: Dict[str, Any] = field(default_factory=dict)
+
+
+def retrieve_user_quota(
+    access_token: str,
+    *,
+    project_id: str = "",
+    user_agent_model: str = "",
+) -> List[QuotaBucket]:
+    """Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``."""
+    body: Dict[str, Any] = {}
+    if project_id:
+        body["project"] = project_id
+    url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota"
+    resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
+    raw_buckets = resp.get("buckets") or []
+    buckets: List[QuotaBucket] = []
+    if not isinstance(raw_buckets, list):
+        return buckets
+    for b in raw_buckets:
+        if not isinstance(b, dict):
+            continue
+        buckets.append(QuotaBucket(
+            model_id=str(b.get("modelId") or ""),
+            token_type=str(b.get("tokenType") or ""),
+            remaining_fraction=float(b.get("remainingFraction") or 0.0),
+            reset_time_iso=str(b.get("resetTime") or ""),
+            raw=b,
+        ))
+    return buckets
+
+
+# =============================================================================
+# Project context resolution
+# =============================================================================
+
+@dataclass
+class ProjectContext:
+    """Resolved state for a given OAuth session."""
+    project_id: str = ""           # effective project id sent on requests
+    managed_project_id: str = ""   # Google-assigned project (free tier)
+    tier_id: str = ""
+    source: str = ""               # "env", "config", "discovered", "onboarded"
+
+
+def resolve_project_context(
+    access_token: str,
+    *,
+    configured_project_id: str = "",
+    env_project_id: str = "",
+    user_agent_model: str = "",
+) -> ProjectContext:
+    """Figure out what project id + tier to use for requests.
+
+    Priority:
+      1. If configured_project_id or env_project_id is set, use that directly
+         and short-circuit (no discovery needed).
+      2. Otherwise call loadCodeAssist to see what Google says.
+      3. If no tier assigned yet, onboard the user (free tier default).
+    """
+    # Short-circuit: caller provided a project id
+    if configured_project_id:
+        return ProjectContext(
+            project_id=configured_project_id,
+            tier_id=STANDARD_TIER_ID,  # assume paid since they specified one
+            source="config",
+        )
+    if env_project_id:
+        return ProjectContext(
+            project_id=env_project_id,
+            tier_id=STANDARD_TIER_ID,
+            source="env",
+        )
+
+    # Discover via loadCodeAssist
+    info = load_code_assist(access_token, user_agent_model=user_agent_model)
+
+    effective_project = info.cloudaicompanion_project
+    tier = info.current_tier_id
+
+    if not tier:
+        # User hasn't been onboarded — provision them on free tier
+        onboard_resp = onboard_user(
+            access_token,
+            tier_id=FREE_TIER_ID,
+            project_id="",
+            user_agent_model=user_agent_model,
+        )
+        # Re-parse from the onboard response
+        response_body = onboard_resp.get("response") or {}
+        if isinstance(response_body, dict):
+            effective_project = (
+                effective_project
+                or str(response_body.get("cloudaicompanionProject") or "")
+            )
+        tier = FREE_TIER_ID
+        source = "onboarded"
+    else:
+        source = "discovered"
+
+    return ProjectContext(
+        project_id=effective_project,
+        managed_project_id=effective_project if tier == FREE_TIER_ID else "",
+        tier_id=tier,
+        source=source,
+    )
@@ -634,13 +634,7 @@ class InsightsEngine:
        lines.append(f"  Sessions:          {o['total_sessions']:<12}  Messages:        {o['total_messages']:,}")
        lines.append(f"  Tool calls:        {o['total_tool_calls']:<12,}  User messages:   {o['user_messages']:,}")
        lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
-        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
-        if cache_total > 0:
-            lines.append(f"  Cache read:        {o['total_cache_read_tokens']:<12,}  Cache write:     {o['total_cache_write_tokens']:,}")
-        cost_str = f"${o['estimated_cost']:.2f}"
-        if o.get("models_without_pricing"):
-            cost_str += " *"
-        lines.append(f"  Total tokens:      {o['total_tokens']:<12,}  Est. cost:       {cost_str}")
+        lines.append(f"  Total tokens:      {o['total_tokens']:,}")
        if o["total_hours"] > 0:
            lines.append(f"  Active time:       ~{_format_duration(o['total_hours'] * 3600):<11}  Avg session:     ~{_format_duration(o['avg_session_duration'])}")
        lines.append(f"  Avg msgs/session:  {o['avg_messages_per_session']:.1f}")
@@ -650,16 +644,10 @@ class InsightsEngine:
        if report["models"]:
            lines.append("  🤖 Models Used")
            lines.append("  " + "─" * 56)
-            lines.append(f"  {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
+            lines.append(f"  {'Model':<30} {'Sessions':>8} {'Tokens':>12}")
            for m in report["models"]:
                model_name = m["model"][:28]
-                if m.get("has_pricing"):
-                    cost_cell = f"${m['cost']:>6.2f}"
-                else:
-                    cost_cell = "     N/A"
-                lines.append(f"  {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
-            if o.get("models_without_pricing"):
-                lines.append("  * Cost N/A for custom/self-hosted models")
+                lines.append(f"  {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}")
            lines.append("")

        # Platform breakdown
@@ -739,15 +727,7 @@ class InsightsEngine:

        # Overview
        lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
-        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
-        if cache_total > 0:
-            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
-        else:
-            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
-        cost_note = ""
-        if o.get("models_without_pricing"):
-            cost_note = " _(excludes custom/self-hosted models)_"
-        lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
+        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
        if o["total_hours"] > 0:
            lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
        lines.append("")
@@ -756,8 +736,7 @@ class InsightsEngine:
        if report["models"]:
            lines.append("**🤖 Models:**")
            for m in report["models"][:5]:
-                cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
-                lines.append(f"  {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
+                lines.append(f"  {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens")
            lines.append("")

        # Platforms (if multi-platform)
@@ -28,6 +28,7 @@ Usage in run_agent.py:

 from __future__ import annotations

+import json
 import logging
 import re
 from typing import Any, Dict, List, Optional
@@ -43,11 +44,22 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------

 _FENCE_TAG_RE = re.compile(r'</?\s*memory-context\s*>', re.IGNORECASE)
+_INTERNAL_CONTEXT_RE = re.compile(
+    r'<\s*memory-context\s*>[\s\S]*?</\s*memory-context\s*>',
+    re.IGNORECASE,
+)
+_INTERNAL_NOTE_RE = re.compile(
+    r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
+    re.IGNORECASE,
+)


 def sanitize_context(text: str) -> str:
-    """Strip fence-escape sequences from provider output."""
-    return _FENCE_TAG_RE.sub('', text)
+    """Strip fence tags, injected context blocks, and system notes from provider output."""
+    text = _INTERNAL_CONTEXT_RE.sub('', text)
+    text = _INTERNAL_NOTE_RE.sub('', text)
+    text = _FENCE_TAG_RE.sub('', text)
+    return text


 def build_memory_context_block(raw_context: str) -> str:
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -33,9 +33,11 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "google", "google-gemini", "google-ai-studio",
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
+    "ollama",
    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
+    "xai", "x-ai", "x.ai", "grok",
    "qwen-portal",
 })

@@ -100,15 +102,23 @@ DEFAULT_CONTEXT_LENGTHS = {
    # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
    # substring of "anthropic/claude-sonnet-4.6").
    # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
+    "claude-opus-4-7": 1000000,
+    "claude-opus-4.7": 1000000,
    "claude-opus-4-6": 1000000,
    "claude-sonnet-4-6": 1000000,
    "claude-opus-4.6": 1000000,
    "claude-sonnet-4.6": 1000000,
    # Catch-all for older Claude models (must sort after specific entries)
    "claude": 200000,
-    # OpenAI
+    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
+    # Source: https://developers.openai.com/api/docs/models
+    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
+    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
+    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
+    "gpt-5.3-codex-spark": 128000,    # Spark variant has reduced 128k context
+    "gpt-5.1-chat": 128000,           # Chat variant has 128k context
+    "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
    "gpt-4.1": 1047576,
-    "gpt-5": 128000,
    "gpt-4": 128000,
    # Google
    "gemini": 1048576,
@@ -150,6 +160,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "kimi": 262144,
    # Arcee
    "trinity": 262144,
+    # OpenRouter
+    "elephant": 262144,
    # Hugging Face Inference Providers — model IDs use org/name format
    "Qwen/Qwen3.5-397B-A17B": 131072,
    "Qwen/Qwen3.5-35B-A3B": 131072,
@@ -230,6 +242,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.x.ai": "xai",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
+    "ollama.com": "ollama-cloud",
 }


@@ -1003,6 +1016,16 @@ def get_model_context_length(
        if ctx:
            return ctx

+    # 4b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py.
+    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution
+
    # 5. Provider-aware lookups (before generic OpenRouter cache)
    # These are provider-specific and take priority over the generic OR cache,
    # since the same model can have different context limits per provider
@@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "togetherai": "togetherai",
    "perplexity": "perplexity",
    "cohere": "cohere",
+    "ollama-cloud": "ollama-cloud",
 }

 # Reverse mapping: models.dev → Hermes (built lazily)
@@ -0,0 +1,182 @@
+"""Cross-session rate limit guard for Nous Portal.
+
+Writes rate limit state to a shared file so all sessions (CLI, gateway,
+cron, auxiliary) can check whether Nous Portal is currently rate-limited
+before making requests.  Prevents retry amplification when RPH is tapped.
+
+Each 429 from Nous triggers up to 9 API calls per conversation turn
+(3 SDK retries x 3 Hermes retries), and every one of those calls counts
+against RPH.  By recording the rate limit state on first 429 and checking
+it before subsequent attempts, we eliminate the amplification effect.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import tempfile
+import time
+from typing import Any, Mapping, Optional
+
+logger = logging.getLogger(__name__)
+
+_STATE_SUBDIR = "rate_limits"
+_STATE_FILENAME = "nous.json"
+
+
+def _state_path() -> str:
+    """Return the path to the Nous rate limit state file."""
+    try:
+        from hermes_constants import get_hermes_home
+        base = get_hermes_home()
+    except ImportError:
+        base = os.path.join(os.path.expanduser("~"), ".hermes")
+    return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME)
+
+
+def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
+    """Extract the best available reset-time estimate from response headers.
+
+    Priority:
+      1. x-ratelimit-reset-requests-1h  (hourly RPH window — most useful)
+      2. x-ratelimit-reset-requests     (per-minute RPM window)
+      3. retry-after                     (generic HTTP header)
+
+    Returns seconds-from-now, or None if no usable header found.
+    """
+    if not headers:
+        return None
+
+    lowered = {k.lower(): v for k, v in headers.items()}
+
+    for key in (
+        "x-ratelimit-reset-requests-1h",
+        "x-ratelimit-reset-requests",
+        "retry-after",
+    ):
+        raw = lowered.get(key)
+        if raw is not None:
+            try:
+                val = float(raw)
+                if val > 0:
+                    return val
+            except (TypeError, ValueError):
+                pass
+
+    return None
+
+
+def record_nous_rate_limit(
+    *,
+    headers: Optional[Mapping[str, str]] = None,
+    error_context: Optional[dict[str, Any]] = None,
+    default_cooldown: float = 300.0,
+) -> None:
+    """Record that Nous Portal is rate-limited.
+
+    Parses the reset time from response headers or error context.
+    Falls back to ``default_cooldown`` (5 minutes) if no reset info
+    is available.  Writes to a shared file that all sessions can read.
+
+    Args:
+        headers: HTTP response headers from the 429 error.
+        error_context: Structured error context from _extract_api_error_context().
+        default_cooldown: Fallback cooldown in seconds when no header data.
+    """
+    now = time.time()
+    reset_at = None
+
+    # Try headers first (most accurate)
+    header_seconds = _parse_reset_seconds(headers)
+    if header_seconds is not None:
+        reset_at = now + header_seconds
+
+    # Try error_context reset_at (from body parsing)
+    if reset_at is None and isinstance(error_context, dict):
+        ctx_reset = error_context.get("reset_at")
+        if isinstance(ctx_reset, (int, float)) and ctx_reset > now:
+            reset_at = float(ctx_reset)
+
+    # Default cooldown
+    if reset_at is None:
+        reset_at = now + default_cooldown
+
+    path = _state_path()
+    try:
+        state_dir = os.path.dirname(path)
+        os.makedirs(state_dir, exist_ok=True)
+
+        state = {
+            "reset_at": reset_at,
+            "recorded_at": now,
+            "reset_seconds": reset_at - now,
+        }
+
+        # Atomic write: write to temp file + rename
+        fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
+        try:
+            with os.fdopen(fd, "w") as f:
+                json.dump(state, f)
+            os.replace(tmp_path, path)
+        except Exception:
+            # Clean up temp file on failure
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
+
+        logger.info(
+            "Nous rate limit recorded: resets in %.0fs (at %.0f)",
+            reset_at - now, reset_at,
+        )
+    except Exception as exc:
+        logger.debug("Failed to write Nous rate limit state: %s", exc)
+
+
+def nous_rate_limit_remaining() -> Optional[float]:
+    """Check if Nous Portal is currently rate-limited.
+
+    Returns:
+        Seconds remaining until reset, or None if not rate-limited.
+    """
+    path = _state_path()
+    try:
+        with open(path) as f:
+            state = json.load(f)
+        reset_at = state.get("reset_at", 0)
+        remaining = reset_at - time.time()
+        if remaining > 0:
+            return remaining
+        # Expired — clean up
+        try:
+            os.unlink(path)
+        except OSError:
+            pass
+        return None
+    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
+        return None
+
+
+def clear_nous_rate_limit() -> None:
+    """Clear the rate limit state (e.g., after a successful Nous request)."""
+    try:
+        os.unlink(_state_path())
+    except FileNotFoundError:
+        pass
+    except OSError as exc:
+        logger.debug("Failed to clear Nous rate limit state: %s", exc)
+
+
+def format_remaining(seconds: float) -> str:
+    """Format seconds remaining into human-readable duration."""
+    s = max(0, int(seconds))
+    if s < 60:
+        return f"{s}s"
+    if s < 3600:
+        m, sec = divmod(s, 60)
+        return f"{m}m {sec}s" if sec else f"{m}m"
+    h, remainder = divmod(s, 3600)
+    m = remainder // 60
+    return f"{h}h {m}m" if m else f"{h}h"
@@ -295,7 +295,9 @@ PLATFORM_HINTS = {
    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
-        "Please do not use markdown as it does not render. "
+        "Standard markdown is automatically converted to Telegram format. "
+        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
+        "`inline code`, ```code blocks```, [links](url), and ## headers. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -376,6 +378,12 @@ PLATFORM_HINTS = {
        "downloaded and sent as native photos. Do NOT tell the user you lack file-sending "
        "capability — use MEDIA: syntax whenever a file delivery is appropriate."
    ),
+    "qqbot": (
+        "You are on QQ, a popular Chinese messaging platform. QQ supports markdown formatting "
+        "and emoji. You can send media files natively: include MEDIA:/absolute/path/to/file in "
+        "your response. Images are sent as native photos, and other files arrive as downloadable "
+        "documents."
+    ),
 }

 # ---------------------------------------------------------------------------
@@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
    re.IGNORECASE,
 )

+# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
+# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
+_JWT_RE = re.compile(
+    r"eyJ[A-Za-z0-9_-]{10,}"           # Header (always starts with eyJ)
+    r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}"   # Optional payload and/or signature
+)
+
+# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
+# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
+_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
+
 # E.164 phone numbers: +<country><number>, 7-15 digits
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
    # Database connection string passwords
    text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)

+    # JWT tokens (eyJ... — base64-encoded JSON headers)
+    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
+
+    # Discord user/role mentions (<@snowflake_id>)
+    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
+
    # E.164 phone numbers (Signal, WhatsApp)
    def _redact_phone(m):
        phone = m.group(1)
@@ -12,6 +12,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

+from hermes_constants import display_hermes_home
+
 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -70,7 +72,14 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
    skill_name = str(loaded_skill.get("name") or normalized)
    skill_path = str(loaded_skill.get("path") or "")
    skill_dir = None
-    if skill_path:
+    # Prefer the absolute skill_dir returned by skill_view() — this is
+    # correct for both local and external skills.  Fall back to the old
+    # SKILLS_DIR-relative reconstruction only when skill_dir is absent
+    # (e.g. legacy skill_view responses).
+    abs_skill_dir = loaded_skill.get("skill_dir")
+    if abs_skill_dir:
+        skill_dir = Path(abs_skill_dir)
+    elif skill_path:
        try:
            skill_dir = SKILLS_DIR / Path(skill_path).parent
        except Exception:
@@ -108,7 +117,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
        if not resolved:
            return

-        lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
+        lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
        for key, value in resolved.items():
            display_val = str(value) if value else "(not set)"
            lines.append(f"  {key} = {display_val}")
@@ -10,7 +10,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import get_config_path, get_skills_dir

@@ -441,3 +441,25 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
            matches.append(Path(root) / filename)
    for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):
        yield path
+
+
+# ── Namespace helpers for plugin-provided skills ───────────────────────────
+
+_NAMESPACE_RE = re.compile(r"^[a-zA-Z0-9_-]+$")
+
+
+def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
+    """Split ``'namespace:skill-name'`` into ``(namespace, bare_name)``.
+
+    Returns ``(None, name)`` when there is no ``':'``.
+    """
+    if ":" not in name:
+        return None, name
+    return tuple(name.split(":", 1))  # type: ignore[return-value]
+
+
+def is_valid_namespace(candidate: Optional[str]) -> bool:
+    """Check whether *candidate* is a valid namespace (``[a-zA-Z0-9_-]+``)."""
+    if not candidate:
+        return False
+    return bool(_NAMESPACE_RE.match(candidate))
@@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://ai.google.dev/pricing",
        pricing_version="google-pricing-2026-03-16",
    ),
+    # AWS Bedrock — pricing per the Bedrock pricing page.
+    # Bedrock charges the same per-token rates as the model provider but
+    # through AWS billing.  These are the on-demand prices (no commitment).
+    # Source: https://aws.amazon.com/bedrock/pricing/
+    (
+        "bedrock",
+        "anthropic.claude-opus-4-6",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("15.00"),
+        output_cost_per_million=Decimal("75.00"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
+    (
+        "bedrock",
+        "anthropic.claude-sonnet-4-6",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
+    (
+        "bedrock",
+        "anthropic.claude-sonnet-4-5",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
+    (
+        "bedrock",
+        "anthropic.claude-haiku-4-5",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.80"),
+        output_cost_per_million=Decimal("4.00"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
+    (
+        "bedrock",
+        "amazon.nova-pro",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.80"),
+        output_cost_per_million=Decimal("3.20"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
+    (
+        "bedrock",
+        "amazon.nova-lite",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.06"),
+        output_cost_per_million=Decimal("0.24"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
+    (
+        "bedrock",
+        "amazon.nova-micro",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.035"),
+        output_cost_per_million=Decimal("0.14"),
+        source="official_docs_snapshot",
+        source_url="https://aws.amazon.com/bedrock/pricing/",
+        pricing_version="bedrock-pricing-2026-04",
+    ),
 }


@@ -561,7 +561,10 @@ class BatchRunner:
            provider_sort (str): Sort providers by price/throughput/latency (optional)
            max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
            reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
-            prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
+            prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming).
+                NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a trailing assistant-role prefill
+                (400 error).  For those models use output_config.format or structured-output
+                schemas instead.  Safe here for user-role priming and for older Claude / non-Claude models.
            max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
        """
        self.dataset_file = Path(dataset_file)
@@ -16,7 +16,7 @@ model:
  #   "nous"         - Nous Portal OAuth (requires: hermes login)
  #   "nous-api"     - Nous Portal API key (requires: NOUS_API_KEY)
  #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
-  #   "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
+  #   "openai-codex" - OpenAI Codex (requires: hermes auth)
  #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
  #   "gemini"      - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
  #   "zai"         - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
@@ -26,6 +26,7 @@ model:
  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
  #   "xiaomi"       - Xiaomi MiMo (requires: XIAOMI_API_KEY)
  #   "arcee"        - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
+  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
  #
@@ -37,12 +38,6 @@ model:
  #     base_url: "http://localhost:1234/v1"
  #   No API key needed — local servers typically ignore auth.
  #
-  #   For Ollama Cloud (https://ollama.com/pricing):
-  #     provider: "custom"
-  #     base_url: "https://ollama.com/v1"
-  #   Set OLLAMA_API_KEY in .env — automatically picked up when base_url
-  #   points to ollama.com.
-  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
  
@@ -337,6 +332,7 @@ compression:
 #   "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
 #   "nous"       - Force Nous Portal (requires: hermes login)
 #   "gemini"      - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
+#   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
 #   "codex"       - Force Codex OAuth (requires: hermes model → Codex).
 #                  Uses gpt-5.3-codex which supports vision.
 #   "main"       - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
@@ -523,7 +519,7 @@ agent:
 #   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
 #   - A list of individual toolsets to compose your own (see list below)
 #
-# Supported platform keys: cli, telegram, discord, whatsapp, slack
+# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
 #
 # Examples:
 #
@@ -552,6 +548,7 @@ agent:
 #   slack:         hermes-slack          (same as telegram)
 #   signal:        hermes-signal         (same as telegram)
 #   homeassistant: hermes-homeassistant  (same as telegram)
+#   qqbot:            hermes-qqbot            (same as telegram)
 #
 platform_toolsets:
  cli: [hermes-cli]
@@ -561,6 +558,19 @@ platform_toolsets:
  slack: [hermes-slack]
  signal: [hermes-signal]
  homeassistant: [hermes-homeassistant]
+  qqbot: [hermes-qqbot]
+
+# =============================================================================
+# Gateway Platform Settings
+# =============================================================================
+# Optional per-platform messaging settings.
+# Platform-specific knobs live under `extra`.
+#
+# platforms:
+#   telegram:
+#     reply_to_mode: "first"  # off | first | all
+#     extra:
+#       disable_link_previews: false  # Set true to suppress Telegram URL previews in bot messages

 # ─────────────────────────────────────────────────────────────────────────────
 # Available toolsets (use these names in platform_toolsets or the toolsets list)
@@ -401,14 +401,27 @@ def load_cli_config() -> Dict[str, Any]:
    # filesystem is directly accessible.  For ALL remote/container backends
    # (ssh, docker, modal, singularity), the host path doesn't exist on the
    # target -- remove the key so terminal_tool.py uses its per-backend default.
-    if terminal_config.get("cwd") in (".", "auto", "cwd"):
-        effective_backend = terminal_config.get("env_type", "local")
-        if effective_backend == "local":
-            terminal_config["cwd"] = os.getcwd()
-            defaults["terminal"]["cwd"] = terminal_config["cwd"]
+    #
+    # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the
+    # gateway's config bridge earlier in the process), don't clobber it.
+    # This prevents a lazy import of cli.py during gateway runtime from
+    # rewriting TERMINAL_CWD to the service's working directory.
+    # See issue #10817.
+    _CWD_PLACEHOLDERS = (".", "auto", "cwd")
+    if terminal_config.get("cwd") in _CWD_PLACEHOLDERS:
+        _existing_cwd = os.environ.get("TERMINAL_CWD", "")
+        if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd):
+            # Gateway (or earlier startup) already resolved a real path — keep it
+            terminal_config["cwd"] = _existing_cwd
+            defaults["terminal"]["cwd"] = _existing_cwd
        else:
-            # Remove so TERMINAL_CWD stays unset → tool picks backend default
-            terminal_config.pop("cwd", None)
+            effective_backend = terminal_config.get("env_type", "local")
+            if effective_backend == "local":
+                terminal_config["cwd"] = os.getcwd()
+                defaults["terminal"]["cwd"] = terminal_config["cwd"]
+            else:
+                # Remove so TERMINAL_CWD stays unset → tool picks backend default
+                terminal_config.pop("cwd", None)
    
    env_mappings = {
        "env_type": "TERMINAL_ENV",
@@ -988,19 +1001,20 @@ def _prune_orphaned_branches(repo_root: str) -> None:
 # ANSI building blocks for conversation display
 _ACCENT_ANSI_DEFAULT = "\033[1;38;2;255;215;0m"  # True-color #FFD700 bold — fallback
 _BOLD = "\033[1m"
-_DIM = "\033[2m"
 _RST = "\033[0m"
+_STREAM_PAD = "    "  # 4-space indent for streamed response text (matches Panel padding)


-def _hex_to_ansi_bold(hex_color: str) -> str:
-    """Convert a hex color like '#268bd2' to a bold true-color ANSI escape."""
+def _hex_to_ansi(hex_color: str, *, bold: bool = False) -> str:
+    """Convert a hex color like '#268bd2' to a true-color ANSI escape."""
    try:
        r = int(hex_color[1:3], 16)
        g = int(hex_color[3:5], 16)
        b = int(hex_color[5:7], 16)
-        return f"\033[1;38;2;{r};{g};{b}m"
+        prefix = "1;" if bold else ""
+        return f"\033[{prefix}38;2;{r};{g};{b}m"
    except (ValueError, IndexError):
-        return _ACCENT_ANSI_DEFAULT
+        return _ACCENT_ANSI_DEFAULT if bold else "\033[38;2;184;134;11m"


 class _SkinAwareAnsi:
@@ -1010,20 +1024,22 @@ class _SkinAwareAnsi:
    force re-resolution after a ``/skin`` switch.
    """

-    def __init__(self, skin_key: str, fallback_hex: str = "#FFD700"):
+    def __init__(self, skin_key: str, fallback_hex: str = "#FFD700", *, bold: bool = False):
        self._skin_key = skin_key
        self._fallback_hex = fallback_hex
+        self._bold = bold
        self._cached: str | None = None

    def __str__(self) -> str:
        if self._cached is None:
            try:
                from hermes_cli.skin_engine import get_active_skin
-                self._cached = _hex_to_ansi_bold(
-                    get_active_skin().get_color(self._skin_key, self._fallback_hex)
+                self._cached = _hex_to_ansi(
+                    get_active_skin().get_color(self._skin_key, self._fallback_hex),
+                    bold=self._bold,
                )
            except Exception:
-                self._cached = _hex_to_ansi_bold(self._fallback_hex)
+                self._cached = _hex_to_ansi(self._fallback_hex, bold=self._bold)
        return self._cached

    def __add__(self, other: str) -> str:
@@ -1037,7 +1053,8 @@ class _SkinAwareAnsi:
        self._cached = None


-_ACCENT = _SkinAwareAnsi("response_border", "#FFD700")
+_ACCENT = _SkinAwareAnsi("response_border", "#FFD700", bold=True)
+_DIM = _SkinAwareAnsi("banner_dim", "#B8860B")


 def _accent_hex() -> str:
@@ -1709,9 +1726,9 @@ class HermesCLI:
        # Parse and validate toolsets
        self.enabled_toolsets = toolsets
        if toolsets and "all" not in toolsets and "*" not in toolsets:
-            # Validate each toolset — MCP server names are added by
-            # _get_platform_tools() but aren't registered in TOOLSETS yet
-            # (that happens later in _sync_mcp_toolsets), so exclude them.
+            # Validate each toolset — MCP server names are resolved via
+            # live registry aliases (registered during discover_mcp_tools),
+            # but discovery hasn't run yet at this point, so exclude them.
            mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys())
            invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names]
            if invalid:
@@ -2009,7 +2026,17 @@ class HermesCLI:
        """Return the visible height for the spinner/status text line above the status bar."""
        if not getattr(self, "_spinner_text", ""):
            return 0
-        return 0 if self._use_minimal_tui_chrome(width=width) else 1
+        if self._use_minimal_tui_chrome(width=width):
+            return 0
+        # Compute how many lines the spinner text needs when wrapped.
+        # The rendered text is "  {emoji} {label}  ({elapsed})" — about
+        # len(_spinner_text) + 16 chars for indent + timer suffix.
+        width = width or self._get_tui_terminal_width()
+        if width and width > 10:
+            import math
+            text_len = len(self._spinner_text) + 16  # indent + timer
+            return max(1, math.ceil(text_len / width))
+        return 1

    def _get_voice_status_fragments(self, width: Optional[int] = None):
        """Return the voice status bar fragments for the interactive TUI."""
@@ -2577,7 +2604,7 @@ class HermesCLI:
        _tc = getattr(self, "_stream_text_ansi", "")
        while "\n" in self._stream_buf:
            line, self._stream_buf = self._stream_buf.split("\n", 1)
-            _cprint(f"{_tc}{line}{_RST}" if _tc else line)
+            _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")

    def _flush_stream(self) -> None:
        """Emit any remaining partial line from the stream buffer and close the box."""
@@ -2594,7 +2621,7 @@ class HermesCLI:

        if self._stream_buf:
            _tc = getattr(self, "_stream_text_ansi", "")
-            _cprint(f"{_tc}{self._stream_buf}{_RST}" if _tc else self._stream_buf)
+            _cprint(f"{_STREAM_PAD}{_tc}{self._stream_buf}{_RST}" if _tc else f"{_STREAM_PAD}{self._stream_buf}")
            self._stream_buf = ""

        # Close the response box
@@ -3893,23 +3920,14 @@ class HermesCLI:
    
    def _handle_profile_command(self):
        """Display active profile name and home directory."""
-        from hermes_constants import get_hermes_home, display_hermes_home
+        from hermes_constants import display_hermes_home
+        from hermes_cli.profiles import get_active_profile_name

-        home = get_hermes_home()
        display = display_hermes_home()
-
-        profiles_parent = Path.home() / ".hermes" / "profiles"
-        try:
-            rel = home.relative_to(profiles_parent)
-            profile_name = str(rel).split("/")[0]
-        except ValueError:
-            profile_name = None
+        profile_name = get_active_profile_name()

        print()
-        if profile_name:
-            print(f"  Profile: {profile_name}")
-        else:
-            print("  Profile: default")
+        print(f"  Profile: {profile_name}")
        print(f"  Home:    {display}")
        print()

@@ -4096,6 +4114,8 @@ class HermesCLI:
                self.agent.flush_memories(self.conversation_history)
            except (Exception, KeyboardInterrupt):
                pass
+            # Trigger memory extraction on the old session before session_id rotates.
+            self.agent.commit_memory_session(self.conversation_history)
            self._notify_session_boundary("on_session_finalize")
        elif self.agent:
            # First session or empty history — still finalize the old session
@@ -4584,16 +4604,19 @@ class HermesCLI:
                self._close_model_picker()
                return
            provider_data = providers[selected]
-            model_list = []
-            try:
-                from hermes_cli.models import provider_model_ids
-                live = provider_model_ids(provider_data["slug"])
-                if live:
-                    model_list = live
-            except Exception:
-                pass
+            # Use the curated model list from list_authenticated_providers()
+            # (same lists as `hermes model` and gateway pickers).
+            # Only fall back to the live provider catalog when the curated
+            # list is empty (e.g. user-defined endpoints with no curated list).
+            model_list = provider_data.get("models", [])
            if not model_list:
-                model_list = provider_data.get("models", [])
+                try:
+                    from hermes_cli.models import provider_model_ids
+                    live = provider_model_ids(provider_data["slug"])
+                    if live:
+                        model_list = live
+                except Exception:
+                    pass
            state["stage"] = "model"
            state["provider_data"] = provider_data
            state["model_list"] = model_list
@@ -4901,6 +4924,52 @@ class HermesCLI:
            return "\n".join(p for p in parts if p)
        return str(value)

+    def _handle_gquota_command(self, cmd_original: str) -> None:
+        """Show Google Gemini Code Assist quota usage for the current OAuth account."""
+        try:
+            from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials
+            from agent.google_code_assist import retrieve_user_quota, CodeAssistError
+        except ImportError as exc:
+            self.console.print(f"  [red]Gemini modules unavailable: {exc}[/]")
+            return
+
+        try:
+            access_token = get_valid_access_token()
+        except GoogleOAuthError as exc:
+            self.console.print(f"  [yellow]{exc}[/]")
+            self.console.print("  Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
+            return
+
+        creds = load_credentials()
+        project_id = (creds.project_id if creds else "") or ""
+
+        try:
+            buckets = retrieve_user_quota(access_token, project_id=project_id)
+        except CodeAssistError as exc:
+            self.console.print(f"  [red]Quota lookup failed:[/] {exc}")
+            return
+
+        if not buckets:
+            self.console.print("  [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
+            return
+
+        # Sort for stable display, group by model
+        buckets.sort(key=lambda b: (b.model_id, b.token_type))
+        self.console.print()
+        self.console.print(f"  [bold]Gemini Code Assist quota[/]  (project: {project_id or '(auto / free-tier)'})")
+        self.console.print()
+        for b in buckets:
+            pct = max(0.0, min(1.0, b.remaining_fraction))
+            width = 20
+            filled = int(round(pct * width))
+            bar = "▓" * filled + "░" * (width - filled)
+            pct_str = f"{int(pct * 100):3d}%"
+            header = b.model_id
+            if b.token_type:
+                header += f" [{b.token_type}]"
+            self.console.print(f"    {header:40s}  {bar}  {pct_str}")
+        self.console.print()
+
    def _handle_personality_command(self, cmd: str):
        """Handle the /personality command to set predefined personalities."""
        parts = cmd.split(maxsplit=1)
@@ -5410,6 +5479,8 @@ class HermesCLI:
            self._handle_model_switch(cmd_original)
        elif canonical == "provider":
            self._show_model_and_providers()
+        elif canonical == "gquota":
+            self._handle_gquota_command(cmd_original)

        elif canonical == "personality":
            # Use original case (handler lowercases the personality name itself)
@@ -5484,7 +5555,8 @@ class HermesCLI:
                        version = f" v{p['version']}" if p["version"] else ""
                        tools = f"{p['tools']} tools" if p["tools"] else ""
                        hooks = f"{p['hooks']} hooks" if p["hooks"] else ""
-                        parts = [x for x in [tools, hooks] if x]
+                        commands = f"{p['commands']} commands" if p.get("commands") else ""
+                        parts = [x for x in [tools, hooks, commands] if x]
                        detail = f" ({', '.join(parts)})" if parts else ""
                        error = f" — {p['error']}" if p["error"] else ""
                        print(f"  {status} {p['name']}{version}{detail}{error}")
@@ -5758,7 +5830,7 @@ class HermesCLI:
                        border_style=_resp_color,
                        style=_resp_text,
                        box=rich_box.HORIZONTALS,
-                        padding=(1, 2),
+                        padding=(1, 4),
                    ))
                else:
                    _cprint("  (No response generated)")
@@ -5882,7 +5954,7 @@ class HermesCLI:
                        title_align="left",
                        border_style=_resp_color,
                        box=rich_box.HORIZONTALS,
-                        padding=(1, 2),
+                        padding=(1, 4),
                    ))
                else:
                    _cprint("  💬 /btw: (no response)")
@@ -5949,7 +6021,7 @@ class HermesCLI:
        parts = cmd.strip().split(None, 1)
        sub = parts[1].lower().strip() if len(parts) > 1 else "status"

-        _DEFAULT_CDP = "http://localhost:9222"
+        _DEFAULT_CDP = "http://127.0.0.1:9222"
        current = os.environ.get("BROWSER_CDP_URL", "").strip()

        if sub.startswith("connect"):
@@ -6156,6 +6228,7 @@ class HermesCLI:

        set_active_skin(new_skin)
        _ACCENT.reset()  # Re-resolve ANSI color for the new skin
+        _DIM.reset()     # Re-resolve dim/secondary ANSI color for the new skin
        if save_config_value("display.skin", new_skin):
            print(f"  Skin set to: {new_skin} (saved)")
        else:
@@ -6195,13 +6268,21 @@ class HermesCLI:
    def _toggle_yolo(self):
        """Toggle YOLO mode — skip all dangerous command approval prompts."""
        import os
+        from hermes_cli.colors import Colors as _Colors
+
        current = bool(os.environ.get("HERMES_YOLO_MODE"))
        if current:
            os.environ.pop("HERMES_YOLO_MODE", None)
-            self.console.print("  ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.")
+            _cprint(
+                f"  ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
+                " — dangerous commands will require approval."
+            )
        else:
            os.environ["HERMES_YOLO_MODE"] = "1"
-            self.console.print("  ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.")
+            _cprint(
+                f"  ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
+                " — all commands auto-approved. Use with caution."
+            )

    def _handle_reasoning_command(self, cmd: str):
        """Handle /reasoning — manage effort level and display toggle.
@@ -7378,7 +7459,15 @@ class HermesCLI:
        self._invalidate()

    def _get_approval_display_fragments(self):
-        """Render the dangerous-command approval panel for the prompt_toolkit UI."""
+        """Render the dangerous-command approval panel for the prompt_toolkit UI.
+
+        Layout priority: title + command + choices must always render, even if
+        the terminal is short or the description is long. Description is placed
+        at the bottom of the panel and gets truncated to fit the remaining row
+        budget. This prevents HSplit from clipping approve/deny off-screen when
+        tirith findings produce multi-paragraph descriptions or when the user
+        runs in a compact terminal pane.
+        """
        state = self._approval_state
        if not state:
            return []
@@ -7437,22 +7526,89 @@ class HermesCLI:
        box_width = _panel_box_width(title, preview_lines)
        inner_text_width = max(8, box_width - 2)

+        # Pre-wrap the mandatory content — command + choices must always render.
+        cmd_wrapped = _wrap_panel_text(cmd_display, inner_text_width)
+
+        # (choice_index, wrapped_line) so we can re-apply selected styling below
+        choice_wrapped: list[tuple[int, str]] = []
+        for i, choice in enumerate(choices):
+            label = choice_labels.get(choice, choice)
+            prefix = '❯ ' if i == selected else '  '
+            for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent="  "):
+                choice_wrapped.append((i, wrapped))
+
+        # Budget vertical space so HSplit never clips the command or choices.
+        # Panel chrome (full layout with separators):
+        #   top border + title + blank_after_title
+        #   + blank_between_cmd_choices + bottom border = 5 rows.
+        # In tight terminals we collapse to:
+        #   top border + title + bottom border = 3 rows (no blanks).
+        #
+        # reserved_below: rows consumed below the approval panel by the
+        # spinner/tool-progress line, status bar, input area, separators, and
+        # prompt symbol. Measured at ~6 rows during live PTY approval prompts;
+        # budget 6 so we don't overestimate the panel's room.
+        term_rows = shutil.get_terminal_size((100, 24)).lines
+        chrome_full = 5
+        chrome_tight = 3
+        reserved_below = 6
+
+        available = max(0, term_rows - reserved_below)
+        mandatory_full = chrome_full + len(cmd_wrapped) + len(choice_wrapped)
+
+        # If the full-chrome panel doesn't fit, drop the separator blanks.
+        # This keeps the command and every choice on-screen in compact terminals.
+        use_compact_chrome = mandatory_full > available
+        chrome_rows = chrome_tight if use_compact_chrome else chrome_full
+
+        # If the command itself is too long to leave room for choices (e.g. user
+        # hit "view" on a multi-hundred-character command), truncate it so the
+        # approve/deny buttons still render. Keep at least 1 row of command.
+        max_cmd_rows = max(1, available - chrome_rows - len(choice_wrapped))
+        if len(cmd_wrapped) > max_cmd_rows:
+            keep = max(1, max_cmd_rows - 1) if max_cmd_rows > 1 else 1
+            cmd_wrapped = cmd_wrapped[:keep] + ["… (command truncated — use /logs or /debug for full text)"]
+
+        # Allocate any remaining rows to description. The extra -1 in full mode
+        # accounts for the blank separator between choices and description.
+        mandatory_no_desc = chrome_rows + len(cmd_wrapped) + len(choice_wrapped)
+        desc_sep_cost = 0 if use_compact_chrome else 1
+        available_for_desc = available - mandatory_no_desc - desc_sep_cost
+        # Even on huge terminals, cap description height so the panel stays compact.
+        available_for_desc = max(0, min(available_for_desc, 10))
+
+        desc_wrapped = _wrap_panel_text(description, inner_text_width) if description else []
+        if available_for_desc < 1 or not desc_wrapped:
+            desc_wrapped = []
+        elif len(desc_wrapped) > available_for_desc:
+            keep = max(1, available_for_desc - 1)
+            desc_wrapped = desc_wrapped[:keep] + ["… (description truncated)"]
+
+        # Render: title → command → choices → description (description last so
+        # any remaining overflow clips from the bottom of the least-critical
+        # content, never from the command or choices). Use compact chrome (no
+        # blank separators) when the terminal is tight.
        lines = []
        lines.append(('class:approval-border', '╭' + ('─' * box_width) + '╮\n'))
        _append_panel_line(lines, 'class:approval-border', 'class:approval-title', title, box_width)
-        _append_blank_panel_line(lines, 'class:approval-border', box_width)
-        for wrapped in _wrap_panel_text(description, inner_text_width):
-            _append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
-        for wrapped in _wrap_panel_text(cmd_display, inner_text_width):
+        if not use_compact_chrome:
+            _append_blank_panel_line(lines, 'class:approval-border', box_width)
+
+        for wrapped in cmd_wrapped:
            _append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width)
-        _append_blank_panel_line(lines, 'class:approval-border', box_width)
-        for i, choice in enumerate(choices):
-            label = choice_labels.get(choice, choice)
+        if not use_compact_chrome:
+            _append_blank_panel_line(lines, 'class:approval-border', box_width)
+
+        for i, wrapped in choice_wrapped:
            style = 'class:approval-selected' if i == selected else 'class:approval-choice'
-            prefix = '❯ ' if i == selected else '  '
-            for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent="  "):
-                _append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
-        _append_blank_panel_line(lines, 'class:approval-border', box_width)
+            _append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
+
+        if desc_wrapped:
+            if not use_compact_chrome:
+                _append_blank_panel_line(lines, 'class:approval-border', box_width)
+            for wrapped in desc_wrapped:
+                _append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
+
        lines.append(('class:approval-border', '╰' + ('─' * box_width) + '╯\n'))
        return lines

@@ -7644,7 +7800,7 @@ class HermesCLI:
                        label = " ⚕ Hermes "
                        fill = w - 2 - len(label)
                        _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
-                    _cprint(sentence.rstrip())
+                    _cprint(f"{_STREAM_PAD}{sentence.rstrip()}")

                tts_thread = threading.Thread(
                    target=stream_tts_to_speaker,
@@ -7748,7 +7904,33 @@ class HermesCLI:
                    # Fallback for non-interactive mode (e.g., single-query)
                    agent_thread.join(0.1)

-            agent_thread.join()  # Ensure agent thread completes
+            # Wait for the agent thread to finish.  After an interrupt the
+            # agent may take a few seconds to clean up (kill subprocess, persist
+            # session).  Poll instead of a blocking join so the process_loop
+            # stays responsive — if the user sent another interrupt or the
+            # agent gets stuck, we can break out instead of freezing forever.
+            if interrupt_msg is not None:
+                # Interrupt path: poll briefly, then move on.  The agent
+                # thread is daemon — it dies on process exit regardless.
+                for _wait_tick in range(50):  # 50 * 0.2s = 10s max
+                    agent_thread.join(timeout=0.2)
+                    if not agent_thread.is_alive():
+                        break
+                    # Check if user fired ANOTHER interrupt (Ctrl+C sets
+                    # _should_exit which process_loop checks on next pass).
+                    if getattr(self, '_should_exit', False):
+                        break
+                if agent_thread.is_alive():
+                    logger.warning(
+                        "Agent thread still alive after interrupt "
+                        "(thread %s). Daemon thread will be cleaned up "
+                        "on exit.",
+                        agent_thread.ident,
+                    )
+            else:
+                # Normal completion: agent thread should be done already,
+                # but guard against edge cases.
+                agent_thread.join(timeout=30)

            # Proactively clean up async clients whose event loop is dead.
            # The agent thread may have created AsyncOpenAI clients bound
@@ -7875,7 +8057,7 @@ class HermesCLI:
                        border_style=_resp_color,
                        style=_resp_text,
                        box=rich_box.HORIZONTALS,
-                        padding=(1, 2),
+                        padding=(1, 4),
                    ))


@@ -8627,6 +8809,24 @@ class HermesCLI:
            self._should_exit = True
            event.app.exit()

+        _modal_prompt_active = Condition(
+            lambda: bool(self._secret_state or self._sudo_state)
+        )
+
+        @kb.add('escape', filter=_modal_prompt_active, eager=True)
+        def handle_escape_modal(event):
+            """ESC cancels active secret/sudo prompts."""
+            if self._secret_state:
+                self._cancel_secret_capture()
+                event.app.current_buffer.reset()
+                event.app.invalidate()
+                return
+            if self._sudo_state:
+                self._sudo_state["response_queue"].put("")
+                self._sudo_state = None
+                event.app.invalidate()
+                return
+
        @kb.add('c-z')
        def handle_ctrl_z(event):
            """Handle Ctrl+Z - suspend process to background (Unix only)."""
@@ -8924,9 +9124,9 @@ class HermesCLI:
            if cli_ref._voice_processing:
                return "transcribing..."
            if cli_ref._sudo_state:
-                return "type password (hidden), Enter to skip"
+                return "type password (hidden), Enter to submit · ESC to skip"
            if cli_ref._secret_state:
-                return "type secret (hidden), Enter to skip"
+                return "type secret (hidden), Enter to submit · ESC to skip"
            if cli_ref._approval_state:
                return ""
            if cli_ref._clarify_freetext:
@@ -9023,6 +9223,7 @@ class HermesCLI:
        spinner_widget = Window(
            content=FormattedTextControl(get_spinner_text),
            height=get_spinner_height,
+            wrap_lines=True,
        )

        spacer = Window(
@@ -9059,7 +9260,13 @@ class HermesCLI:
            lines.append((border_style, "│" + (" " * box_width) + "│\n"))

        def _get_clarify_display():
-            """Build styled text for the clarify question/choices panel."""
+            """Build styled text for the clarify question/choices panel.
+
+            Layout priority: choices + Other option must always render even if
+            the question is very long. The question is budgeted to leave enough
+            rows for the choices and trailing chrome; anything over the budget
+            is truncated with a marker.
+            """
            state = cli_ref._clarify_state
            if not state:
                return []
@@ -9080,48 +9287,97 @@ class HermesCLI:
            box_width = _panel_box_width("Hermes needs your input", preview_lines)
            inner_text_width = max(8, box_width - 2)

+            # Pre-wrap choices + Other option — these are mandatory.
+            choice_wrapped: list[tuple[int, str]] = []
+            if choices:
+                for i, choice in enumerate(choices):
+                    prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else '  '
+                    for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent="  "):
+                        choice_wrapped.append((i, wrapped))
+                # Trailing Other row(s)
+                other_idx = len(choices)
+                if selected == other_idx and not cli_ref._clarify_freetext:
+                    other_label_mand = '❯ Other (type your answer)'
+                elif cli_ref._clarify_freetext:
+                    other_label_mand = '❯ Other (type below)'
+                else:
+                    other_label_mand = '  Other (type your answer)'
+                other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent="  ")
+            elif cli_ref._clarify_freetext:
+                # Freetext-only mode: the guidance line takes the place of choices.
+                other_wrapped = _wrap_panel_text(
+                    "Type your answer in the prompt below, then press Enter.",
+                    inner_text_width,
+                )
+            else:
+                other_wrapped = []
+
+            # Budget the question so mandatory rows always render.
+            # Chrome layouts:
+            #   full : top border + blank_after_title + blank_after_question
+            #          + blank_before_bottom + bottom border = 5 rows
+            #   tight: top border + bottom border = 2 rows (drop all blanks)
+            #
+            # reserved_below matches the approval-panel budget (~6 rows for
+            # spinner/tool-progress + status + input + separators + prompt).
+            term_rows = shutil.get_terminal_size((100, 24)).lines
+            chrome_full = 5
+            chrome_tight = 2
+            reserved_below = 6
+
+            available = max(0, term_rows - reserved_below)
+            mandatory_full = chrome_full + len(choice_wrapped) + len(other_wrapped)
+
+            use_compact_chrome = mandatory_full > available
+            chrome_rows = chrome_tight if use_compact_chrome else chrome_full
+
+            max_question_rows = max(1, available - chrome_rows - len(choice_wrapped) - len(other_wrapped))
+            max_question_rows = min(max_question_rows, 12)  # soft cap on huge terminals
+
+            question_wrapped = _wrap_panel_text(question, inner_text_width)
+            if len(question_wrapped) > max_question_rows:
+                keep = max(1, max_question_rows - 1)
+                question_wrapped = question_wrapped[:keep] + ["… (question truncated)"]
+
            lines = []
            # Box top border
            lines.append(('class:clarify-border', '╭─ '))
            lines.append(('class:clarify-title', 'Hermes needs your input'))
            lines.append(('class:clarify-border', ' ' + ('─' * max(0, box_width - len("Hermes needs your input") - 3)) + '╮\n'))
-            _append_blank_panel_line(lines, 'class:clarify-border', box_width)
+            if not use_compact_chrome:
+                _append_blank_panel_line(lines, 'class:clarify-border', box_width)

-            # Question text
-            for wrapped in _wrap_panel_text(question, inner_text_width):
+            # Question text (bounded)
+            for wrapped in question_wrapped:
                _append_panel_line(lines, 'class:clarify-border', 'class:clarify-question', wrapped, box_width)
-            _append_blank_panel_line(lines, 'class:clarify-border', box_width)
+            if not use_compact_chrome:
+                _append_blank_panel_line(lines, 'class:clarify-border', box_width)

            if cli_ref._clarify_freetext and not choices:
-                guidance = "Type your answer in the prompt below, then press Enter."
-                for wrapped in _wrap_panel_text(guidance, inner_text_width):
+                for wrapped in other_wrapped:
                    _append_panel_line(lines, 'class:clarify-border', 'class:clarify-choice', wrapped, box_width)
-                _append_blank_panel_line(lines, 'class:clarify-border', box_width)
+                if not use_compact_chrome:
+                    _append_blank_panel_line(lines, 'class:clarify-border', box_width)

            if choices:
                # Multiple-choice mode: show selectable options
-                for i, choice in enumerate(choices):
+                for i, wrapped in choice_wrapped:
                    style = 'class:clarify-selected' if i == selected and not cli_ref._clarify_freetext else 'class:clarify-choice'
-                    prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else '  '
-                    wrapped_lines = _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent="  ")
-                    for wrapped in wrapped_lines:
-                        _append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)
+                    _append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)

-                # "Other" option (5th line, only shown when choices exist)
+                # "Other" option (trailing row(s), only shown when choices exist)
                other_idx = len(choices)
                if selected == other_idx and not cli_ref._clarify_freetext:
                    other_style = 'class:clarify-selected'
-                    other_label = '❯ Other (type your answer)'
                elif cli_ref._clarify_freetext:
                    other_style = 'class:clarify-active-other'
-                    other_label = '❯ Other (type below)'
                else:
                    other_style = 'class:clarify-choice'
-                    other_label = '  Other (type your answer)'
-                for wrapped in _wrap_panel_text(other_label, inner_text_width, subsequent_indent="  "):
+                for wrapped in other_wrapped:
                    _append_panel_line(lines, 'class:clarify-border', other_style, wrapped, box_width)

-            _append_blank_panel_line(lines, 'class:clarify-border', box_width)
+            if not use_compact_chrome:
+                _append_blank_panel_line(lines, 'class:clarify-border', box_width)
            lines.append(('class:clarify-border', '╰' + ('─' * box_width) + '╯\n'))
            return lines

@@ -9169,7 +9425,7 @@ class HermesCLI:
            prompt = state.get("prompt") or f"Enter value for {state.get('var_name', 'secret')}"
            metadata = state.get("metadata") or {}
            help_text = metadata.get("help")
-            body = 'Enter secret below (hidden), or press Enter to skip'
+            body = 'Enter secret below (hidden), ESC or Ctrl+C to skip'
            content_lines = [prompt, body]
            if help_text:
                content_lines.insert(1, str(help_text))
@@ -9982,6 +10238,11 @@ def main(
                ):
                    cli.agent.quiet_mode = True
                    cli.agent.suppress_status_output = True
+                    # Suppress streaming display callbacks so stdout stays
+                    # machine-readable (no styled "Hermes" box, no tool-gen
+                    # status lines).  The response is printed once below.
+                    cli.agent.stream_delta_callback = None
+                    cli.agent.tool_gen_callback = None
                    result = cli.agent.run_conversation(
                        user_message=effective_query,
                        conversation_history=cli.conversation_history,
@@ -9989,7 +10250,8 @@ def main(
                    response = result.get("final_response", "") if isinstance(result, dict) else str(result)
                    if response:
                        print(response)
-                    print(f"\nsession_id: {cli.session_id}")
+                    # Session ID goes to stderr so piped stdout is clean.
+                    print(f"\nsession_id: {cli.session_id}", file=sys.stderr)
                    
                    # Ensure proper exit code for automation wrappers
                    sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)
@@ -501,6 +501,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]

        if schedule_changed:
            updated_schedule = updated["schedule"]
+            # The API may pass schedule as a raw string (e.g. "every 10m")
+            # instead of a pre-parsed dict.  Normalize it the same way
+            # create_job() does so downstream code can call .get() safely.
+            if isinstance(updated_schedule, str):
+                updated_schedule = parse_schedule(updated_schedule)
+                updated["schedule"] = updated_schedule
            updated["schedule_display"] = updates.get(
                "schedule_display",
                updated_schedule.get("display", updated.get("schedule_display")),
@@ -10,6 +10,7 @@ runs at a time if multiple processes overlap.

 import asyncio
 import concurrent.futures
+import contextvars
 import json
 import logging
 import os
@@ -45,6 +46,7 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "telegram", "discord", "slack", "whatsapp", "signal",
    "matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
    "wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
+    "qqbot",
 })

 from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
@@ -254,6 +256,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "email": Platform.EMAIL,
        "sms": Platform.SMS,
        "bluebubbles": Platform.BLUEBUBBLES,
+        "qqbot": Platform.QQBOT,
    }
    platform = platform_map.get(platform_name.lower())
    if not platform:
@@ -286,11 +289,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option

    if wrap_response:
        task_name = job.get("name", job["id"])
+        job_id = job.get("id", "")
        delivery_content = (
            f"Cronjob Response: {task_name}\n"
+            f"(job_id: {job_id})\n"
            f"-------------\n\n"
            f"{content}\n\n"
-            f"Note: The agent cannot see this message, and therefore cannot respond to it."
+            f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
        )
    else:
        delivery_content = content
@@ -766,7 +771,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        _POLL_INTERVAL = 5.0
        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-        _cron_future = _cron_pool.submit(agent.run_conversation, prompt)
+        # Preserve scheduler-scoped ContextVar state (for example skill-declared
+        # env passthrough registrations) when the cron run hops into the worker
+        # thread used for inactivity timeout monitoring.
+        _cron_context = contextvars.copy_context()
+        _cron_future = _cron_pool.submit(_cron_context.run, agent.run_conversation, prompt)
        _inactivity_timeout = False
        try:
            if _cron_inactivity_limit is None:
@@ -828,6 +837,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            )

        final_response = result.get("final_response", "") or ""
+        # Strip leaked placeholder text that upstream may inject on empty completions.
+        if final_response.strip() == "(No response generated)":
+            final_response = ""
        # Use a separate variable for log display; keep final_response clean
        # for delivery logic (empty response = no delivery).
        logged_response = final_response if final_response else "(No response generated)"
@@ -967,6 +979,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                        delivery_error = str(de)
                        logger.error("Delivery failed for job %s: %s", job["id"], de)

+                # Treat empty final_response as a soft failure so last_status
+                # is not "ok" — the agent ran but produced nothing useful.
+                # (issue #8585)
+                if success and not final_response:
+                    success = False
+                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
+
                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
                executed += 1

@@ -1,13 +1,14 @@
 #!/bin/bash
-# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes.
+# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes.
 set -e

-HERMES_HOME="/opt/data"
+HERMES_HOME="${HERMES_HOME:-/opt/data}"
 INSTALL_DIR="/opt/hermes"

 # --- Privilege dropping via gosu ---
-# When started as root (the default), optionally remap the hermes user/group
-# to match host-side ownership, fix volume permissions, then re-exec as hermes.
+# When started as root (the default for Docker, or fakeroot in rootless Podman),
+# optionally remap the hermes user/group to match host-side ownership, fix volume
+# permissions, then re-exec as hermes.
 if [ "$(id -u)" = "0" ]; then
    if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
        echo "Changing hermes UID to $HERMES_UID"
@@ -16,13 +17,19 @@ if [ "$(id -u)" = "0" ]; then

    if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
        echo "Changing hermes GID to $HERMES_GID"
-        groupmod -g "$HERMES_GID" hermes
+        # -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist
+        # as "dialout" in the Debian-based container image)
+        groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
    fi

    actual_hermes_uid=$(id -u hermes)
    if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
        echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
-        chown -R hermes:hermes "$HERMES_HOME"
+        # In rootless Podman the container's "root" is mapped to an unprivileged
+        # host UID — chown will fail.  That's fine: the volume is already owned
+        # by the mapped user on the host side.
+        chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
+            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

    echo "Dropping root privileges"
@@ -41,6 +41,14 @@ colors:
  session_label: "#DAA520"        # Session label
  session_border: "#8B8682"       # Session ID dim color

+  # TUI surfaces
+  status_bar_bg: "#1a1a2e"              # Status / usage bar background
+  voice_status_bg: "#1a1a2e"            # Voice-mode badge background
+  completion_menu_bg: "#1a1a2e"         # Completion list background
+  completion_menu_current_bg: "#333355" # Active completion row background
+  completion_menu_meta_bg: "#1a1a2e"    # Completion meta column background
+  completion_menu_meta_current_bg: "#333355"  # Active completion meta background
+
 # ── Spinner ─────────────────────────────────────────────────────────────────
 # Customize the animated spinner shown during API calls and tool execution.
 spinner:
@@ -66,6 +66,7 @@ class Platform(Enum):
    WECOM_CALLBACK = "wecom_callback"
    WEIXIN = "weixin"
    BLUEBUBBLES = "bluebubbles"
+    QQBOT = "qqbot"


@dataclass
@@ -303,6 +304,9 @@ class GatewayConfig:
            # BlueBubbles uses extra dict for local server config
            elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
                connected.append(platform)
+            # QQBot uses extra dict for app credentials
+            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
+                connected.append(platform)
        return connected
    
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -550,6 +554,12 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
+                if "channel_prompts" in platform_cfg:
+                    channel_prompts = platform_cfg["channel_prompts"]
+                    if isinstance(channel_prompts, dict):
+                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
+                    else:
+                        bridged["channel_prompts"] = channel_prompts
                if not bridged:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
@@ -621,8 +631,25 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["TELEGRAM_FREE_RESPONSE_CHATS"] = str(frc)
+                ignored_threads = telegram_cfg.get("ignored_threads")
+                if ignored_threads is not None and not os.getenv("TELEGRAM_IGNORED_THREADS"):
+                    if isinstance(ignored_threads, list):
+                        ignored_threads = ",".join(str(v) for v in ignored_threads)
+                    os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads)
                if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
+                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
+                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
+                if "disable_link_previews" in telegram_cfg:
+                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
+                    if not isinstance(plat_data, dict):
+                        plat_data = {}
+                        platforms_data[Platform.TELEGRAM.value] = plat_data
+                    extra = plat_data.setdefault("extra", {})
+                    if not isinstance(extra, dict):
+                        extra = {}
+                        plat_data["extra"] = extra
+                    extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]

            whatsapp_cfg = yaml_cfg.get("whatsapp", {})
            if isinstance(whatsapp_cfg, dict):
@@ -1109,6 +1136,32 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            name=os.getenv("BLUEBUBBLES_HOME_CHANNEL_NAME", "Home"),
        )

+    # QQ (Official Bot API v2)
+    qq_app_id = os.getenv("QQ_APP_ID")
+    qq_client_secret = os.getenv("QQ_CLIENT_SECRET")
+    if qq_app_id or qq_client_secret:
+        if Platform.QQBOT not in config.platforms:
+            config.platforms[Platform.QQBOT] = PlatformConfig()
+        config.platforms[Platform.QQBOT].enabled = True
+        extra = config.platforms[Platform.QQBOT].extra
+        if qq_app_id:
+            extra["app_id"] = qq_app_id
+        if qq_client_secret:
+            extra["client_secret"] = qq_client_secret
+        qq_allowed_users = os.getenv("QQ_ALLOWED_USERS", "").strip()
+        if qq_allowed_users:
+            extra["allow_from"] = qq_allowed_users
+        qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip()
+        if qq_group_allowed:
+            extra["group_allow_from"] = qq_group_allowed
+        qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
+        if qq_home:
+            config.platforms[Platform.QQBOT].home_channel = HomeChannel(
+                platform=Platform.QQBOT,
+                chat_id=qq_home,
+                name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"),
+            )
+
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
@@ -9,6 +9,10 @@ Resolution order (first non-None wins):
    3. ``_PLATFORM_DEFAULTS[<platform>][<key>]``  — built-in sensible default
    4. ``_GLOBAL_DEFAULTS[<key>]``              — built-in global default

+Exception: ``display.streaming`` is CLI-only.  Gateway streaming follows the
+top-level ``streaming`` config unless ``display.platforms.<platform>.streaming``
+sets an explicit per-platform override.
+
 Backward compatibility: ``display.tool_progress_overrides`` is still read as a
 fallback for ``tool_progress`` when no ``display.platforms`` entry exists.  A
 config migration (version bump) automatically moves the old format into the new
@@ -143,10 +147,13 @@ def resolve_display_setting(
            if val is not None:
                return _normalise(setting, val)

-    # 2. Global user setting (display.<key>)
-    val = display_cfg.get(setting)
-    if val is not None:
-        return _normalise(setting, val)
+    # 2. Global user setting (display.<key>).  Skip display.streaming because
+    # that key controls only CLI terminal streaming; gateway token streaming is
+    # governed by the top-level streaming config plus per-platform overrides.
+    if setting != "streaming":
+        val = display_cfg.get(setting)
+        if val is not None:
+            return _normalise(setting, val)

    # 3. Built-in platform default
    plat_defaults = _PLATFORM_DEFAULTS.get(platform_key)
@@ -9,9 +9,11 @@ Each adapter handles:
 """

 from .base import BasePlatformAdapter, MessageEvent, SendResult
+from .qqbot import QQAdapter

 __all__ = [
    "BasePlatformAdapter",
    "MessageEvent",
    "SendResult",
+    "QQAdapter",
 ]
@@ -10,6 +10,7 @@ Exposes an HTTP server with endpoints:
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
 - GET  /health                     — health check
+- GET  /health/detailed            — rich status for cross-container dashboard probing

 Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat,
 AnythingLLM, NextChat, ChatBox, etc.) can connect to hermes-agent
@@ -514,6 +515,8 @@ class APIServerAdapter(BasePlatformAdapter):
        session_id: Optional[str] = None,
        stream_delta_callback=None,
        tool_progress_callback=None,
+        tool_start_callback=None,
+        tool_complete_callback=None,
    ) -> Any:
        """
        Create an AIAgent instance using the gateway's runtime config.
@@ -552,6 +555,8 @@ class APIServerAdapter(BasePlatformAdapter):
            platform="api_server",
            stream_delta_callback=stream_delta_callback,
            tool_progress_callback=tool_progress_callback,
+            tool_start_callback=tool_start_callback,
+            tool_complete_callback=tool_complete_callback,
            session_db=self._ensure_session_db(),
            fallback_model=fallback_model,
        )
@@ -565,6 +570,27 @@ class APIServerAdapter(BasePlatformAdapter):
        """GET /health — simple health check."""
        return web.json_response({"status": "ok", "platform": "hermes-agent"})

+    async def _handle_health_detailed(self, request: "web.Request") -> "web.Response":
+        """GET /health/detailed — rich status for cross-container dashboard probing.
+
+        Returns gateway state, connected platforms, PID, and uptime so the
+        dashboard can display full status without needing a shared PID file or
+        /proc access.  No authentication required.
+        """
+        from gateway.status import read_runtime_status
+
+        runtime = read_runtime_status() or {}
+        return web.json_response({
+            "status": "ok",
+            "platform": "hermes-agent",
+            "gateway_state": runtime.get("gateway_state"),
+            "platforms": runtime.get("platforms", {}),
+            "active_agents": runtime.get("active_agents", 0),
+            "exit_reason": runtime.get("exit_reason"),
+            "updated_at": runtime.get("updated_at"),
+            "pid": os.getpid(),
+        })
+
    async def _handle_models(self, request: "web.Request") -> "web.Response":
        """GET /v1/models — return hermes-agent as an available model."""
        auth_err = self._check_auth(request)
@@ -876,7 +902,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return time.monotonic()

            # Stream content chunks as they arrive from the agent
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
            while True:
                try:
                    delta = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@@ -943,6 +969,427 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

+    async def _write_sse_responses(
+        self,
+        request: "web.Request",
+        response_id: str,
+        model: str,
+        created_at: int,
+        stream_q,
+        agent_task,
+        agent_ref,
+        conversation_history: List[Dict[str, str]],
+        user_message: str,
+        instructions: Optional[str],
+        conversation: Optional[str],
+        store: bool,
+        session_id: str,
+    ) -> "web.StreamResponse":
+        """Write an SSE stream for POST /v1/responses (OpenAI Responses API).
+
+        Emits spec-compliant event types as the agent runs:
+
+        - ``response.created`` — initial envelope (status=in_progress)
+        - ``response.output_text.delta`` / ``response.output_text.done`` —
+          streamed assistant text
+        - ``response.output_item.added`` / ``response.output_item.done``
+          with ``item.type == "function_call"`` — when the agent invokes a
+          tool (both events fire; the ``done`` event carries the finalized
+          ``arguments`` string)
+        - ``response.output_item.added`` with
+          ``item.type == "function_call_output"`` — tool result with
+          ``{call_id, output, status}``
+        - ``response.completed`` — terminal event carrying the full
+          response object with all output items + usage (same payload
+          shape as the non-streaming path for parity)
+        - ``response.failed`` — terminal event on agent error
+
+        If the client disconnects mid-stream, ``agent.interrupt()`` is
+        called so the agent stops issuing upstream LLM calls, then the
+        asyncio task is cancelled.  When ``store=True`` the full response
+        is persisted to the ResponseStore in a ``finally`` block so GET
+        /v1/responses/{id} and ``previous_response_id`` chaining work the
+        same as the batch path.
+        """
+        import queue as _q
+
+        sse_headers = {
+            "Content-Type": "text/event-stream",
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no",
+        }
+        origin = request.headers.get("Origin", "")
+        cors = self._cors_headers_for_origin(origin) if origin else None
+        if cors:
+            sse_headers.update(cors)
+        if session_id:
+            sse_headers["X-Hermes-Session-Id"] = session_id
+        response = web.StreamResponse(status=200, headers=sse_headers)
+        await response.prepare(request)
+
+        # State accumulated during the stream
+        final_text_parts: List[str] = []
+        # Track open function_call items by name so we can emit a matching
+        # ``done`` event when the tool completes.  Order preserved.
+        pending_tool_calls: List[Dict[str, Any]] = []
+        # Output items we've emitted so far (used to build the terminal
+        # response.completed payload).  Kept in the order they appeared.
+        emitted_items: List[Dict[str, Any]] = []
+        # Monotonic counter for output_index (spec requires it).
+        output_index = 0
+        # Monotonic counter for call_id generation if the agent doesn't
+        # provide one (it doesn't, from tool_progress_callback).
+        call_counter = 0
+        # Canonical Responses SSE events include a monotonically increasing
+        # sequence_number. Add it server-side for every emitted event so
+        # clients that validate the OpenAI event schema can parse our stream.
+        sequence_number = 0
+        # Track the assistant message item id + content index for text
+        # delta events — the spec ties deltas to a specific item.
+        message_item_id = f"msg_{uuid.uuid4().hex[:24]}"
+        message_output_index: Optional[int] = None
+        message_opened = False
+
+        async def _write_event(event_type: str, data: Dict[str, Any]) -> None:
+            nonlocal sequence_number
+            if "sequence_number" not in data:
+                data["sequence_number"] = sequence_number
+            sequence_number += 1
+            payload = f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+            await response.write(payload.encode())
+
+        def _envelope(status: str) -> Dict[str, Any]:
+            env: Dict[str, Any] = {
+                "id": response_id,
+                "object": "response",
+                "status": status,
+                "created_at": created_at,
+                "model": model,
+            }
+            return env
+
+        final_response_text = ""
+        agent_error: Optional[str] = None
+        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+        try:
+            # response.created — initial envelope, status=in_progress
+            created_env = _envelope("in_progress")
+            created_env["output"] = []
+            await _write_event("response.created", {
+                "type": "response.created",
+                "response": created_env,
+            })
+            last_activity = time.monotonic()
+
+            async def _open_message_item() -> None:
+                """Emit response.output_item.added for the assistant message
+                the first time any text delta arrives."""
+                nonlocal message_opened, message_output_index, output_index
+                if message_opened:
+                    return
+                message_opened = True
+                message_output_index = output_index
+                output_index += 1
+                item = {
+                    "id": message_item_id,
+                    "type": "message",
+                    "status": "in_progress",
+                    "role": "assistant",
+                    "content": [],
+                }
+                await _write_event("response.output_item.added", {
+                    "type": "response.output_item.added",
+                    "output_index": message_output_index,
+                    "item": item,
+                })
+
+            async def _emit_text_delta(delta_text: str) -> None:
+                await _open_message_item()
+                final_text_parts.append(delta_text)
+                await _write_event("response.output_text.delta", {
+                    "type": "response.output_text.delta",
+                    "item_id": message_item_id,
+                    "output_index": message_output_index,
+                    "content_index": 0,
+                    "delta": delta_text,
+                    "logprobs": [],
+                })
+
+            async def _emit_tool_started(payload: Dict[str, Any]) -> str:
+                """Emit response.output_item.added for a function_call.
+
+                Returns the call_id so the matching completion event can
+                reference it.  Prefer the real ``tool_call_id`` from the
+                agent when available; fall back to a generated call id for
+                safety in tests or older code paths.
+                """
+                nonlocal output_index, call_counter
+                call_counter += 1
+                call_id = payload.get("tool_call_id") or f"call_{response_id[5:]}_{call_counter}"
+                args = payload.get("arguments", {})
+                if isinstance(args, dict):
+                    arguments_str = json.dumps(args)
+                else:
+                    arguments_str = str(args)
+                item = {
+                    "id": f"fc_{uuid.uuid4().hex[:24]}",
+                    "type": "function_call",
+                    "status": "in_progress",
+                    "name": payload.get("name", ""),
+                    "call_id": call_id,
+                    "arguments": arguments_str,
+                }
+                idx = output_index
+                output_index += 1
+                pending_tool_calls.append({
+                    "call_id": call_id,
+                    "name": payload.get("name", ""),
+                    "arguments": arguments_str,
+                    "item_id": item["id"],
+                    "output_index": idx,
+                })
+                emitted_items.append({
+                    "type": "function_call",
+                    "name": payload.get("name", ""),
+                    "arguments": arguments_str,
+                    "call_id": call_id,
+                })
+                await _write_event("response.output_item.added", {
+                    "type": "response.output_item.added",
+                    "output_index": idx,
+                    "item": item,
+                })
+                return call_id
+
+            async def _emit_tool_completed(payload: Dict[str, Any]) -> None:
+                """Emit response.output_item.done (function_call) followed
+                by response.output_item.added (function_call_output)."""
+                nonlocal output_index
+                call_id = payload.get("tool_call_id")
+                result = payload.get("result", "")
+                pending = None
+                if call_id:
+                    for i, p in enumerate(pending_tool_calls):
+                        if p["call_id"] == call_id:
+                            pending = pending_tool_calls.pop(i)
+                            break
+                if pending is None:
+                    # Completion without a matching start — skip to avoid
+                    # emitting orphaned done events.
+                    return
+
+                # function_call done
+                done_item = {
+                    "id": pending["item_id"],
+                    "type": "function_call",
+                    "status": "completed",
+                    "name": pending["name"],
+                    "call_id": pending["call_id"],
+                    "arguments": pending["arguments"],
+                }
+                await _write_event("response.output_item.done", {
+                    "type": "response.output_item.done",
+                    "output_index": pending["output_index"],
+                    "item": done_item,
+                })
+
+                # function_call_output added (result)
+                result_str = result if isinstance(result, str) else json.dumps(result)
+                output_parts = [{"type": "input_text", "text": result_str}]
+                output_item = {
+                    "id": f"fco_{uuid.uuid4().hex[:24]}",
+                    "type": "function_call_output",
+                    "call_id": pending["call_id"],
+                    "output": output_parts,
+                    "status": "completed",
+                }
+                idx = output_index
+                output_index += 1
+                emitted_items.append({
+                    "type": "function_call_output",
+                    "call_id": pending["call_id"],
+                    "output": output_parts,
+                })
+                await _write_event("response.output_item.added", {
+                    "type": "response.output_item.added",
+                    "output_index": idx,
+                    "item": output_item,
+                })
+                await _write_event("response.output_item.done", {
+                    "type": "response.output_item.done",
+                    "output_index": idx,
+                    "item": output_item,
+                })
+
+            # Main drain loop — thread-safe queue fed by agent callbacks.
+            async def _dispatch(it) -> None:
+                """Route a queue item to the correct SSE emitter.
+
+                Plain strings are text deltas.  Tagged tuples with
+                ``__tool_started__`` / ``__tool_completed__`` prefixes
+                are tool lifecycle events.
+                """
+                if isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str):
+                    tag, payload = it
+                    if tag == "__tool_started__":
+                        await _emit_tool_started(payload)
+                    elif tag == "__tool_completed__":
+                        await _emit_tool_completed(payload)
+                    # Unknown tags are silently ignored (forward-compat).
+                elif isinstance(it, str):
+                    await _emit_text_delta(it)
+                # Other types (non-string, non-tuple) are silently dropped.
+
+            loop = asyncio.get_running_loop()
+            while True:
+                try:
+                    item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
+                except _q.Empty:
+                    if agent_task.done():
+                        # Drain remaining
+                        while True:
+                            try:
+                                item = stream_q.get_nowait()
+                                if item is None:
+                                    break
+                                await _dispatch(item)
+                                last_activity = time.monotonic()
+                            except _q.Empty:
+                                break
+                        break
+                    if time.monotonic() - last_activity >= CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS:
+                        await response.write(b": keepalive\n\n")
+                        last_activity = time.monotonic()
+                    continue
+
+                if item is None:  # EOS sentinel
+                    break
+
+                await _dispatch(item)
+                last_activity = time.monotonic()
+
+            # Pick up agent result + usage from the completed task
+            try:
+                result, agent_usage = await agent_task
+                usage = agent_usage or usage
+                # If the agent produced a final_response but no text
+                # deltas were streamed (e.g. some providers only emit
+                # the full response at the end), emit a single fallback
+                # delta so Responses clients still receive a live text part.
+                agent_final = result.get("final_response", "") if isinstance(result, dict) else ""
+                if agent_final and not final_text_parts:
+                    await _emit_text_delta(agent_final)
+                if agent_final and not final_response_text:
+                    final_response_text = agent_final
+                if isinstance(result, dict) and result.get("error") and not final_response_text:
+                    agent_error = result["error"]
+            except Exception as e:  # noqa: BLE001
+                logger.error("Error running agent for streaming responses: %s", e, exc_info=True)
+                agent_error = str(e)
+
+            # Close the message item if it was opened
+            final_response_text = "".join(final_text_parts) or final_response_text
+            if message_opened:
+                await _write_event("response.output_text.done", {
+                    "type": "response.output_text.done",
+                    "item_id": message_item_id,
+                    "output_index": message_output_index,
+                    "content_index": 0,
+                    "text": final_response_text,
+                    "logprobs": [],
+                })
+                msg_done_item = {
+                    "id": message_item_id,
+                    "type": "message",
+                    "status": "completed",
+                    "role": "assistant",
+                    "content": [
+                        {"type": "output_text", "text": final_response_text}
+                    ],
+                }
+                await _write_event("response.output_item.done", {
+                    "type": "response.output_item.done",
+                    "output_index": message_output_index,
+                    "item": msg_done_item,
+                })
+
+            # Always append a final message item in the completed
+            # response envelope so clients that only parse the terminal
+            # payload still see the assistant text.  This mirrors the
+            # shape produced by _extract_output_items in the batch path.
+            final_items: List[Dict[str, Any]] = list(emitted_items)
+            final_items.append({
+                "type": "message",
+                "role": "assistant",
+                "content": [
+                    {"type": "output_text", "text": final_response_text or (agent_error or "")}
+                ],
+            })
+
+            if agent_error:
+                failed_env = _envelope("failed")
+                failed_env["output"] = final_items
+                failed_env["error"] = {"message": agent_error, "type": "server_error"}
+                failed_env["usage"] = {
+                    "input_tokens": usage.get("input_tokens", 0),
+                    "output_tokens": usage.get("output_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                }
+                await _write_event("response.failed", {
+                    "type": "response.failed",
+                    "response": failed_env,
+                })
+            else:
+                completed_env = _envelope("completed")
+                completed_env["output"] = final_items
+                completed_env["usage"] = {
+                    "input_tokens": usage.get("input_tokens", 0),
+                    "output_tokens": usage.get("output_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                }
+                await _write_event("response.completed", {
+                    "type": "response.completed",
+                    "response": completed_env,
+                })
+
+                # Persist for future chaining / GET retrieval, mirroring
+                # the batch path behavior.
+                if store:
+                    full_history = list(conversation_history)
+                    full_history.append({"role": "user", "content": user_message})
+                    if isinstance(result, dict) and result.get("messages"):
+                        full_history.extend(result["messages"])
+                    else:
+                        full_history.append({"role": "assistant", "content": final_response_text})
+                    self._response_store.put(response_id, {
+                        "response": completed_env,
+                        "conversation_history": full_history,
+                        "instructions": instructions,
+                        "session_id": session_id,
+                    })
+                    if conversation:
+                        self._response_store.set_conversation(conversation, response_id)
+
+        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
+            # Client disconnected — interrupt the agent so it stops
+            # making upstream LLM calls, then cancel the task.
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE client disconnected")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+                try:
+                    await agent_task
+                except (asyncio.CancelledError, Exception):
+                    pass
+            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+
+        return response
+
    async def _handle_responses(self, request: "web.Request") -> "web.Response":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
@@ -1013,11 +1460,13 @@ class APIServerAdapter(BasePlatformAdapter):
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

+        stored_session_id = None
        if not conversation_history and previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored is None:
                return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
            conversation_history = list(stored.get("conversation_history", []))
+            stored_session_id = stored.get("session_id")
            # If no instructions provided, carry forward from previous
            if instructions is None:
                instructions = stored.get("instructions")
@@ -1035,8 +1484,83 @@ class APIServerAdapter(BasePlatformAdapter):
        if body.get("truncation") == "auto" and len(conversation_history) > 100:
            conversation_history = conversation_history[-100:]

-        # Run the agent (with Idempotency-Key support)
-        session_id = str(uuid.uuid4())
+        # Reuse session from previous_response_id chain so the dashboard
+        # groups the entire conversation under one session entry.
+        session_id = stored_session_id or str(uuid.uuid4())
+
+        stream = bool(body.get("stream", False))
+        if stream:
+            # Streaming branch — emit OpenAI Responses SSE events as the
+            # agent runs so frontends can render text deltas and tool
+            # calls in real time.  See _write_sse_responses for details.
+            import queue as _q
+            _stream_q: _q.Queue = _q.Queue()
+
+            def _on_delta(delta):
+                # None from the agent is a CLI box-close signal, not EOS.
+                # Forwarding would kill the SSE stream prematurely; the
+                # SSE writer detects completion via agent_task.done().
+                if delta is not None:
+                    _stream_q.put(delta)
+
+            def _on_tool_progress(event_type, name, preview, args, **kwargs):
+                """Queue non-start tool progress events if needed in future.
+
+                The structured Responses stream uses ``tool_start_callback``
+                and ``tool_complete_callback`` for exact call-id correlation,
+                so progress events are currently ignored here.
+                """
+                return
+
+            def _on_tool_start(tool_call_id, function_name, function_args):
+                """Queue a started tool for live function_call streaming."""
+                _stream_q.put(("__tool_started__", {
+                    "tool_call_id": tool_call_id,
+                    "name": function_name,
+                    "arguments": function_args or {},
+                }))
+
+            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
+                """Queue a completed tool result for live function_call_output streaming."""
+                _stream_q.put(("__tool_completed__", {
+                    "tool_call_id": tool_call_id,
+                    "name": function_name,
+                    "arguments": function_args or {},
+                    "result": function_result,
+                }))
+
+            agent_ref = [None]
+            agent_task = asyncio.ensure_future(self._run_agent(
+                user_message=user_message,
+                conversation_history=conversation_history,
+                ephemeral_system_prompt=instructions,
+                session_id=session_id,
+                stream_delta_callback=_on_delta,
+                tool_progress_callback=_on_tool_progress,
+                tool_start_callback=_on_tool_start,
+                tool_complete_callback=_on_tool_complete,
+                agent_ref=agent_ref,
+            ))
+
+            response_id = f"resp_{uuid.uuid4().hex[:28]}"
+            model_name = body.get("model", self._model_name)
+            created_at = int(time.time())
+
+            return await self._write_sse_responses(
+                request=request,
+                response_id=response_id,
+                model=model_name,
+                created_at=created_at,
+                stream_q=_stream_q,
+                agent_task=agent_task,
+                agent_ref=agent_ref,
+                conversation_history=conversation_history,
+                user_message=user_message,
+                instructions=instructions,
+                conversation=conversation,
+                store=store,
+                session_id=session_id,
+            )

        async def _compute_response():
            return await self._run_agent(
@@ -1111,6 +1635,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "response": response_data,
                "conversation_history": full_history,
                "instructions": instructions,
+                "session_id": session_id,
            })
            # Update conversation mapping so the next request with the same
            # conversation name automatically chains to this response
@@ -1464,6 +1989,8 @@ class APIServerAdapter(BasePlatformAdapter):
        session_id: Optional[str] = None,
        stream_delta_callback=None,
        tool_progress_callback=None,
+        tool_start_callback=None,
+        tool_complete_callback=None,
        agent_ref: Optional[list] = None,
    ) -> tuple:
        """
@@ -1477,7 +2004,7 @@ class APIServerAdapter(BasePlatformAdapter):
        callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
        another thread to stop in-progress LLM calls.
        """
-        loop = asyncio.get_event_loop()
+        loop = asyncio.get_running_loop()

        def _run():
            agent = self._create_agent(
@@ -1485,6 +2012,8 @@ class APIServerAdapter(BasePlatformAdapter):
                session_id=session_id,
                stream_delta_callback=stream_delta_callback,
                tool_progress_callback=tool_progress_callback,
+                tool_start_callback=tool_start_callback,
+                tool_complete_callback=tool_complete_callback,
            )
            if agent_ref is not None:
                agent_ref[0] = agent
@@ -1621,10 +2150,12 @@ class APIServerAdapter(BasePlatformAdapter):
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

+        stored_session_id = None
        if not conversation_history and previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored:
                conversation_history = list(stored.get("conversation_history", []))
+                stored_session_id = stored.get("session_id")
                if instructions is None:
                    instructions = stored.get("instructions")

@@ -1643,7 +2174,7 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

-        session_id = body.get("session_id") or run_id
+        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions

        async def _run_and_close():
@@ -1783,6 +2314,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app = web.Application(middlewares=mws)
            self._app["api_server_adapter"] = self
            self._app.router.add_get("/health", self._handle_health)
+            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
@@ -682,6 +682,10 @@ class MessageEvent:
    # Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
    # Discord channel_skill_bindings).  A single name or ordered list.
    auto_skill: Optional[str | list[str]] = None
+
+    # Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
+    # Applied at API call time and never persisted to transcript history.
+    channel_prompt: Optional[str] = None
    
    # Internal flag — set for synthetic events (e.g. background process
    # completion notifications) that must bypass user authorization checks.
@@ -730,25 +734,56 @@ def merge_pending_message_event(
    pending_messages: Dict[str, MessageEvent],
    session_key: str,
    event: MessageEvent,
+    *,
+    merge_text: bool = False,
 ) -> None:
    """Store or merge a pending event for a session.

    Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
    events. Merge those into the existing queued event so the next turn sees
-    the whole burst, while non-photo follow-ups still replace the pending
-    event normally.
+    the whole burst.
+
+    When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
+    instead of replacing the pending turn. This is used for Telegram bursty
+    follow-ups so a multi-part user thought is not silently truncated to only
+    the last queued fragment.
    """
    existing = pending_messages.get(session_key)
-    if (
-        existing
-        and getattr(existing, "message_type", None) == MessageType.PHOTO
-        and event.message_type == MessageType.PHOTO
-    ):
-        existing.media_urls.extend(event.media_urls)
-        existing.media_types.extend(event.media_types)
-        if event.text:
-            existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
-        return
+    if existing:
+        existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO
+        incoming_is_photo = event.message_type == MessageType.PHOTO
+        existing_has_media = bool(existing.media_urls)
+        incoming_has_media = bool(event.media_urls)
+
+        if existing_is_photo and incoming_is_photo:
+            existing.media_urls.extend(event.media_urls)
+            existing.media_types.extend(event.media_types)
+            if event.text:
+                existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
+            return
+
+        if existing_has_media or incoming_has_media:
+            if incoming_has_media:
+                existing.media_urls.extend(event.media_urls)
+                existing.media_types.extend(event.media_types)
+            if event.text:
+                if existing.text:
+                    existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
+                else:
+                    existing.text = event.text
+            if existing_is_photo or incoming_is_photo:
+                existing.message_type = MessageType.PHOTO
+            return
+
+        if (
+            merge_text
+            and getattr(existing, "message_type", None) == MessageType.TEXT
+            and event.message_type == MessageType.TEXT
+        ):
+            if event.text:
+                existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
+            return
+
    pending_messages[session_key] = event


@@ -776,6 +811,36 @@ _RETRYABLE_ERROR_PATTERNS = (
 MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]


+def resolve_channel_prompt(
+    config_extra: dict,
+    channel_id: str,
+    parent_id: str | None = None,
+) -> str | None:
+    """Resolve a per-channel ephemeral prompt from platform config.
+
+    Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict.
+    Prefers an exact match on *channel_id*; falls back to *parent_id*
+    (useful for forum threads / child channels inheriting a parent prompt).
+
+    Returns the prompt string, or None if no match is found.  Blank/whitespace-
+    only prompts are treated as absent.
+    """
+    prompts = config_extra.get("channel_prompts") or {}
+    if not isinstance(prompts, dict):
+        return None
+
+    for key in (channel_id, parent_id):
+        if not key:
+            continue
+        prompt = prompts.get(key)
+        if prompt is None:
+            continue
+        prompt = str(prompt).strip()
+        if prompt:
+            return prompt
+    return None
+
+
 class BasePlatformAdapter(ABC):
    """
    Base class for platform adapters.
@@ -805,6 +870,11 @@ class BasePlatformAdapter(ABC):
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
        self._background_tasks: set[asyncio.Task] = set()
+        # One-shot callbacks to fire after the main response is delivered.
+        # Keyed by session_key.  GatewayRunner uses this to defer
+        # background-review notifications ("💾 Skill created") until the
+        # primary reply has been sent.
+        self._post_delivery_callbacks: Dict[str, Callable] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
        # Chats where auto-TTS on voice input is disabled (set by /voice off)
@@ -1221,7 +1291,7 @@ class BasePlatformAdapter(ABC):
                path = path[1:-1].strip()
            path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
            if path:
-                media.append((path, has_voice_tag))
+                media.append((os.path.expanduser(path), has_voice_tag))

        # Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
        if media:
@@ -1509,7 +1579,7 @@ class BasePlatformAdapter(ABC):
            # session lifecycle and its cleanup races with the running task
            # (see PR #4926).
            cmd = event.get_command()
-            if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"):
+            if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart", "queue", "q"):
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -1624,6 +1694,21 @@ class BasePlatformAdapter(ABC):
            # streaming already delivered the text (already_sent=True) or
            # when the message was queued behind an active agent.  Log at
            # DEBUG to avoid noisy warnings for expected behavior.
+            #
+            # Suppress stale response when the session was interrupted by a
+            # new message that hasn't been consumed yet.  The pending message
+            # is processed by the pending-message handler below (#8221/#2483).
+            if (
+                response
+                and interrupt_event.is_set()
+                and session_key in self._pending_messages
+            ):
+                logger.info(
+                    "[%s] Suppressing stale response for interrupted session %s",
+                    self.name,
+                    session_key,
+                )
+                response = None
            if not response:
                logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
            if response:
@@ -1845,6 +1930,14 @@ class BasePlatformAdapter(ABC):
            except Exception:
                pass  # Last resort — don't let error reporting crash the handler
        finally:
+            # Fire any one-shot post-delivery callback registered for this
+            # session (e.g. deferred background-review notifications).
+            _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
+            if callable(_post_cb):
+                try:
+                    _post_cb()
+                except Exception:
+                    pass
            # Stop typing indicator
            typing_task.cancel()
            try:
@@ -224,6 +224,21 @@ class BlueBubblesAdapter(BasePlatformAdapter):
            host = "localhost"
        return f"http://{host}:{self.webhook_port}{self.webhook_path}"

+    @property
+    def _webhook_register_url(self) -> str:
+        """Webhook URL registered with BlueBubbles, including the password as
+        a query param so inbound webhook POSTs carry credentials.
+
+        BlueBubbles posts events to the exact URL registered via
+        ``/api/v1/webhook``. Its webhook registration API does not support
+        custom headers, so embedding the password in the URL is the only
+        way to authenticate inbound webhooks without disabling auth.
+        """
+        base = self._webhook_url
+        if self.password:
+            return f"{base}?password={quote(self.password, safe='')}"
+        return base
+
    async def _find_registered_webhooks(self, url: str) -> list:
        """Return list of BB webhook entries matching *url*."""
        try:
@@ -245,7 +260,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        if not self.client:
            return False

-        webhook_url = self._webhook_url
+        webhook_url = self._webhook_register_url

        # Crash resilience — reuse an existing registration if present
        existing = await self._find_registered_webhooks(webhook_url)
@@ -257,7 +272,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):

        payload = {
            "url": webhook_url,
-            "events": ["new-message", "updated-message", "message"],
+            "events": ["new-message", "updated-message"],
        }

        try:
@@ -292,7 +307,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        if not self.client:
            return False

-        webhook_url = self._webhook_url
+        webhook_url = self._webhook_register_url
        removed = False

        try:
@@ -835,6 +850,12 @@ class BlueBubblesAdapter(BasePlatformAdapter):
            payload.get("chat_guid"),
            payload.get("guid"),
        )
+        # Fallback: BlueBubbles v1.9+ webhook payloads omit top-level chatGuid;
+        # the chat GUID is nested under data.chats[0].guid instead.
+        if not chat_guid:
+            _chats = record.get("chats") or []
+            if _chats and isinstance(_chats[0], dict):
+                chat_guid = _chats[0].get("guid") or _chats[0].get("chatGuid")
        chat_identifier = self._value(
            record.get("chatIdentifier"),
            record.get("identifier"),
@@ -54,7 +54,7 @@ logger = logging.getLogger(__name__)
 MAX_MESSAGE_LENGTH = 20000
 RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
 _SESSION_WEBHOOKS_MAX = 500
-_DINGTALK_WEBHOOK_RE = re.compile(r'^https://api\.dingtalk\.com/')
+_DINGTALK_WEBHOOK_RE = re.compile(r'^https://(?:api|oapi)\.dingtalk\.com/')


 def check_dingtalk_requirements() -> bool:
@@ -128,12 +128,12 @@ class DingTalkAdapter(BasePlatformAdapter):
            return False

    async def _run_stream(self) -> None:
-        """Run the blocking stream client with auto-reconnection."""
+        """Run the stream client with auto-reconnection."""
        backoff_idx = 0
        while self._running:
            try:
                logger.debug("[%s] Starting stream client...", self.name)
-                await asyncio.to_thread(self._stream_client.start)
+                await self._stream_client.start()
            except asyncio.CancelledError:
                return
            except Exception as e:
@@ -238,18 +238,35 @@ class DingTalkAdapter(BasePlatformAdapter):

    @staticmethod
    def _extract_text(message: "ChatbotMessage") -> str:
-        """Extract plain text from a DingTalk chatbot message."""
-        text = getattr(message, "text", None) or ""
-        if isinstance(text, dict):
-            content = text.get("content", "").strip()
-        else:
-            content = str(text).strip()
+        """Extract plain text from a DingTalk chatbot message.
+
+        Handles both legacy and current dingtalk-stream SDK payload shapes:
+          * legacy: ``message.text`` was a dict ``{"content": "..."}``
+          * >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose
+            ``__str__`` returns ``"TextContent(content=...)"`` — never fall
+            back to ``str(text)`` without extracting ``.content`` first.
+          * rich text moved from ``message.rich_text`` (list) to
+            ``message.rich_text_content.rich_text_list`` (list of dicts).
+        """
+        text = getattr(message, "text", None)
+        content = ""
+        if text is not None:
+            if isinstance(text, dict):
+                content = (text.get("content") or "").strip()
+            elif hasattr(text, "content"):
+                content = str(text.content or "").strip()
+            else:
+                content = str(text).strip()

-        # Fall back to rich text if present
        if not content:
-            rich_text = getattr(message, "rich_text", None)
-            if rich_text and isinstance(rich_text, list):
-                parts = [item["text"] for item in rich_text
+            rich_list = None
+            rtc = getattr(message, "rich_text_content", None)
+            if rtc is not None and hasattr(rtc, "rich_text_list"):
+                rich_list = rtc.rich_text_list
+            if rich_list is None:
+                rich_list = getattr(message, "rich_text", None)
+            if rich_list and isinstance(rich_list, list):
+                parts = [item["text"] for item in rich_list
                         if isinstance(item, dict) and item.get("text")]
                content = " ".join(parts).strip()
        return content
@@ -314,19 +331,16 @@ class _IncomingHandler(ChatbotHandler if DINGTALK_STREAM_AVAILABLE else object):
        self._adapter = adapter
        self._loop = loop

-    def process(self, message: "ChatbotMessage"):
-        """Called by dingtalk-stream in its thread when a message arrives.
+    async def process(self, callback_message):
+        """Called by dingtalk-stream when a message arrives.

-        Schedules the async handler on the main event loop.
+        dingtalk-stream >= 0.24 passes a CallbackMessage whose `.data` contains
+        the chatbot payload. Convert it to ChatbotMessage and await the adapter
+        handler directly on the main event loop.
        """
-        loop = self._loop
-        if loop is None or loop.is_closed():
-            logger.error("[DingTalk] Event loop unavailable, cannot dispatch message")
-            return dingtalk_stream.AckMessage.STATUS_OK, "OK"
-
-        future = asyncio.run_coroutine_threadsafe(self._adapter._on_message(message), loop)
        try:
-            future.result(timeout=60)
+            chatbot_msg = ChatbotMessage.from_dict(callback_message.data)
+            await self._adapter._on_message(chatbot_msg)
        except Exception:
            logger.exception("[DingTalk] Error processing incoming message")

@@ -235,6 +235,7 @@ class VoiceReceiver:
        # Calculate dynamic RTP header size (RFC 9335 / rtpsize mode)
        cc = first_byte & 0x0F  # CSRC count
        has_extension = bool(first_byte & 0x10)  # extension bit
+        has_padding = bool(first_byte & 0x20)  # padding bit (RFC 3550 §5.1)
        header_size = 12 + (4 * cc) + (4 if has_extension else 0)

        if len(data) < header_size + 4:  # need at least header + nonce
@@ -278,6 +279,31 @@ class VoiceReceiver:
        if ext_data_len and len(decrypted) > ext_data_len:
            decrypted = decrypted[ext_data_len:]

+        # --- Strip RTP padding (RFC 3550 §5.1) ---
+        # When the P bit is set, the last payload byte holds the count of
+        # trailing padding bytes (including itself) that must be removed
+        # before further processing. Skipping this passes padding-contaminated
+        # bytes into DAVE/Opus and corrupts inbound audio.
+        if has_padding:
+            if not decrypted:
+                if self._packet_debug_count <= 10:
+                    logger.warning(
+                        "RTP padding bit set but no payload (ssrc=%d)", ssrc,
+                    )
+                return
+            pad_len = decrypted[-1]
+            if pad_len == 0 or pad_len > len(decrypted):
+                if self._packet_debug_count <= 10:
+                    logger.warning(
+                        "Invalid RTP padding length %d for payload size %d (ssrc=%d)",
+                        pad_len, len(decrypted), ssrc,
+                    )
+                return
+            decrypted = decrypted[:-pad_len]
+            if not decrypted:
+                # Padding consumed entire payload — nothing to decode
+                return
+
        # --- DAVE E2EE decrypt ---
        if self._dave_session:
            with self._lock:
@@ -1379,6 +1405,68 @@ class DiscordAdapter(BasePlatformAdapter):
            )
            return await super().send_image(chat_id, image_url, caption, reply_to)

+    async def send_animation(
+        self,
+        chat_id: str,
+        animation_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an animated GIF natively as a Discord file attachment."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        if not is_safe_url(animation_url):
+            logger.warning("[%s] Blocked unsafe animation URL during Discord send_animation", self.name)
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+
+        try:
+            import aiohttp
+
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+            if not channel:
+                return SendResult(success=False, error=f"Channel {chat_id} not found")
+
+            # Download the GIF and send as a Discord file attachment
+            # (Discord renders .gif attachments as auto-playing animations inline)
+            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
+            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
+            async with aiohttp.ClientSession(**_sess_kw) as session:
+                async with session.get(animation_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp:
+                    if resp.status != 200:
+                        raise Exception(f"Failed to download animation: HTTP {resp.status}")
+
+                    animation_data = await resp.read()
+
+                    import io
+                    file = discord.File(io.BytesIO(animation_data), filename="animation.gif")
+
+                    msg = await channel.send(
+                        content=caption if caption else None,
+                        file=file,
+                    )
+                    return SendResult(success=True, message_id=str(msg.id))
+
+        except ImportError:
+            logger.warning(
+                "[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
+                self.name,
+                exc_info=True,
+            )
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send animation attachment, falling back to URL: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
+            return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata)
+
    async def send_video(
        self,
        chat_id: str,
@@ -1696,6 +1784,10 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_update(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/update", "Update initiated~")

+        @tree.command(name="restart", description="Gracefully restart the Hermes gateway")
+        async def slash_restart(interaction: discord.Interaction):
+            await self._run_simple_slash(interaction, "/restart", "Restart requested~")
+
        @tree.command(name="approve", description="Approve a pending dangerous command")
        @discord.app_commands.describe(scope="Optional: 'all', 'session', 'always', 'all session', 'all always'")
        async def slash_approve(interaction: discord.Interaction, scope: str = ""):
@@ -1736,46 +1828,160 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_btw(interaction: discord.Interaction, question: str):
            await self._run_simple_slash(interaction, f"/btw {question}")

-        # Register installed skills as native slash commands (parity with
-        # Telegram, which uses telegram_menu_commands() in commands.py).
-        # Discord allows up to 100 application commands globally.
-        _DISCORD_CMD_LIMIT = 100
+        # ── Auto-register any gateway-available commands not yet on the tree ──
+        # This ensures new commands added to COMMAND_REGISTRY in
+        # hermes_cli/commands.py automatically appear as Discord slash
+        # commands without needing a manual entry here.
        try:
-            from hermes_cli.commands import discord_skill_commands
+            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            existing_names = {cmd.name for cmd in tree.get_commands()}
-            remaining_slots = max(0, _DISCORD_CMD_LIMIT - len(existing_names))
+            already_registered = set()
+            try:
+                already_registered = {cmd.name for cmd in tree.get_commands()}
+            except Exception:
+                pass

-            skill_entries, skipped = discord_skill_commands(
-                max_slots=remaining_slots,
+            config_overrides = _resolve_config_gates()
+
+            for cmd_def in COMMAND_REGISTRY:
+                if not _is_gateway_available(cmd_def, config_overrides):
+                    continue
+                # Discord command names: lowercase, hyphens OK, max 32 chars.
+                discord_name = cmd_def.name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                # Skip aliases that overlap with already-registered names
+                # (aliases for explicitly registered commands are handled above).
+                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
+                has_args = bool(cmd_def.args_hint)
+
+                if has_args:
+                    # Command takes optional arguments — create handler with
+                    # an optional ``args`` string parameter.
+                    def _make_args_handler(_name: str, _hint: str):
+                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
+                        async def _handler(interaction: discord.Interaction, args: str = ""):
+                            await self._run_simple_slash(
+                                interaction, f"/{_name} {args}".strip()
+                            )
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
+                else:
+                    # Parameterless command.
+                    def _make_simple_handler(_name: str):
+                        async def _handler(interaction: discord.Interaction):
+                            await self._run_simple_slash(interaction, f"/{_name}")
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_simple_handler(cmd_def.name)
+
+                auto_cmd = discord.app_commands.Command(
+                    name=discord_name,
+                    description=desc,
+                    callback=handler,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+
+            logger.debug(
+                "Discord auto-registered %d commands from COMMAND_REGISTRY",
+                len(already_registered),
+            )
+        except Exception as e:
+            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)
+
+        # Register skills under a single /skill command group with category
+        # subcommand groups.  This uses 1 top-level slot instead of N,
+        # supporting up to 25 categories × 25 skills = 625 skills.
+        self._register_skill_group(tree)
+
+    def _register_skill_group(self, tree) -> None:
+        """Register a ``/skill`` command group with category subcommand groups.
+
+        Skills are organized by their directory category under ``SKILLS_DIR``.
+        Each category becomes a subcommand group; root-level skills become
+        direct subcommands.  Discord supports 25 subcommand groups × 25
+        subcommands each = 625 skills — well beyond the old 100-command cap.
+        """
+        try:
+            from hermes_cli.commands import discord_skill_commands_by_category
+
+            existing_names = set()
+            try:
+                existing_names = {cmd.name for cmd in tree.get_commands()}
+            except Exception:
+                pass
+
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
                reserved_names=existing_names,
            )

-            for discord_name, description, cmd_key in skill_entries:
-                # Closure factory to capture cmd_key per iteration
-                def _make_skill_handler(_key: str):
-                    async def _skill_slash(interaction: discord.Interaction, args: str = ""):
-                        await self._run_simple_slash(interaction, f"{_key} {args}".strip())
-                    return _skill_slash
+            if not categories and not uncategorized:
+                return

-                handler = _make_skill_handler(cmd_key)
-                handler.__name__ = f"skill_{discord_name.replace('-', '_')}"
+            skill_group = discord.app_commands.Group(
+                name="skill",
+                description="Run a Hermes skill",
+            )

+            # ── Helper: build a callback for a skill command key ──
+            def _make_handler(_key: str):
+                @discord.app_commands.describe(args="Optional arguments for the skill")
+                async def _handler(interaction: discord.Interaction, args: str = ""):
+                    await self._run_simple_slash(interaction, f"{_key} {args}".strip())
+                _handler.__name__ = f"skill_{_key.lstrip('/').replace('-', '_')}"
+                return _handler
+
+            # ── Uncategorized (root-level) skills → direct subcommands ──
+            for discord_name, description, cmd_key in uncategorized:
                cmd = discord.app_commands.Command(
                    name=discord_name,
-                    description=description,
-                    callback=handler,
+                    description=description or f"Run the {discord_name} skill",
+                    callback=_make_handler(cmd_key),
                )
-                discord.app_commands.describe(args="Optional arguments for the skill")(cmd)
-                tree.add_command(cmd)
+                skill_group.add_command(cmd)

-            if skipped:
+            # ── Category subcommand groups ──
+            for cat_name in sorted(categories):
+                cat_desc = f"{cat_name.replace('-', ' ').title()} skills"
+                if len(cat_desc) > 100:
+                    cat_desc = cat_desc[:97] + "..."
+                cat_group = discord.app_commands.Group(
+                    name=cat_name,
+                    description=cat_desc,
+                    parent=skill_group,
+                )
+                for discord_name, description, cmd_key in categories[cat_name]:
+                    cmd = discord.app_commands.Command(
+                        name=discord_name,
+                        description=description or f"Run the {discord_name} skill",
+                        callback=_make_handler(cmd_key),
+                    )
+                    cat_group.add_command(cmd)
+
+            tree.add_command(skill_group)
+
+            total = sum(len(v) for v in categories.values()) + len(uncategorized)
+            logger.info(
+                "[%s] Registered /skill group: %d skill(s) across %d categories"
+                " + %d uncategorized",
+                self.name, total, len(categories), len(uncategorized),
+            )
+            if hidden:
                logger.warning(
-                    "[%s] Discord slash command limit reached (%d): %d skill(s) not registered",
-                    self.name, _DISCORD_CMD_LIMIT, skipped,
+                    "[%s] %d skill(s) not registered (Discord subcommand limits)",
+                    self.name, hidden,
                )
        except Exception as exc:
-            logger.warning("[%s] Failed to register skill slash commands: %s", self.name, exc)
+            logger.warning("[%s] Failed to register /skill group: %s", self.name, exc)

    def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
        """Build a MessageEvent from a Discord slash command interaction."""
@@ -1812,11 +2018,14 @@ class DiscordAdapter(BasePlatformAdapter):
        )

        msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
+        channel_id = str(interaction.channel_id)
+        parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
        return MessageEvent(
            text=text,
            message_type=msg_type,
            source=source,
            raw_message=interaction,
+            channel_prompt=self._resolve_channel_prompt(channel_id, parent_id or None),
        )

    # ------------------------------------------------------------------
@@ -1887,14 +2096,17 @@ class DiscordAdapter(BasePlatformAdapter):
            chat_topic=chat_topic,
        )

-        _parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
+        _parent_channel = self._thread_parent_channel(getattr(interaction, "channel", None))
+        _parent_id = str(getattr(_parent_channel, "id", "") or "")
        _skills = self._resolve_channel_skills(thread_id, _parent_id or None)
+        _channel_prompt = self._resolve_channel_prompt(thread_id, _parent_id or None)
        event = MessageEvent(
            text=text,
            message_type=MessageType.TEXT,
            source=source,
            raw_message=interaction,
            auto_skill=_skills,
+            channel_prompt=_channel_prompt,
        )
        await self.handle_message(event)

@@ -1923,6 +2135,11 @@ class DiscordAdapter(BasePlatformAdapter):
                    return list(dict.fromkeys(skills))  # dedup, preserve order
        return None

+    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
+        """Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
+        from gateway.platforms.base import resolve_channel_prompt
+        return resolve_channel_prompt(self.config.extra, channel_id, parent_id)
+
    def _thread_parent_channel(self, channel: Any) -> Any:
        """Return the parent text channel when invoked from a thread."""
        return getattr(channel, "parent", None) or channel
@@ -2474,6 +2691,15 @@ class DiscordAdapter(BasePlatformAdapter):
        _parent_id = str(getattr(_chan, "parent_id", "") or "")
        _chan_id = str(getattr(_chan, "id", ""))
        _skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
+        _channel_prompt = self._resolve_channel_prompt(_chan_id, _parent_id or None)
+
+        reply_to_id = None
+        reply_to_text = None
+        if message.reference:
+            reply_to_id = str(message.reference.message_id)
+            if message.reference.resolved:
+                reply_to_text = getattr(message.reference.resolved, "content", None) or None
+
        event = MessageEvent(
            text=event_text,
            message_type=msg_type,
@@ -2482,9 +2708,11 @@ class DiscordAdapter(BasePlatformAdapter):
            message_id=str(message.id),
            media_urls=media_urls,
            media_types=media_types,
-            reply_to_message_id=str(message.reference.message_id) if message.reference else None,
+            reply_to_message_id=reply_to_id,
+            reply_to_text=reply_to_text,
            timestamp=message.created_at,
            auto_skill=_skills,
+            channel_prompt=_channel_prompt,
        )

        # Track thread participation so the bot won't require @mention for
@@ -72,7 +72,10 @@ try:
        UpdateMessageRequestBody,
    )
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
-    from lark_oapi.event.callback.model.p2_card_action_trigger import P2CardActionTriggerResponse
+    from lark_oapi.event.callback.model.p2_card_action_trigger import (
+        CallBackCard,
+        P2CardActionTriggerResponse,
+    )
    from lark_oapi.event.dispatcher_handler import EventDispatcherHandler
    from lark_oapi.ws import Client as FeishuWSClient

@@ -80,6 +83,7 @@ try:
 except ImportError:
    FEISHU_AVAILABLE = False
    lark = None  # type: ignore[assignment]
+    CallBackCard = None  # type: ignore[assignment]
    P2CardActionTriggerResponse = None  # type: ignore[assignment]
    EventDispatcherHandler = None  # type: ignore[assignment]
    FeishuWSClient = None  # type: ignore[assignment]
@@ -169,6 +173,19 @@ _FEISHU_WEBHOOK_BODY_TIMEOUT_SECONDS = 30          # max seconds to read request
 _FEISHU_WEBHOOK_ANOMALY_THRESHOLD = 25             # consecutive error responses before WARNING log
 _FEISHU_WEBHOOK_ANOMALY_TTL_SECONDS = 6 * 60 * 60  # anomaly tracker TTL (6 hours) — matches openclaw
 _FEISHU_CARD_ACTION_DEDUP_TTL_SECONDS = 15 * 60    # card action token dedup window (15 min)
+
+_APPROVAL_CHOICE_MAP: Dict[str, str] = {
+    "approve_once": "once",
+    "approve_session": "session",
+    "approve_always": "always",
+    "deny": "deny",
+}
+_APPROVAL_LABEL_MAP: Dict[str, str] = {
+    "once": "Approved once",
+    "session": "Approved for session",
+    "always": "Approved permanently",
+    "deny": "Denied",
+}
 _FEISHU_BOT_MSG_TRACK_SIZE = 512                   # LRU size for tracking sent message IDs
 _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003})  # reply target withdrawn/missing → create fallback
 _FEISHU_ACK_EMOJI = "OK"
@@ -1056,6 +1073,13 @@ class FeishuAdapter(BasePlatformAdapter):
        self._webhook_rate_counts: Dict[str, tuple[int, float]] = {}  # rate_key → (count, window_start)
        self._webhook_anomaly_counts: Dict[str, tuple[int, str, float]] = {}  # ip → (count, last_status, first_seen)
        self._card_action_tokens: Dict[str, float] = {}  # token → first_seen_time
+        # Inbound events that arrived before the adapter loop was ready
+        # (e.g. during startup/restart or network-flap reconnect). A single
+        # drainer thread replays them as soon as the loop becomes available.
+        self._pending_inbound_events: List[Any] = []
+        self._pending_inbound_lock = threading.Lock()
+        self._pending_drain_scheduled = False
+        self._pending_inbound_max_depth = 1000  # cap queue; drop oldest beyond
        self._chat_locks: Dict[str, asyncio.Lock] = {}  # chat_id → lock (per-chat serial processing)
        self._sent_message_ids_to_chat: Dict[str, str] = {}  # message_id → chat_id (for reaction routing)
        self._sent_message_id_order: List[str] = []  # LRU order for _sent_message_ids_to_chat
@@ -1202,6 +1226,8 @@ class FeishuAdapter(BasePlatformAdapter):
            .register_p2_card_action_trigger(self._on_card_action_trigger)
            .register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
            .register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
+            .register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self._on_p2p_chat_entered)
+            .register_p2_im_message_recalled_v1(self._on_message_recalled)
            .build()
        )

@@ -1490,14 +1516,12 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.warning("[Feishu] send_exec_approval failed: %s", exc)
            return SendResult(success=False, error=str(exc))

-    async def _update_approval_card(
-        self, message_id: str, label: str, user_name: str, choice: str,
-    ) -> None:
-        """Replace the approval card with a resolved status card."""
-        if not self._client or not message_id:
-            return
+    @staticmethod
+    def _build_resolved_approval_card(*, choice: str, user_name: str) -> Dict[str, Any]:
+        """Build raw card JSON for a resolved approval action."""
        icon = "❌" if choice == "deny" else "✅"
-        card = {
+        label = _APPROVAL_LABEL_MAP.get(choice, "Resolved")
+        return {
            "config": {"wide_screen_mode": True},
            "header": {
                "title": {"content": f"{icon} {label}", "tag": "plain_text"},
@@ -1510,13 +1534,6 @@ class FeishuAdapter(BasePlatformAdapter):
                },
            ],
        }
-        try:
-            payload = json.dumps(card, ensure_ascii=False)
-            body = self._build_update_message_body(msg_type="interactive", content=payload)
-            request = self._build_update_message_request(message_id=message_id, request_body=body)
-            await asyncio.to_thread(self._client.im.v1.message.update, request)
-        except Exception as exc:
-            logger.warning("[Feishu] Failed to update approval card %s: %s", message_id, exc)

    async def send_voice(
        self,
@@ -1749,10 +1766,22 @@ class FeishuAdapter(BasePlatformAdapter):
    # =========================================================================

    def _on_message_event(self, data: Any) -> None:
-        """Normalize Feishu inbound events into MessageEvent."""
+        """Normalize Feishu inbound events into MessageEvent.
+
+        Called by the lark_oapi SDK's event dispatcher on a background thread.
+        If the adapter loop is not currently accepting callbacks (brief window
+        during startup/restart or network-flap reconnect), the event is queued
+        for replay instead of dropped.
+        """
        loop = self._loop
-        if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
-            logger.warning("[Feishu] Dropping inbound message before adapter loop is ready")
+        if not self._loop_accepts_callbacks(loop):
+            start_drainer = self._enqueue_pending_inbound_event(data)
+            if start_drainer:
+                threading.Thread(
+                    target=self._drain_pending_inbound_events,
+                    name="feishu-pending-inbound-drainer",
+                    daemon=True,
+                ).start()
            return
        future = asyncio.run_coroutine_threadsafe(
            self._handle_message_event_data(data),
@@ -1760,6 +1789,124 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        future.add_done_callback(self._log_background_failure)

+    def _enqueue_pending_inbound_event(self, data: Any) -> bool:
+        """Append an event to the pending-inbound queue.
+
+        Returns True if the caller should spawn a drainer thread (no drainer
+        currently scheduled), False if a drainer is already running and will
+        pick up the new event on its next pass.
+        """
+        with self._pending_inbound_lock:
+            if len(self._pending_inbound_events) >= self._pending_inbound_max_depth:
+                # Queue full — drop the oldest to make room. This happens only
+                # if the loop stays unavailable for an extended period AND the
+                # WS keeps firing callbacks. Still better than silent drops.
+                dropped = self._pending_inbound_events.pop(0)
+                try:
+                    event = getattr(dropped, "event", None)
+                    message = getattr(event, "message", None)
+                    message_id = str(getattr(message, "message_id", "") or "unknown")
+                except Exception:
+                    message_id = "unknown"
+                logger.error(
+                    "[Feishu] Pending-inbound queue full (%d); dropped oldest event %s",
+                    self._pending_inbound_max_depth,
+                    message_id,
+                )
+            self._pending_inbound_events.append(data)
+            depth = len(self._pending_inbound_events)
+            should_start = not self._pending_drain_scheduled
+            if should_start:
+                self._pending_drain_scheduled = True
+        logger.warning(
+            "[Feishu] Queued inbound event for replay (loop not ready, queue depth=%d)",
+            depth,
+        )
+        return should_start
+
+    def _drain_pending_inbound_events(self) -> None:
+        """Replay queued inbound events once the adapter loop is ready.
+
+        Runs in a dedicated daemon thread. Polls ``_running`` and
+        ``_loop_accepts_callbacks`` until events can be dispatched or the
+        adapter shuts down. A single drainer handles the entire queue;
+        concurrent ``_on_message_event`` calls just append.
+        """
+        poll_interval = 0.25
+        max_wait_seconds = 120.0  # safety cap: drop queue after 2 minutes
+        waited = 0.0
+        try:
+            while True:
+                if not getattr(self, "_running", True):
+                    # Adapter shutting down — drop queued events rather than
+                    # holding them against a closed loop.
+                    with self._pending_inbound_lock:
+                        dropped = len(self._pending_inbound_events)
+                        self._pending_inbound_events.clear()
+                    if dropped:
+                        logger.warning(
+                            "[Feishu] Dropped %d queued inbound event(s) during shutdown",
+                            dropped,
+                        )
+                    return
+                loop = self._loop
+                if self._loop_accepts_callbacks(loop):
+                    with self._pending_inbound_lock:
+                        batch = self._pending_inbound_events[:]
+                        self._pending_inbound_events.clear()
+                    if not batch:
+                        # Queue emptied between check and grab; done.
+                        with self._pending_inbound_lock:
+                            if not self._pending_inbound_events:
+                                return
+                        continue
+                    dispatched = 0
+                    requeue: List[Any] = []
+                    for event in batch:
+                        try:
+                            fut = asyncio.run_coroutine_threadsafe(
+                                self._handle_message_event_data(event),
+                                loop,
+                            )
+                            fut.add_done_callback(self._log_background_failure)
+                            dispatched += 1
+                        except RuntimeError:
+                            # Loop closed between check and submit — requeue
+                            # and poll again.
+                            requeue.append(event)
+                    if requeue:
+                        with self._pending_inbound_lock:
+                            self._pending_inbound_events[:0] = requeue
+                    if dispatched:
+                        logger.info(
+                            "[Feishu] Replayed %d queued inbound event(s)",
+                            dispatched,
+                        )
+                    if not requeue:
+                        # Successfully drained; check if more arrived while
+                        # we were dispatching and exit if not.
+                        with self._pending_inbound_lock:
+                            if not self._pending_inbound_events:
+                                return
+                    # More events queued or requeue pending — loop again.
+                    continue
+                if waited >= max_wait_seconds:
+                    with self._pending_inbound_lock:
+                        dropped = len(self._pending_inbound_events)
+                        self._pending_inbound_events.clear()
+                    logger.error(
+                        "[Feishu] Adapter loop unavailable for %.0fs; "
+                        "dropped %d queued inbound event(s)",
+                        max_wait_seconds,
+                        dropped,
+                    )
+                    return
+                time.sleep(poll_interval)
+                waited += poll_interval
+        finally:
+            with self._pending_inbound_lock:
+                self._pending_drain_scheduled = False
+
    async def _handle_message_event_data(self, data: Any) -> None:
        """Shared inbound message handling for websocket and webhook transports."""
        event = getattr(data, "event", None)
@@ -1812,6 +1959,12 @@ class FeishuAdapter(BasePlatformAdapter):
        logger.info("[Feishu] Bot removed from chat: %s", chat_id)
        self._chat_info_cache.pop(chat_id, None)

+    def _on_p2p_chat_entered(self, data: Any) -> None:
+        logger.debug("[Feishu] User entered P2P chat with bot")
+
+    def _on_message_recalled(self, data: Any) -> None:
+        logger.debug("[Feishu] Message recalled by user")
+
    def _on_reaction_event(self, event_type: str, data: Any) -> None:
        """Route user reactions on bot messages as synthetic text events."""
        event = getattr(data, "event", None)
@@ -1845,20 +1998,82 @@ class FeishuAdapter(BasePlatformAdapter):
        future.add_done_callback(self._log_background_failure)

    def _on_card_action_trigger(self, data: Any) -> Any:
-        """Schedule Feishu card actions on the adapter loop and acknowledge immediately."""
+        """Handle card-action callback from the Feishu SDK (synchronous).
+
+        For approval actions: parses the event once, returns the resolved card
+        inline (the only reliable way to sync all clients), and schedules a
+        lightweight async method to actually unblock the agent.
+
+        For other card actions: delegates to ``_handle_card_action_event``.
+        """
        loop = self._loop
-        if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
+        if not self._loop_accepts_callbacks(loop):
            logger.warning("[Feishu] Dropping card action before adapter loop is ready")
-        else:
-            future = asyncio.run_coroutine_threadsafe(
-                self._handle_card_action_event(data),
-                loop,
-            )
-            future.add_done_callback(self._log_background_failure)
+            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
+
+        event = getattr(data, "event", None)
+        action = getattr(event, "action", None)
+        action_value = getattr(action, "value", {}) or {}
+        hermes_action = action_value.get("hermes_action") if isinstance(action_value, dict) else None
+
+        if hermes_action:
+            return self._handle_approval_card_action(event=event, action_value=action_value, loop=loop)
+
+        self._submit_on_loop(loop, self._handle_card_action_event(data))
        if P2CardActionTriggerResponse is None:
            return None
        return P2CardActionTriggerResponse()

+    @staticmethod
+    def _loop_accepts_callbacks(loop: Any) -> bool:
+        """Return True when the adapter loop can accept thread-safe submissions."""
+        return loop is not None and not bool(getattr(loop, "is_closed", lambda: False)())
+
+    def _submit_on_loop(self, loop: Any, coro: Any) -> None:
+        """Schedule background work on the adapter loop with shared failure logging."""
+        future = asyncio.run_coroutine_threadsafe(coro, loop)
+        future.add_done_callback(self._log_background_failure)
+
+    def _handle_approval_card_action(self, *, event: Any, action_value: Dict[str, Any], loop: Any) -> Any:
+        """Schedule approval resolution and build the synchronous callback response."""
+        approval_id = action_value.get("approval_id")
+        if approval_id is None:
+            logger.debug("[Feishu] Card action missing approval_id, ignoring")
+            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
+        choice = _APPROVAL_CHOICE_MAP.get(action_value.get("hermes_action"), "deny")
+
+        operator = getattr(event, "operator", None)
+        open_id = str(getattr(operator, "open_id", "") or "")
+        user_name = self._get_cached_sender_name(open_id) or open_id
+
+        self._submit_on_loop(loop, self._resolve_approval(approval_id, choice, user_name))
+
+        if P2CardActionTriggerResponse is None:
+            return None
+        response = P2CardActionTriggerResponse()
+        if CallBackCard is not None:
+            card = CallBackCard()
+            card.type = "raw"
+            card.data = self._build_resolved_approval_card(choice=choice, user_name=user_name)
+            response.card = card
+        return response
+
+    async def _resolve_approval(self, approval_id: Any, choice: str, user_name: str) -> None:
+        """Pop approval state and unblock the waiting agent thread."""
+        state = self._approval_state.pop(approval_id, None)
+        if not state:
+            logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
+            return
+        try:
+            from tools.approval import resolve_gateway_approval
+            count = resolve_gateway_approval(state["session_key"], choice)
+            logger.info(
+                "Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)",
+                count, state["session_key"], choice, user_name,
+            )
+        except Exception as exc:
+            logger.error("Failed to resolve gateway approval from Feishu button: %s", exc)
+
    async def _handle_reaction_event(self, event_type: str, data: Any) -> None:
        """Fetch the reacted-to message; if it was sent by this bot, emit a synthetic text event."""
        if not self._client:
@@ -1950,51 +2165,6 @@ class FeishuAdapter(BasePlatformAdapter):
        action_tag = str(getattr(action, "tag", "") or "button")
        action_value = getattr(action, "value", {}) or {}

-        # --- Exec approval button intercept ---
-        hermes_action = action_value.get("hermes_action") if isinstance(action_value, dict) else None
-        if hermes_action:
-            approval_id = action_value.get("approval_id")
-            state = self._approval_state.pop(approval_id, None)
-            if not state:
-                logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
-                return
-
-            choice_map = {
-                "approve_once": "once",
-                "approve_session": "session",
-                "approve_always": "always",
-                "deny": "deny",
-            }
-            choice = choice_map.get(hermes_action, "deny")
-
-            label_map = {
-                "once": "Approved once",
-                "session": "Approved for session",
-                "always": "Approved permanently",
-                "deny": "Denied",
-            }
-            label = label_map.get(choice, "Resolved")
-
-            # Resolve sender name for the status card
-            sender_id = SimpleNamespace(open_id=open_id, user_id=None, union_id=None)
-            sender_profile = await self._resolve_sender_profile(sender_id)
-            user_name = sender_profile.get("user_name") or open_id
-
-            # Resolve the approval — unblocks the agent thread
-            try:
-                from tools.approval import resolve_gateway_approval
-                count = resolve_gateway_approval(state["session_key"], choice)
-                logger.info(
-                    "Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)",
-                    count, state["session_key"], choice, user_name,
-                )
-            except Exception as exc:
-                logger.error("Failed to resolve gateway approval from Feishu button: %s", exc)
-
-            # Update the card to show the decision
-            await self._update_approval_card(state.get("message_id", ""), label, user_name, choice)
-            return
-
        synthetic_text = f"/card {action_tag}"
        if action_value:
            try:
@@ -2897,6 +3067,19 @@ class FeishuAdapter(BasePlatformAdapter):
            "user_id_alt": union_id,
        }

+    def _get_cached_sender_name(self, sender_id: Optional[str]) -> Optional[str]:
+        """Return a cached sender name only while its TTL is still valid."""
+        if not sender_id:
+            return None
+        cached = self._sender_name_cache.get(sender_id)
+        if cached is None:
+            return None
+        name, expire_at = cached
+        if time.time() < expire_at:
+            return name
+        self._sender_name_cache.pop(sender_id, None)
+        return None
+
    async def _resolve_sender_name_from_api(self, sender_id: Optional[str]) -> Optional[str]:
        """Fetch the sender's display name from the Feishu contact API with a 10-minute cache.

@@ -2909,11 +3092,9 @@ class FeishuAdapter(BasePlatformAdapter):
        if not trimmed:
            return None
        now = time.time()
-        cached = self._sender_name_cache.get(trimmed)
-        if cached is not None:
-            name, expire_at = cached
-            if now < expire_at:
-                return name
+        cached_name = self._get_cached_sender_name(trimmed)
+        if cached_name is not None:
+            return cached_name
        try:
            from lark_oapi.api.contact.v3 import GetUserRequest  # lazy import
            if trimmed.startswith("ou_"):
@@ -49,7 +49,10 @@ class MessageDeduplicator:
            return False
        now = time.time()
        if msg_id in self._seen:
-            return True
+            if now - self._seen[msg_id] < self._ttl:
+                return True
+            # Entry has expired — remove it and treat as new
+            del self._seen[msg_id]
        self._seen[msg_id] = now
        if len(self._seen) > self._max_size:
            cutoff = now - self._ttl
@@ -718,6 +718,12 @@ class MattermostAdapter(BasePlatformAdapter):
            thread_id=thread_id,
        )

+        # Per-channel ephemeral prompt
+        from gateway.platforms.base import resolve_channel_prompt
+        _channel_prompt = resolve_channel_prompt(
+            self.config.extra, channel_id, None,
+        )
+
        msg_event = MessageEvent(
            text=message_text,
            message_type=msg_type,
@@ -726,6 +732,7 @@ class MattermostAdapter(BasePlatformAdapter):
            message_id=post_id,
            media_urls=media_urls if media_urls else None,
            media_types=media_types if media_types else None,
+            channel_prompt=_channel_prompt,
        )

        await self.handle_message(msg_event)
@@ -366,6 +366,20 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

+    def _dm_top_level_threads_as_sessions(self) -> bool:
+        """Whether top-level Slack DMs get per-message session threads.
+
+        Defaults to ``True`` so each visible DM reply thread is isolated as its
+        own Hermes session — matching the per-thread behavior channels already
+        have.  Set ``platforms.slack.extra.dm_top_level_threads_as_sessions``
+        to ``false`` in config.yaml to revert to the legacy behavior where all
+        top-level DMs share one continuous session.
+        """
+        raw = self.config.extra.get("dm_top_level_threads_as_sessions")
+        if raw is None:
+            return True  # default: each DM thread is its own session
+        return str(raw).strip().lower() in ("1", "true", "yes", "on")
+
    def _resolve_thread_ts(
        self,
        reply_to: Optional[str] = None,
@@ -996,10 +1010,14 @@ class SlackAdapter(BasePlatformAdapter):
        # Build thread_ts for session keying.
        # In channels: fall back to ts so each top-level @mention starts a
        #   new thread/session (the bot always replies in a thread).
-        # In DMs: only use the real thread_ts — top-level DMs should share
-        #   one continuous session, threaded DMs get their own session.
+        # In DMs: fall back to ts so each top-level DM reply thread gets
+        #   its own session key (matching channel behavior). Set
+        #   dm_top_level_threads_as_sessions: false in config to revert to
+        #   legacy single-session-per-DM-channel behavior.
        if is_dm:
-            thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")  # None for top-level DMs
+            thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")
+            if not thread_ts and self._dm_top_level_threads_as_sessions():
+                thread_ts = ts
        else:
            thread_ts = event.get("thread_ts") or ts  # ts fallback for channels

@@ -1167,6 +1185,12 @@ class SlackAdapter(BasePlatformAdapter):
            thread_id=thread_ts,
        )

+        # Per-channel ephemeral prompt
+        from gateway.platforms.base import resolve_channel_prompt
+        _channel_prompt = resolve_channel_prompt(
+            self.config.extra, channel_id, None,
+        )
+
        msg_event = MessageEvent(
            text=text,
            message_type=msg_type,
@@ -1176,6 +1200,7 @@ class SlackAdapter(BasePlatformAdapter):
            media_urls=media_urls,
            media_types=media_types,
            reply_to_message_id=thread_ts if thread_ts != ts else None,
+            channel_prompt=_channel_prompt,
        )

        # Only react when bot is directly addressed (DM or @mention).
@@ -11,6 +11,7 @@ import asyncio
 import json
 import logging
 import os
+import html as _html
 import re
 from typing import Dict, List, Optional, Any

@@ -18,6 +19,10 @@ logger = logging.getLogger(__name__)

 try:
    from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
+    try:
+        from telegram import LinkPreviewOptions
+    except ImportError:
+        LinkPreviewOptions = None
    from telegram.ext import (
        Application,
        CommandHandler,
@@ -36,6 +41,7 @@ except ImportError:
    Message = Any
    InlineKeyboardButton = Any
    InlineKeyboardMarkup = Any
+    LinkPreviewOptions = None
    Application = Any
    CommandHandler = Any
    CallbackQueryHandler = Any
@@ -129,6 +135,7 @@ class TelegramAdapter(BasePlatformAdapter):
    # When a chunk is near this limit, a continuation is almost certain.
    _SPLIT_THRESHOLD = 4000
    MEDIA_GROUP_WAIT_SECONDS = 0.8
+    _GENERAL_TOPIC_THREAD_ID = "1"
    
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.TELEGRAM)
@@ -137,6 +144,7 @@ class TelegramAdapter(BasePlatformAdapter):
        self._webhook_mode: bool = False
        self._mention_patterns = self._compile_mention_patterns()
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
+        self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False)
        # Buffer rapid/album photo updates so Telegram image bursts are handled
        # as a single MessageEvent instead of self-interrupting multiple turns.
        self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@@ -163,6 +171,38 @@ class TelegramAdapter(BasePlatformAdapter):
        # Approval button state: message_id → session_key
        self._approval_state: Dict[int, str] = {}

+    @staticmethod
+    def _is_callback_user_authorized(user_id: str) -> bool:
+        """Return whether a Telegram inline-button caller may perform gated actions."""
+        allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
+        if not allowed_csv:
+            return True
+        allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
+        return "*" in allowed_ids or user_id in allowed_ids
+
+    @classmethod
+    def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
+        if not metadata:
+            return None
+        thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
+        return str(thread_id) if thread_id is not None else None
+
+    @classmethod
+    def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]:
+        if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID:
+            return None
+        return int(thread_id)
+
+    @classmethod
+    def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
+        if not thread_id:
+            return None
+        return int(thread_id)
+
+    @staticmethod
+    def _is_thread_not_found_error(error: Exception) -> bool:
+        return "thread not found" in str(error).lower()
+
    def _fallback_ips(self) -> list[str]:
        """Return validated fallback IPs from config (populated by _apply_env_overrides)."""
        configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else []
@@ -193,6 +233,26 @@ class TelegramAdapter(BasePlatformAdapter):
            pass
        return isinstance(error, OSError)

+    def _coerce_bool_extra(self, key: str, default: bool = False) -> bool:
+        value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
+        if value is None:
+            return default
+        if isinstance(value, str):
+            lowered = value.strip().lower()
+            if lowered in ("true", "1", "yes", "on"):
+                return True
+            if lowered in ("false", "0", "no", "off"):
+                return False
+            return default
+        return bool(value)
+
+    def _link_preview_kwargs(self) -> Dict[str, Any]:
+        if not getattr(self, "_disable_link_previews", False):
+            return {}
+        if LinkPreviewOptions is not None:
+            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
+        return {"disable_web_page_preview": True}
+
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -540,7 +600,7 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_url = resolve_proxy_url()
+            proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -606,14 +666,14 @@ class TelegramAdapter(BasePlatformAdapter):
                from telegram.error import NetworkError, TimedOut
            except ImportError:
                NetworkError = TimedOut = OSError  # type: ignore[misc,assignment]
-            _max_connect = 3
+            _max_connect = 8
            for _attempt in range(_max_connect):
                try:
                    await self._app.initialize()
                    break
                except (NetworkError, TimedOut, OSError) as init_err:
                    if _attempt < _max_connect - 1:
-                        wait = 2 ** _attempt
+                        wait = min(2 ** _attempt, 15)
                        logger.warning(
                            "[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
                            self.name, _attempt + 1, _max_connect, init_err, wait,
@@ -814,7 +874,7 @@ class TelegramAdapter(BasePlatformAdapter):
                ]
            
            message_ids = []
-            thread_id = metadata.get("thread_id") if metadata else None
+            thread_id = self._metadata_thread_id(metadata)
            
            try:
                from telegram.error import NetworkError as _NetErr
@@ -834,7 +894,7 @@ class TelegramAdapter(BasePlatformAdapter):
            for i, chunk in enumerate(chunks):
                should_thread = self._should_thread_reply(reply_to, i)
                reply_to_id = int(reply_to) if should_thread else None
-                effective_thread_id = int(thread_id) if thread_id else None
+                effective_thread_id = self._message_thread_id_for_send(thread_id)

                msg = None
                for _send_attempt in range(3):
@@ -847,6 +907,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                parse_mode=ParseMode.MARKDOWN_V2,
                                reply_to_message_id=reply_to_id,
                                message_thread_id=effective_thread_id,
+                                **self._link_preview_kwargs(),
                            )
                        except Exception as md_error:
                            # Markdown parsing failed, try plain text
@@ -859,6 +920,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                    parse_mode=None,
                                    reply_to_message_id=reply_to_id,
                                    message_thread_id=effective_thread_id,
+                                    **self._link_preview_kwargs(),
                                )
                            else:
                                raise
@@ -869,8 +931,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        # (not transient network issues). Detect and handle
                        # specific cases instead of blindly retrying.
                        if _BadReq and isinstance(send_err, _BadReq):
-                            err_lower = str(send_err).lower()
-                            if "thread not found" in err_lower and effective_thread_id is not None:
+                            if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
                                # Thread doesn't exist — retry without
                                # message_thread_id so the message still
                                # reaches the chat.
@@ -880,6 +941,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                )
                                effective_thread_id = None
                                continue
+                            err_lower = str(send_err).lower()
                            if "message to be replied not found" in err_lower and reply_to_id is not None:
                                # Original message was deleted before we
                                # could reply — clear reply target and retry
@@ -1046,6 +1108,7 @@ class TelegramAdapter(BasePlatformAdapter):
                text=text,
                parse_mode=ParseMode.MARKDOWN,
                reply_markup=keyboard,
+                **self._link_preview_kwargs(),
            )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1068,15 +1131,13 @@ class TelegramAdapter(BasePlatformAdapter):
        try:
            cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
            text = (
-                f"⚠️ *Command Approval Required*\n\n"
-                f"`{cmd_preview}`\n\n"
-                f"Reason: {description}"
+                f"⚠️ <b>Command Approval Required</b>\n\n"
+                f"<pre>{_html.escape(cmd_preview)}</pre>\n\n"
+                f"Reason: {_html.escape(description)}"
            )

            # Resolve thread context for thread replies
-            thread_id = None
-            if metadata:
-                thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
+            thread_id = self._metadata_thread_id(metadata)

            # We'll use the message_id as part of callback_data to look up session_key
            # Send a placeholder first, then update — or use a counter.
@@ -1100,11 +1161,13 @@ class TelegramAdapter(BasePlatformAdapter):
            kwargs: Dict[str, Any] = {
                "chat_id": int(chat_id),
                "text": text,
-                "parse_mode": ParseMode.MARKDOWN,
+                "parse_mode": ParseMode.HTML,
                "reply_markup": keyboard,
+                **self._link_preview_kwargs(),
            }
-            if thread_id:
-                kwargs["message_thread_id"] = int(thread_id)
+            message_thread_id = self._message_thread_id_for_send(thread_id)
+            if message_thread_id is not None:
+                kwargs["message_thread_id"] = message_thread_id

            msg = await self._bot.send_message(**kwargs)

@@ -1172,6 +1235,7 @@ class TelegramAdapter(BasePlatformAdapter):
                parse_mode=ParseMode.MARKDOWN,
                reply_markup=keyboard,
                message_thread_id=int(thread_id) if thread_id else None,
+                **self._link_preview_kwargs(),
            )

            # Store picker state keyed by chat_id
@@ -1440,12 +1504,9 @@ class TelegramAdapter(BasePlatformAdapter):

                # Only authorized users may click approval buttons.
                caller_id = str(getattr(query.from_user, "id", ""))
-                allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
-                if allowed_csv:
-                    allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
-                    if "*" not in allowed_ids and caller_id not in allowed_ids:
-                        await query.answer(text="⛔ You are not authorized to approve commands.")
-                        return
+                if not self._is_callback_user_authorized(caller_id):
+                    await query.answer(text="⛔ You are not authorized to approve commands.")
+                    return

                session_key = self._approval_state.pop(approval_id, None)
                if not session_key:
@@ -1490,6 +1551,10 @@ class TelegramAdapter(BasePlatformAdapter):
        if not data.startswith("update_prompt:"):
            return
        answer = data.split(":", 1)[1]  # "y" or "n"
+        caller_id = str(getattr(query.from_user, "id", ""))
+        if not self._is_callback_user_authorized(caller_id):
+            await query.answer(text="⛔ You are not authorized to answer update prompts.")
+            return
        await query.answer(text=f"Sent '{answer}' to the update process.")
        # Edit the message to show the choice and remove buttons
        label = "Yes" if answer == "y" else "No"
@@ -1535,23 +1600,23 @@ class TelegramAdapter(BasePlatformAdapter):
            with open(audio_path, "rb") as audio_file:
                # .ogg files -> send as voice (round playable bubble)
                if audio_path.endswith((".ogg", ".opus")):
-                    _voice_thread = metadata.get("thread_id") if metadata else None
+                    _voice_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_voice(
                        chat_id=int(chat_id),
                        voice=audio_file,
                        caption=caption[:1024] if caption else None,
                        reply_to_message_id=int(reply_to) if reply_to else None,
-                        message_thread_id=int(_voice_thread) if _voice_thread else None,
+                        message_thread_id=self._message_thread_id_for_send(_voice_thread),
                    )
                else:
                    # .mp3 and others -> send as audio file
-                    _audio_thread = metadata.get("thread_id") if metadata else None
+                    _audio_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_audio(
                        chat_id=int(chat_id),
                        audio=audio_file,
                        caption=caption[:1024] if caption else None,
                        reply_to_message_id=int(reply_to) if reply_to else None,
-                        message_thread_id=int(_audio_thread) if _audio_thread else None,
+                        message_thread_id=self._message_thread_id_for_send(_audio_thread),
                    )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1581,14 +1646,14 @@ class TelegramAdapter(BasePlatformAdapter):
            if not os.path.exists(image_path):
                return SendResult(success=False, error=f"Image file not found: {image_path}")

-            _thread = metadata.get("thread_id") if metadata else None
+            _thread = self._metadata_thread_id(metadata)
            with open(image_path, "rb") as image_file:
                msg = await self._bot.send_photo(
                    chat_id=int(chat_id),
                    photo=image_file,
                    caption=caption[:1024] if caption else None,
                    reply_to_message_id=int(reply_to) if reply_to else None,
-                    message_thread_id=int(_thread) if _thread else None,
+                    message_thread_id=self._message_thread_id_for_send(_thread),
                )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1619,7 +1684,7 @@ class TelegramAdapter(BasePlatformAdapter):
                return SendResult(success=False, error=f"File not found: {file_path}")

            display_name = file_name or os.path.basename(file_path)
-            _thread = metadata.get("thread_id") if metadata else None
+            _thread = self._metadata_thread_id(metadata)

            with open(file_path, "rb") as f:
                msg = await self._bot.send_document(
@@ -1628,7 +1693,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    filename=display_name,
                    caption=caption[:1024] if caption else None,
                    reply_to_message_id=int(reply_to) if reply_to else None,
-                    message_thread_id=int(_thread) if _thread else None,
+                    message_thread_id=self._message_thread_id_for_send(_thread),
                )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1652,14 +1717,14 @@ class TelegramAdapter(BasePlatformAdapter):
            if not os.path.exists(video_path):
                return SendResult(success=False, error=f"Video file not found: {video_path}")

-            _thread = metadata.get("thread_id") if metadata else None
+            _thread = self._metadata_thread_id(metadata)
            with open(video_path, "rb") as f:
                msg = await self._bot.send_video(
                    chat_id=int(chat_id),
                    video=f,
                    caption=caption[:1024] if caption else None,
                    reply_to_message_id=int(reply_to) if reply_to else None,
-                    message_thread_id=int(_thread) if _thread else None,
+                    message_thread_id=self._message_thread_id_for_send(_thread),
                )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1689,13 +1754,13 @@ class TelegramAdapter(BasePlatformAdapter):

        try:
            # Telegram can send photos directly from URLs (up to ~5MB)
-            _photo_thread = metadata.get("thread_id") if metadata else None
+            _photo_thread = self._metadata_thread_id(metadata)
            msg = await self._bot.send_photo(
                chat_id=int(chat_id),
                photo=image_url,
                caption=caption[:1024] if caption else None,  # Telegram caption limit
                reply_to_message_id=int(reply_to) if reply_to else None,
-                message_thread_id=int(_photo_thread) if _photo_thread else None,
+                message_thread_id=self._message_thread_id_for_send(_photo_thread),
            )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1718,6 +1783,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    photo=image_data,
                    caption=caption[:1024] if caption else None,
                    reply_to_message_id=int(reply_to) if reply_to else None,
+                    message_thread_id=self._message_thread_id_for_send(_photo_thread),
                )
                return SendResult(success=True, message_id=str(msg.message_id))
            except Exception as e2:
@@ -1743,13 +1809,13 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")
        
        try:
-            _anim_thread = metadata.get("thread_id") if metadata else None
+            _anim_thread = self._metadata_thread_id(metadata)
            msg = await self._bot.send_animation(
                chat_id=int(chat_id),
                animation=animation_url,
                caption=caption[:1024] if caption else None,
                reply_to_message_id=int(reply_to) if reply_to else None,
-                message_thread_id=int(_anim_thread) if _anim_thread else None,
+                message_thread_id=self._message_thread_id_for_send(_anim_thread),
            )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
@@ -1766,12 +1832,23 @@ class TelegramAdapter(BasePlatformAdapter):
        """Send typing indicator."""
        if self._bot:
            try:
-                _typing_thread = metadata.get("thread_id") if metadata else None
-                await self._bot.send_chat_action(
-                    chat_id=int(chat_id),
-                    action="typing",
-                    message_thread_id=int(_typing_thread) if _typing_thread else None,
-                )
+                _typing_thread = self._metadata_thread_id(metadata)
+                message_thread_id = self._message_thread_id_for_typing(_typing_thread)
+                try:
+                    await self._bot.send_chat_action(
+                        chat_id=int(chat_id),
+                        action="typing",
+                        message_thread_id=message_thread_id,
+                    )
+                except Exception as e:
+                    if message_thread_id is not None and self._is_thread_not_found_error(e):
+                        await self._bot.send_chat_action(
+                            chat_id=int(chat_id),
+                            action="typing",
+                            message_thread_id=None,
+                        )
+                    else:
+                        raise
            except Exception as e:
                # Typing failures are non-fatal; log at debug level only.
                logger.debug(
@@ -1916,9 +1993,20 @@ class TelegramAdapter(BasePlatformAdapter):
        )

        # 9) Convert blockquotes: > at line start → protect > from escaping
+        #    Handle both regular blockquotes (> text) and expandable blockquotes
+        #    (Telegram MarkdownV2: **> for expandable start, || to end the quote)
+        def _convert_blockquote(m):
+            prefix = m.group(1)  # >, >>, >>>, **>, or **>> etc.
+            content = m.group(2)
+            # Check if content ends with || (expandable blockquote end marker)
+            # In this case, preserve the trailing || unescaped for Telegram
+            if prefix.startswith('**') and content.endswith('||'):
+                return _ph(f'{prefix} {_escape_mdv2(content[:-2])}||')
+            return _ph(f'{prefix} {_escape_mdv2(content)}')
+
        text = re.sub(
-            r'^(>{1,3}) (.+)$',
-            lambda m: _ph(m.group(1) + ' ' + _escape_mdv2(m.group(2))),
+            r'^((?:\*\*)?>{1,3}) (.+)$',
+            _convert_blockquote,
            text,
            flags=re.MULTILINE,
        )
@@ -1991,6 +2079,27 @@ class TelegramAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

+    def _telegram_ignored_threads(self) -> set[int]:
+        raw = self.config.extra.get("ignored_threads")
+        if raw is None:
+            raw = os.getenv("TELEGRAM_IGNORED_THREADS", "")
+
+        if isinstance(raw, list):
+            values = raw
+        else:
+            values = str(raw).split(",")
+
+        ignored: set[int] = set()
+        for value in values:
+            text = str(value).strip()
+            if not text:
+                continue
+            try:
+                ignored.add(int(text))
+            except (TypeError, ValueError):
+                logger.warning("[%s] Ignoring invalid Telegram thread id: %r", self.name, value)
+        return ignored
+
    def _compile_mention_patterns(self) -> List[re.Pattern]:
        """Compile optional regex wake-word patterns for group triggers."""
        patterns = self.config.extra.get("mention_patterns")
@@ -2102,6 +2211,13 @@ class TelegramAdapter(BasePlatformAdapter):
        """
        if not self._is_group_chat(message):
            return True
+        thread_id = getattr(message, "message_thread_id", None)
+        if thread_id is not None:
+            try:
+                if int(thread_id) in self._telegram_ignored_threads():
+                    return False
+            except (TypeError, ValueError):
+                logger.warning("[%s] Ignoring non-numeric Telegram message_thread_id: %r", self.name, thread_id)
        if str(getattr(getattr(message, "chat", None), "id", "")) in self._telegram_free_response_chats():
            return True
        if not self._telegram_require_mention():
@@ -2677,7 +2793,9 @@ class TelegramAdapter(BasePlatformAdapter):

        # Resolve DM topic name and skill binding
        thread_id_raw = message.message_thread_id
-        thread_id_str = str(thread_id_raw) if thread_id_raw else None
+        thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None
+        if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False):
+            thread_id_str = self._GENERAL_TOPIC_THREAD_ID
        chat_topic = None
        topic_skill = None

@@ -2726,6 +2844,15 @@ class TelegramAdapter(BasePlatformAdapter):
            reply_to_id = str(message.reply_to_message.message_id)
            reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None

+        # Per-channel/topic ephemeral prompt
+        from gateway.platforms.base import resolve_channel_prompt
+        _chat_id_str = str(chat.id)
+        _channel_prompt = resolve_channel_prompt(
+            self.config.extra,
+            thread_id_str or _chat_id_str,
+            _chat_id_str if thread_id_str else None,
+        )
+
        return MessageEvent(
            text=message.text or "",
            message_type=msg_type,
@@ -2735,6 +2862,7 @@ class TelegramAdapter(BasePlatformAdapter):
            reply_to_message_id=reply_to_id,
            reply_to_text=reply_to_text,
            auto_skill=topic_skill,
+            channel_prompt=_channel_prompt,
            timestamp=message.date,
        )

@@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
 def _resolve_proxy_url() -> str | None:
    # Delegate to shared implementation (env vars + macOS system proxy detection)
    from gateway.platforms.base import resolve_proxy_url
-    return resolve_proxy_url()
+    return resolve_proxy_url("TELEGRAM_PROXY")


 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -203,6 +203,7 @@ class WebhookAdapter(BasePlatformAdapter):
            "wecom_callback",
            "weixin",
            "bluebubbles",
+            "qqbot",
        ):
            return await self._deliver_cross_platform(
                deliver_type, content, delivery
@@ -258,6 +258,20 @@ class WecomCallbackAdapter(BasePlatformAdapter):
                )
                event = self._build_event(app, decrypted)
                if event is not None:
+                    # Deduplicate: WeCom retries callbacks on timeout,
+                    # producing duplicate inbound messages (#10305).
+                    if event.message_id:
+                        now = time.time()
+                        if event.message_id in self._seen_messages:
+                            if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS:
+                                logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id)
+                                return web.Response(text="success", content_type="text/plain")
+                            del self._seen_messages[event.message_id]
+                        self._seen_messages[event.message_id] = now
+                        # Prune expired entries when cache grows large
+                        if len(self._seen_messages) > 2000:
+                            cutoff = now - MESSAGE_DEDUP_TTL_SECONDS
+                            self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
                    # Record which app this user belongs to.
                    if event.source and event.source.user_id:
                        map_key = self._user_app_key(
@@ -301,6 +301,8 @@ def build_session_context_prompt(
    lines.append("")
    lines.append("**Delivery options for scheduled tasks:**")
    
+    from hermes_constants import display_hermes_home
+
    # Origin delivery
    if context.source.platform == Platform.LOCAL:
        lines.append("- `\"origin\"` → Local output (saved to files)")
@@ -309,9 +311,11 @@ def build_session_context_prompt(
            _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
        )
        lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
-    
+
    # Local always available
-    lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
+    lines.append(
+        f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
+    )
    
    # Platform home channels
    for platform, home in context.home_channels.items():
@@ -37,18 +37,24 @@ needs to replace the import + call site:
 """

 from contextvars import ContextVar
+from typing import Any
+
+# Sentinel to distinguish "never set in this context" from "explicitly set to empty".
+# When a contextvar holds _UNSET, we fall back to os.environ (CLI/cron compat).
+# When it holds "" (after clear_session_vars resets it), we return "" — no fallback.
+_UNSET: Any = object()

 # ---------------------------------------------------------------------------
 # Per-task session variables
 # ---------------------------------------------------------------------------

-_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="")
-_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="")
-_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="")
-_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="")
-_SESSION_USER_ID: ContextVar[str] = ContextVar("HERMES_SESSION_USER_ID", default="")
-_SESSION_USER_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_USER_NAME", default="")
-_SESSION_KEY: ContextVar[str] = ContextVar("HERMES_SESSION_KEY", default="")
+_SESSION_PLATFORM: ContextVar = ContextVar("HERMES_SESSION_PLATFORM", default=_UNSET)
+_SESSION_CHAT_ID: ContextVar = ContextVar("HERMES_SESSION_CHAT_ID", default=_UNSET)
+_SESSION_CHAT_NAME: ContextVar = ContextVar("HERMES_SESSION_CHAT_NAME", default=_UNSET)
+_SESSION_THREAD_ID: ContextVar = ContextVar("HERMES_SESSION_THREAD_ID", default=_UNSET)
+_SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNSET)
+_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
+_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)

 _VAR_MAP = {
    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
@@ -91,10 +97,17 @@ def set_session_vars(


 def clear_session_vars(tokens: list) -> None:
-    """Restore session context variables to their pre-handler values."""
-    if not tokens:
-        return
-    vars_in_order = [
+    """Mark session context variables as explicitly cleared.
+
+    Sets all variables to ``""`` so that ``get_session_env`` returns an empty
+    string instead of falling back to (potentially stale) ``os.environ``
+    values.  The *tokens* argument is accepted for API compatibility with
+    callers that saved the return value of ``set_session_vars``, but the
+    actual clearing uses ``var.set("")`` rather than ``var.reset(token)``
+    to ensure the "explicitly cleared" state is distinguishable from
+    "never set" (which holds the ``_UNSET`` sentinel).
+    """
+    for var in (
        _SESSION_PLATFORM,
        _SESSION_CHAT_ID,
        _SESSION_CHAT_NAME,
@@ -102,9 +115,8 @@ def clear_session_vars(tokens: list) -> None:
        _SESSION_USER_ID,
        _SESSION_USER_NAME,
        _SESSION_KEY,
-    ]
-    for var, token in zip(vars_in_order, tokens):
-        var.reset(token)
+    ):
+        var.set("")


 def get_session_env(name: str, default: str = "") -> str:
@@ -113,8 +125,13 @@ def get_session_env(name: str, default: str = "") -> str:
    Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``.

    Resolution order:
-    1. Context variable (set by the gateway for concurrency-safe access)
-    2. ``os.environ`` (used by CLI, cron scheduler, and tests)
+    1. Context variable (set by the gateway for concurrency-safe access).
+       If the variable was explicitly set (even to ``""``) via
+       ``set_session_vars`` or ``clear_session_vars``, that value is
+       returned — **no fallback to os.environ**.
+    2. ``os.environ`` (only when the context variable was never set in
+       this context — i.e. CLI, cron scheduler, and test processes that
+       don't use ``set_session_vars`` at all).
    3. *default*
    """
    import os
@@ -122,7 +139,7 @@ def get_session_env(name: str, default: str = "") -> str:
    var = _VAR_MAP.get(name)
    if var is not None:
        value = var.get()
-        if value:
+        if value is not _UNSET:
            return value
    # Fall back to os.environ for CLI, cron, and test compatibility
    return os.getenv(name, default)
@@ -266,9 +266,25 @@ def read_runtime_status() -> Optional[dict[str, Any]]:


 def remove_pid_file() -> None:
-    """Remove the gateway PID file if it exists."""
+    """Remove the gateway PID file, but only if it belongs to this process.
+
+    During --replace handoffs, the old process's atexit handler can fire AFTER
+    the new process has written its own PID file.  Blindly removing the file
+    would delete the new process's record, leaving the gateway running with no
+    PID file (invisible to ``get_running_pid()``).
+    """
    try:
-        _get_pid_path().unlink(missing_ok=True)
+        path = _get_pid_path()
+        record = _read_json_file(path)
+        if record is not None:
+            try:
+                file_pid = int(record["pid"])
+            except (KeyError, TypeError, ValueError):
+                file_pid = None
+            if file_pid is not None and file_pid != os.getpid():
+                # PID file belongs to a different process — leave it alone.
+                return
+        path.unlink(missing_ok=True)
    except Exception:
        pass

@@ -43,6 +43,7 @@ class StreamConsumerConfig:
    edit_interval: float = 1.0
    buffer_threshold: int = 40
    cursor: str = " ▉"
+    buffer_only: bool = False


 class GatewayStreamConsumer:
@@ -64,6 +65,18 @@ class GatewayStreamConsumer:
    # progressive edits for the remainder of the stream.
    _MAX_FLOOD_STRIKES = 3

+    # Reasoning/thinking tags that models emit inline in content.
+    # Must stay in sync with cli.py _OPEN_TAGS/_CLOSE_TAGS and
+    # run_agent.py _strip_think_blocks() tag variants.
+    _OPEN_THINK_TAGS = (
+        "<REASONING_SCRATCHPAD>", "<think>", "<reasoning>",
+        "<THINKING>", "<thinking>", "<thought>",
+    )
+    _CLOSE_THINK_TAGS = (
+        "</REASONING_SCRATCHPAD>", "</think>", "</reasoning>",
+        "</THINKING>", "</thinking>", "</thought>",
+    )
+
    def __init__(
        self,
        adapter: Any,
@@ -88,6 +101,10 @@ class GatewayStreamConsumer:
        self._current_edit_interval = self.cfg.edit_interval  # Adaptive backoff
        self._final_response_sent = False

+        # Think-block filter state (mirrors CLI's _stream_delta tag suppression)
+        self._in_think_block = False
+        self._think_buffer = ""
+
    @property
    def already_sent(self) -> bool:
        """True if at least one message was sent or edited during the run."""
@@ -132,6 +149,112 @@ class GatewayStreamConsumer:
        """Signal that the stream is complete."""
        self._queue.put(_DONE)

+    # ── Think-block filtering ────────────────────────────────────────
+    # Models like MiniMax emit inline <think>...</think> blocks in their
+    # content.  The CLI's _stream_delta suppresses these via a state
+    # machine; we do the same here so gateway users never see raw
+    # reasoning tags.  The agent also strips them from the final
+    # response (run_agent.py _strip_think_blocks), but the stream
+    # consumer sends intermediate edits before that stripping happens.
+
+    def _filter_and_accumulate(self, text: str) -> None:
+        """Add a text delta to the accumulated buffer, suppressing think blocks.
+
+        Uses a state machine that tracks whether we are inside a
+        reasoning/thinking block.  Text inside such blocks is silently
+        discarded.  Partial tags at buffer boundaries are held back in
+        ``_think_buffer`` until enough characters arrive to decide.
+        """
+        buf = self._think_buffer + text
+        self._think_buffer = ""
+
+        while buf:
+            if self._in_think_block:
+                # Look for the earliest closing tag
+                best_idx = -1
+                best_len = 0
+                for tag in self._CLOSE_THINK_TAGS:
+                    idx = buf.find(tag)
+                    if idx != -1 and (best_idx == -1 or idx < best_idx):
+                        best_idx = idx
+                        best_len = len(tag)
+
+                if best_len:
+                    # Found closing tag — discard block, process remainder
+                    self._in_think_block = False
+                    buf = buf[best_idx + best_len:]
+                else:
+                    # No closing tag yet — hold tail that could be a
+                    # partial closing tag prefix, discard the rest.
+                    max_tag = max(len(t) for t in self._CLOSE_THINK_TAGS)
+                    self._think_buffer = buf[-max_tag:] if len(buf) > max_tag else buf
+                    return
+            else:
+                # Look for earliest opening tag at a block boundary
+                # (start of text / preceded by newline + optional whitespace).
+                # This prevents false positives when models *mention* tags
+                # in prose (e.g. "the <think> tag is used for…").
+                best_idx = -1
+                best_len = 0
+                for tag in self._OPEN_THINK_TAGS:
+                    search_start = 0
+                    while True:
+                        idx = buf.find(tag, search_start)
+                        if idx == -1:
+                            break
+                        # Block-boundary check (mirrors cli.py logic)
+                        if idx == 0:
+                            is_boundary = (
+                                not self._accumulated
+                                or self._accumulated.endswith("\n")
+                            )
+                        else:
+                            preceding = buf[:idx]
+                            last_nl = preceding.rfind("\n")
+                            if last_nl == -1:
+                                is_boundary = (
+                                    (not self._accumulated
+                                     or self._accumulated.endswith("\n"))
+                                    and preceding.strip() == ""
+                                )
+                            else:
+                                is_boundary = preceding[last_nl + 1:].strip() == ""
+
+                        if is_boundary and (best_idx == -1 or idx < best_idx):
+                            best_idx = idx
+                            best_len = len(tag)
+                            break  # first boundary hit for this tag is enough
+                        search_start = idx + 1
+
+                if best_len:
+                    # Emit text before the tag, enter think block
+                    self._accumulated += buf[:best_idx]
+                    self._in_think_block = True
+                    buf = buf[best_idx + best_len:]
+                else:
+                    # No opening tag — check for a partial tag at the tail
+                    held_back = 0
+                    for tag in self._OPEN_THINK_TAGS:
+                        for i in range(1, len(tag)):
+                            if buf.endswith(tag[:i]) and i > held_back:
+                                held_back = i
+                    if held_back:
+                        self._accumulated += buf[:-held_back]
+                        self._think_buffer = buf[-held_back:]
+                    else:
+                        self._accumulated += buf
+                    return
+
+    def _flush_think_buffer(self) -> None:
+        """Flush any held-back partial-tag buffer into accumulated text.
+
+        Called when the stream ends (got_done) so that partial text that
+        was held back waiting for a possible opening tag is not lost.
+        """
+        if self._think_buffer and not self._in_think_block:
+            self._accumulated += self._think_buffer
+            self._think_buffer = ""
+
    async def run(self) -> None:
        """Async task that drains the queue and edits the platform message."""
        # Platform message length limit — leave room for cursor + formatting
@@ -156,10 +279,16 @@ class GatewayStreamConsumer:
                        if isinstance(item, tuple) and len(item) == 2 and item[0] is _COMMENTARY:
                            commentary_text = item[1]
                            break
-                        self._accumulated += item
+                        self._filter_and_accumulate(item)
                    except queue.Empty:
                        break

+                # Flush any held-back partial-tag buffer on stream end
+                # so trailing text that was waiting for a potential open
+                # tag is not lost.
+                if got_done:
+                    self._flush_think_buffer()
+
                # Decide whether to flush an edit
                now = time.monotonic()
                elapsed = now - self._last_edit_time
@@ -167,10 +296,13 @@ class GatewayStreamConsumer:
                    got_done
                    or got_segment_break
                    or commentary_text is not None
-                    or (elapsed >= self._current_edit_interval
-                        and self._accumulated)
-                    or len(self._accumulated) >= self.cfg.buffer_threshold
                )
+                if not self.cfg.buffer_only:
+                    should_edit = should_edit or (
+                        (elapsed >= self._current_edit_interval
+                            and self._accumulated)
+                        or len(self._accumulated) >= self.cfg.buffer_threshold
+                    )

                current_update_visible = False
                if should_edit and self._accumulated:
@@ -275,11 +407,21 @@ class GatewayStreamConsumer:

        except asyncio.CancelledError:
            # Best-effort final edit on cancellation
+            _best_effort_ok = False
            if self._accumulated and self._message_id:
                try:
-                    await self._send_or_edit(self._accumulated)
+                    _best_effort_ok = bool(await self._send_or_edit(self._accumulated))
                except Exception:
                    pass
+            # Only confirm final delivery if the best-effort send above
+            # actually succeeded OR if the final response was already
+            # confirmed before we were cancelled.  Previously this
+            # promoted any partial send (already_sent=True) to
+            # final_response_sent — which suppressed the gateway's
+            # fallback send even when only intermediate text (e.g.
+            # "Let me search…") had been delivered, not the real answer.
+            if _best_effort_ok and not self._final_response_sent:
+                self._final_response_sent = True
        except Exception as e:
            logger.error("Stream consumer error: %s", e)

@@ -377,9 +519,17 @@ class GatewayStreamConsumer:
        self._fallback_final_send = False
        if not continuation.strip():
            # Nothing new to send — the visible partial already matches final text.
-            self._already_sent = True
-            self._final_response_sent = True
-            return
+            # BUT: if final_text itself has meaningful content (e.g. a timeout
+            # message after a long tool call), the prefix-based continuation
+            # calculation may wrongly conclude "already shown" because the
+            # streamed prefix was from a *previous* segment (before the tool
+            # boundary).  In that case, send the full final_text as-is (#10807).
+            if final_text.strip() and final_text != self._visible_prefix():
+                continuation = final_text
+            else:
+                self._already_sent = True
+                self._final_response_sent = True
+                return

        raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
        safe_limit = max(500, raw_limit - 100)
@@ -473,12 +623,15 @@ class GatewayStreamConsumer:
                content=text,
                metadata=self.metadata,
            )
-            if result.success:
-                self._already_sent = True
-                return True
+            # Note: do NOT set _already_sent = True here.
+            # Commentary messages are interim status updates (e.g. "Using browser
+            # tool..."), not the final response. Setting already_sent would cause
+            # the final response to be incorrectly suppressed when there are
+            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
+            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
-        return False
+            return False

    async def _send_or_edit(self, text: str) -> bool:
        """Send or edit the streaming message.
@@ -496,10 +649,26 @@ class GatewayStreamConsumer:
        visible_without_cursor = text
        if self.cfg.cursor:
            visible_without_cursor = visible_without_cursor.replace(self.cfg.cursor, "")
-        if not visible_without_cursor.strip():
+        _visible_stripped = visible_without_cursor.strip()
+        if not _visible_stripped:
            return True  # cursor-only / whitespace-only update
        if not text.strip():
            return True  # nothing to send is "success"
+        # Guard: do not create a brand-new standalone message when the only
+        # visible content is a handful of characters alongside the streaming
+        # cursor.  During rapid tool-calling the model often emits 1-2 tokens
+        # before switching to tool calls; the resulting "X ▉" message risks
+        # leaving the cursor permanently visible if the follow-up edit (to
+        # strip the cursor on segment break) is rate-limited by the platform.
+        # This was reported on Telegram, Matrix, and other clients where the
+        # ▉ block character renders as a visible white box ("tofu").
+        # Existing messages (edits) are unaffected — only first sends gated.
+        _MIN_NEW_MSG_CHARS = 4
+        if (self._message_id is None
+                and self.cfg.cursor
+                and self.cfg.cursor in text
+                and len(_visible_stripped) < _MIN_NEW_MSG_CHARS):
+            return True  # too short for a standalone message — accumulate more
        try:
            if self._message_id is not None:
                if self._edit_supported:
@@ -0,0 +1,160 @@
+# Hermes Agent Has Had "Routines" Since March
+
+Anthropic just announced [Claude Code Routines](https://claude.com/blog/introducing-routines-in-claude-code) — scheduled tasks, GitHub event triggers, and API-triggered agent runs. Bundled prompt + repo + connectors, running on their infrastructure.
+
+It's a good feature. We shipped it two months ago.
+
+---
+
+## The Three Trigger Types — Side by Side
+
+Claude Code Routines offers three ways to trigger an automation:
+
+**1. Scheduled (cron)**
+> "Every night at 2am: pull the top bug from Linear, attempt a fix, and open a draft PR."
+
+Hermes equivalent — works today:
+```bash
+hermes cron create "0 2 * * *" \
+  "Pull the top bug from the issue tracker, attempt a fix, and open a draft PR." \
+  --name "Nightly bug fix" \
+  --deliver telegram
+```
+
+**2. GitHub Events (webhook)**
+> "Flag PRs that touch the /auth-provider module and post to #auth-changes."
+
+Hermes equivalent — works today:
+```bash
+hermes webhook subscribe auth-watch \
+  --events "pull_request" \
+  --prompt "PR #{pull_request.number}: {pull_request.title} by {pull_request.user.login}. Check if it touches the auth-provider module. If yes, summarize the changes." \
+  --deliver slack
+```
+
+**3. API Triggers**
+> "Read the alert payload, find the owning service, post a triage summary to #oncall."
+
+Hermes equivalent — works today:
+```bash
+hermes webhook subscribe alert-triage \
+  --prompt "Alert: {alert.name} — Severity: {alert.severity}. Find the owning service, investigate, and post a triage summary with proposed first steps." \
+  --deliver slack
+```
+
+Every use case in their blog post — backlog triage, docs drift, deploy verification, alert correlation, library porting, bespoke PR review — has a working Hermes implementation. No new features needed. It's been shipping since March 2026.
+
+---
+
+## What's Different
+
+| | Claude Code Routines | Hermes Agent |
+|---|---|---|
+| **Scheduled tasks** | ✅ Schedule-based | ✅ Any cron expression + human-readable intervals |
+| **GitHub triggers** | ✅ PR, issue, push events | ✅ Any GitHub event via webhook subscriptions |
+| **API triggers** | ✅ POST to unique endpoint | ✅ POST to webhook routes with HMAC auth |
+| **MCP connectors** | ✅ Native connectors | ✅ Full MCP client support |
+| **Script pre-processing** | ❌ | ✅ Python scripts run before agent, inject context |
+| **Skill chaining** | ❌ | ✅ Load multiple skills per automation |
+| **Daily limit** | 5-25 runs/day | **Unlimited** |
+| **Model choice** | Claude only | **Any model** — Claude, GPT, Gemini, DeepSeek, Qwen, local |
+| **Delivery targets** | GitHub comments | Telegram, Discord, Slack, SMS, email, GitHub comments, webhooks, local files |
+| **Infrastructure** | Anthropic's servers | **Your infrastructure** — VPS, home server, laptop |
+| **Data residency** | Anthropic's cloud | **Your machines** |
+| **Cost** | Pro/Max/Team/Enterprise subscription | Your API key, your rates |
+| **Open source** | No | **Yes** — MIT license |
+
+---
+
+## Things Hermes Does That Routines Can't
+
+### Script Injection
+
+Run a Python script *before* the agent. The script's stdout becomes context. The script handles mechanical work (fetching, diffing, computing); the agent handles reasoning.
+
+```bash
+hermes cron create "every 1h" \
+  "If CHANGE DETECTED, summarize what changed. If NO_CHANGE, respond with [SILENT]." \
+  --script ~/.hermes/scripts/watch-site.py \
+  --name "Pricing monitor" \
+  --deliver telegram
+```
+
+The `[SILENT]` pattern means you only get notified when something actually happens. No spam.
+
+### Multi-Skill Workflows
+
+Chain specialized skills together. Each skill teaches the agent a specific capability, and the prompt ties them together.
+
+```bash
+hermes cron create "0 8 * * *" \
+  "Search arXiv for papers on language model reasoning. Save the top 3 as Obsidian notes." \
+  --skills "arxiv,obsidian" \
+  --name "Paper digest"
+```
+
+### Deliver Anywhere
+
+One automation, any destination:
+
+```bash
+--deliver telegram                      # Telegram home channel
+--deliver discord                       # Discord home channel  
+--deliver slack                         # Slack channel
+--deliver sms:+15551234567              # Text message
+--deliver telegram:-1001234567890:42    # Specific Telegram forum topic
+--deliver local                         # Save to file, no notification
+```
+
+### Model-Agnostic
+
+Your nightly triage can run on Claude. Your deploy verification can run on GPT. Your cost-sensitive monitors can run on DeepSeek or a local model. Same automation system, any backend.
+
+---
+
+## The Limits Tell the Story
+
+Claude Code Routines: **5 routines per day** on Pro. **25 on Enterprise.** That's their ceiling.
+
+Hermes has no daily limit. Run 500 automations a day if you want. The only constraint is your API budget, and you choose which models to use for which tasks.
+
+A nightly backlog triage on Sonnet costs roughly $0.02-0.05. A monitoring check on DeepSeek costs fractions of a cent. You control the economics.
+
+---
+
+## Get Started
+
+Hermes Agent is open source and free. The automation infrastructure — cron scheduler, webhook platform, skill system, multi-platform delivery — is built in.
+
+```bash
+pip install hermes-agent
+hermes setup
+```
+
+Set up a scheduled task in 30 seconds:
+```bash
+hermes cron create "0 9 * * 1" \
+  "Generate a weekly AI news digest. Search the web for major announcements, trending repos, and notable papers. Keep it under 500 words with links." \
+  --name "Weekly digest" \
+  --deliver telegram
+```
+
+Set up a GitHub webhook in 60 seconds:
+```bash
+hermes gateway setup    # enable webhooks
+hermes webhook subscribe pr-review \
+  --events "pull_request" \
+  --prompt "Review PR #{pull_request.number}: {pull_request.title}" \
+  --skills "github-code-review" \
+  --deliver github_comment
+```
+
+Full automation templates gallery: [hermes-agent.nousresearch.com/docs/guides/automation-templates](https://hermes-agent.nousresearch.com/docs/guides/automation-templates)
+
+Documentation: [hermes-agent.nousresearch.com](https://hermes-agent.nousresearch.com)
+
+GitHub: [github.com/NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)
+
+---
+
+*Hermes Agent is built by [Nous Research](https://nousresearch.com). Open source, model-agnostic, runs on your infrastructure.*
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.9.0"
-__release_date__ = "2026.4.13"
+__version__ = "0.10.0"
+__release_date__ = "2026.4.16"
@@ -70,6 +70,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
 DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
+DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -77,6 +78,10 @@ QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
 QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
 QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120

+# Google Gemini OAuth (google-gemini-cli provider, Cloud Code Assist backend)
+DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
+GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60  # refresh 60s before expiry
+

 # =============================================================================
 # Provider Registry
@@ -121,6 +126,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_QWEN_BASE_URL,
    ),
+    "google-gemini-cli": ProviderConfig(
+        id="google-gemini-cli",
+        name="Google Gemini (OAuth)",
+        auth_type="oauth_external",
+        inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
+    ),
    "copilot": ProviderConfig(
        id="copilot",
        name="GitHub Copilot",
@@ -224,7 +235,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
    ),
    "ai-gateway": ProviderConfig(
        id="ai-gateway",
-        name="AI Gateway",
+        name="Vercel AI Gateway",
        auth_type="api_key",
        inference_base_url="https://ai-gateway.vercel.sh/v1",
        api_key_env_vars=("AI_GATEWAY_API_KEY",),
@@ -274,6 +285,22 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "ollama-cloud": ProviderConfig(
+        id="ollama-cloud",
+        name="Ollama Cloud",
+        auth_type="api_key",
+        inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL,
+        api_key_env_vars=("OLLAMA_API_KEY",),
+        base_url_env_var="OLLAMA_BASE_URL",
+    ),
+    "bedrock": ProviderConfig(
+        id="bedrock",
+        name="AWS Bedrock",
+        auth_type="aws_sdk",
+        inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
+        api_key_env_vars=(),
+        base_url_env_var="BEDROCK_BASE_URL",
+    ),
 }


@@ -383,13 +410,16 @@ def _resolve_api_key_provider_secret(
 # Z.AI has separate billing for general vs coding plans, and global vs China
 # endpoints.  A key that works on one may return "Insufficient balance" on
 # another.  We probe at setup time and store the working endpoint.
+# Each entry lists candidate models to try in order — newer coding plan accounts
+# may only have access to recent models (glm-5.1, glm-5v-turbo) while older
+# ones still use glm-4.7.

 ZAI_ENDPOINTS = [
-    # (id, base_url, default_model, label)
-    ("global",        "https://api.z.ai/api/paas/v4",        "glm-5",   "Global"),
-    ("cn",            "https://open.bigmodel.cn/api/paas/v4", "glm-5",   "China"),
-    ("coding-global", "https://api.z.ai/api/coding/paas/v4",  "glm-4.7", "Global (Coding Plan)"),
-    ("coding-cn",     "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"),
+    # (id, base_url, probe_models, label)
+    ("global",        "https://api.z.ai/api/paas/v4",        ["glm-5"],   "Global"),
+    ("cn",            "https://open.bigmodel.cn/api/paas/v4", ["glm-5"],   "China"),
+    ("coding-global", "https://api.z.ai/api/coding/paas/v4",  ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
+    ("coding-cn",     "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
 ]


@@ -397,35 +427,37 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
    """Probe z.ai endpoints to find one that accepts this API key.

    Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
-    first working endpoint, or None if all fail.
+    first working endpoint, or None if all fail.  For endpoints with multiple
+    candidate models, tries each in order and returns the first that succeeds.
    """
-    for ep_id, base_url, model, label in ZAI_ENDPOINTS:
-        try:
-            resp = httpx.post(
-                f"{base_url}/chat/completions",
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": model,
-                    "stream": False,
-                    "max_tokens": 1,
-                    "messages": [{"role": "user", "content": "ping"}],
-                },
-                timeout=timeout,
-            )
-            if resp.status_code == 200:
-                logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url)
-                return {
-                    "id": ep_id,
-                    "base_url": base_url,
-                    "model": model,
-                    "label": label,
-                }
-            logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code)
-        except Exception as exc:
-            logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc)
+    for ep_id, base_url, probe_models, label in ZAI_ENDPOINTS:
+        for model in probe_models:
+            try:
+                resp = httpx.post(
+                    f"{base_url}/chat/completions",
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                    },
+                    json={
+                        "model": model,
+                        "stream": False,
+                        "max_tokens": 1,
+                        "messages": [{"role": "user", "content": "ping"}],
+                    },
+                    timeout=timeout,
+                )
+                if resp.status_code == 200:
+                    logger.debug("Z.AI endpoint probe: %s (%s) model=%s OK", ep_id, base_url, model)
+                    return {
+                        "id": ep_id,
+                        "base_url": base_url,
+                        "model": model,
+                        "label": label,
+                    }
+                logger.debug("Z.AI endpoint probe: %s model=%s returned %s", ep_id, model, resp.status_code)
+            except Exception as exc:
+                logger.debug("Z.AI endpoint probe: %s model=%s failed: %s", ep_id, model, exc)
    return None


@@ -906,6 +938,7 @@ def resolve_provider(
    _PROVIDER_ALIASES = {
        "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
        "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
+        "x-ai": "xai", "x.ai": "xai", "grok": "xai",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "arcee-ai": "arcee", "arceeai": "arcee",
@@ -916,14 +949,16 @@ def resolve_provider(
        "github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
        "aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
        "opencode": "opencode-zen", "zen": "opencode-zen",
-        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
+        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
+        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
        # Local server aliases — route through the generic custom provider
        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
-        "ollama": "custom", "vllm": "custom", "llamacpp": "custom",
+        "ollama": "custom", "ollama_cloud": "ollama-cloud",
+        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
    }
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)
@@ -975,6 +1010,15 @@ def resolve_provider(
            if has_usable_secret(os.getenv(env_var, "")):
                return pid

+    # AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
+    # This runs after API-key providers so explicit keys always win.
+    try:
+        from agent.bedrock_adapter import has_aws_credentials
+        if has_aws_credentials():
+            return "bedrock"
+    except ImportError:
+        pass  # boto3 not installed — skip Bedrock auto-detection
+
    raise AuthError(
        "No inference provider configured. Run 'hermes model' to choose a "
        "provider and model, or set an API key (OPENROUTER_API_KEY, "
@@ -1217,6 +1261,83 @@ def get_qwen_auth_status() -> Dict[str, Any]:
        }


+# =============================================================================
+# Google Gemini OAuth (google-gemini-cli) — PKCE flow + Cloud Code Assist.
+#
+# Tokens live in ~/.hermes/auth/google_oauth.json (managed by agent.google_oauth).
+# The `base_url` here is the marker "cloudcode-pa://google" that run_agent.py
+# uses to construct a GeminiCloudCodeClient instead of the default OpenAI SDK.
+# Actual HTTP traffic goes to https://cloudcode-pa.googleapis.com/v1internal:*.
+# =============================================================================
+
+def resolve_gemini_oauth_runtime_credentials(
+    *,
+    force_refresh: bool = False,
+) -> Dict[str, Any]:
+    """Resolve runtime OAuth creds for google-gemini-cli."""
+    try:
+        from agent.google_oauth import (
+            GoogleOAuthError,
+            _credentials_path,
+            get_valid_access_token,
+            load_credentials,
+        )
+    except ImportError as exc:
+        raise AuthError(
+            f"agent.google_oauth is not importable: {exc}",
+            provider="google-gemini-cli",
+            code="google_oauth_module_missing",
+        ) from exc
+
+    try:
+        access_token = get_valid_access_token(force_refresh=force_refresh)
+    except GoogleOAuthError as exc:
+        raise AuthError(
+            str(exc),
+            provider="google-gemini-cli",
+            code=exc.code,
+        ) from exc
+
+    creds = load_credentials()
+    base_url = DEFAULT_GEMINI_CLOUDCODE_BASE_URL
+    return {
+        "provider": "google-gemini-cli",
+        "base_url": base_url,
+        "api_key": access_token,
+        "source": "google-oauth",
+        "expires_at_ms": (creds.expires_ms if creds else None),
+        "auth_file": str(_credentials_path()),
+        "email": (creds.email if creds else "") or "",
+        "project_id": (creds.project_id if creds else "") or "",
+    }
+
+
+def get_gemini_oauth_auth_status() -> Dict[str, Any]:
+    """Return a status dict for `hermes auth list` / `hermes status`."""
+    try:
+        from agent.google_oauth import _credentials_path, load_credentials
+    except ImportError:
+        return {"logged_in": False, "error": "agent.google_oauth unavailable"}
+    auth_path = _credentials_path()
+    creds = load_credentials()
+    if creds is None or not creds.access_token:
+        return {
+            "logged_in": False,
+            "auth_file": str(auth_path),
+            "error": "not logged in",
+        }
+    return {
+        "logged_in": True,
+        "auth_file": str(auth_path),
+        "source": "google-oauth",
+        "api_key": creds.access_token,
+        "expires_at_ms": creds.expires_ms,
+        "email": creds.email,
+        "project_id": creds.project_id,
+    }
+
+
+
 # =============================================================================
 # SSH / remote session detection
 # =============================================================================
@@ -2379,7 +2500,7 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()

-    if provider_id == "kimi-coding":
+    if provider_id in ("kimi-coding", "kimi-coding-cn"):
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
    elif env_url:
        base_url = env_url
@@ -2435,12 +2556,21 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
        return get_codex_auth_status()
    if target == "qwen-oauth":
        return get_qwen_auth_status()
+    if target == "google-gemini-cli":
+        return get_gemini_oauth_auth_status()
    if target == "copilot-acp":
        return get_external_process_provider_status(target)
    # API-key providers
    pconfig = PROVIDER_REGISTRY.get(target)
    if pconfig and pconfig.auth_type == "api_key":
        return get_api_key_provider_status(target)
+    # AWS SDK providers (Bedrock) — check via boto3 credential chain
+    if pconfig and pconfig.auth_type == "aws_sdk":
+        try:
+            from agent.bedrock_adapter import has_aws_credentials
+            return {"logged_in": has_aws_credentials(), "provider": target}
+        except ImportError:
+            return {"logged_in": False, "provider": target, "error": "boto3 not installed"}
    return {"logged_in": False}


@@ -2465,7 +2595,7 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()

-    if provider_id == "kimi-coding":
+    if provider_id in ("kimi-coding", "kimi-coding-cn"):
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
    elif provider_id == "zai":
        base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
@@ -3167,6 +3297,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:

        inference_base_url = auth_state["inference_base_url"]

+        # Snapshot the prior active_provider BEFORE _save_provider_state
+        # overwrites it to "nous".  If the user picks "Skip (keep current)"
+        # during model selection below, we restore this so the user's previous
+        # provider (e.g. openrouter) is preserved.
+        with _auth_store_lock():
+            _prior_store = _load_auth_store()
+            prior_active_provider = _prior_store.get("active_provider")
+
        with _auth_store_lock():
            auth_store = _load_auth_store()
            _save_provider_state(auth_store, "nous", auth_state)
@@ -3226,6 +3364,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            print(f"Login succeeded, but could not fetch available models. Reason: {message}")

        # Write provider + model atomically so config is never mismatched.
+        # If no model was selected (user picked "Skip (keep current)",
+        # model list fetch failed, or no curated models were available),
+        # preserve the user's previous provider — don't silently switch
+        # them to Nous with a mismatched model.  The Nous OAuth tokens
+        # stay saved for future use.
+        if not selected_model:
+            # Restore the prior active_provider that _save_provider_state
+            # overwrote to "nous".  config.yaml model.provider is left
+            # untouched, so the user's previous provider is fully preserved.
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                if prior_active_provider:
+                    auth_store["active_provider"] = prior_active_provider
+                else:
+                    auth_store.pop("active_provider", None)
+                _save_auth_store(auth_store)
+            print()
+            print("No provider change. Nous credentials saved for future use.")
+            print("  Run `hermes model` again to switch to Nous Portal.")
+            return
+
        config_path = _update_config_for_provider(
            "nous", inference_base_url, default_model=selected_model,
        )
@@ -4,6 +4,7 @@ from __future__ import annotations

 from getpass import getpass
 import math
+import sys
 import time
 from types import SimpleNamespace
 import uuid
@@ -32,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL


 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}


 def _get_custom_provider_names() -> list:
@@ -147,7 +148,7 @@ def auth_add_command(args) -> None:
        if provider.startswith(CUSTOM_POOL_PREFIX):
            requested_type = AUTH_TYPE_API_KEY
        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY

    pool = load_pool(provider)

@@ -160,7 +161,10 @@ def auth_add_command(args) -> None:
        default_label = _api_key_default_label(len(pool.entries()) + 1)
        label = (getattr(args, "label", None) or "").strip()
        if not label:
-            label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
+            if sys.stdin.isatty():
+                label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
+            else:
+                label = default_label
        entry = PooledCredential(
            provider=provider,
            id=uuid.uuid4().hex[:6],
@@ -250,6 +254,27 @@ def auth_add_command(args) -> None:
        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
        return

+    if provider == "google-gemini-cli":
+        from agent.google_oauth import run_gemini_oauth_login_pure
+
+        creds = run_gemini_oauth_login_pure()
+        label = (getattr(args, "label", None) or "").strip() or (
+            creds.get("email") or _oauth_default_label(provider, len(pool.entries()) + 1)
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:google_pkce",
+            access_token=creds["access_token"],
+            refresh_token=creds.get("refresh_token"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
    if provider == "qwen-oauth":
        creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False)
        label = (getattr(args, "label", None) or "").strip() or label_from_token(
@@ -368,6 +393,27 @@ def _interactive_auth() -> None:
    print("=" * 50)

    auth_list_command(SimpleNamespace(provider=None))
+
+    # Show AWS Bedrock credential status (not in the pool — uses boto3 chain)
+    try:
+        from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
+        if has_aws_credentials():
+            auth_source = resolve_aws_auth_env_var() or "unknown"
+            region = resolve_bedrock_region()
+            print(f"bedrock (AWS SDK credential chain):")
+            print(f"  Auth: {auth_source}")
+            print(f"  Region: {region}")
+            try:
+                import boto3
+                sts = boto3.client("sts", region_name=region)
+                identity = sts.get_caller_identity()
+                arn = identity.get("Arn", "unknown")
+                print(f"  Identity: {arn}")
+            except Exception:
+                print(f"  Identity: (could not resolve — boto3 STS call failed)")
+            print()
+    except ImportError:
+        pass  # boto3 or bedrock_adapter not available
    print()

    # Main menu
@@ -75,12 +75,12 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
        if not hasattr(cli, "_secret_deadline"):
            cli._secret_deadline = 0
        try:
-            value = getpass.getpass(f"{prompt} (hidden, Enter to skip): ")
+            value = getpass.getpass(f"{prompt} (hidden, ESC or empty Enter to skip): ")
        except (EOFError, KeyboardInterrupt):
            value = ""

        if not value:
-            cprint(f"\n{_DIM}  ⏭ Secret entry cancelled{_RST}")
+            cprint(f"\n{_DIM}  ⏭ Secret entry skipped{_RST}")
            return {
                "success": True,
                "reason": "cancelled",
@@ -133,7 +133,7 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
                cli._app.invalidate()

            if not value:
-                cprint(f"\n{_DIM}  ⏭ Secret entry cancelled{_RST}")
+                cprint(f"\n{_DIM}  ⏭ Secret entry skipped{_RST}")
                return {
                    "success": True,
                    "reason": "cancelled",
@@ -12,6 +12,9 @@ from __future__ import annotations

 import os
 import re
+import shutil
+import subprocess
+import time
 from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from typing import Any
@@ -99,6 +102,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
    CommandDef("provider", "Show available providers and current provider",
               "Configuration"),
+    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),

    CommandDef("personality", "Set a predefined personality", "Configuration",
               args_hint="[name]"),
@@ -161,7 +165,7 @@ COMMAND_REGISTRY: list[CommandDef] = [

    # Exit
    CommandDef("quit", "Exit the CLI", "Exit",
-               cli_only=True, aliases=("exit", "q")),
+               cli_only=True, aliases=("exit",)),
 ]


@@ -447,7 +451,7 @@ def _collect_gateway_skill_entries(
            name = sanitize_name(cmd_name) if sanitize_name else cmd_name
            if not name:
                continue
-            desc = "Plugin command"
+            desc = plugin_cmds[cmd_name].get("description", "Plugin command")
            if len(desc) > desc_limit:
                desc = desc[:desc_limit - 3] + "..."
            plugin_pairs.append((name, desc))
@@ -579,6 +583,116 @@ def discord_skill_commands(
    )


+def discord_skill_commands_by_category(
+    reserved_names: set[str],
+) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
+    """Return skill entries organized by category for Discord ``/skill`` subcommand groups.
+
+    Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
+    (e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
+    category.  Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
+    *uncategorized* — the caller should register them as direct subcommands
+    of the ``/skill`` group.
+
+    The same filtering as :func:`discord_skill_commands` is applied: hub
+    skills excluded, per-platform disabled excluded, names clamped.
+
+    Returns:
+        ``(categories, uncategorized, hidden_count)``
+
+        - *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
+        - *uncategorized*: ``[(name, description, cmd_key), ...]``
+        - *hidden_count*: skills dropped due to Discord group limits
+          (25 subcommand groups, 25 subcommands per group)
+    """
+    from pathlib import Path as _P
+
+    _platform_disabled: set[str] = set()
+    try:
+        from agent.skill_utils import get_disabled_skill_names
+        _platform_disabled = get_disabled_skill_names(platform="discord")
+    except Exception:
+        pass
+
+    # Collect raw skill data --------------------------------------------------
+    categories: dict[str, list[tuple[str, str, str]]] = {}
+    uncategorized: list[tuple[str, str, str]] = []
+    _names_used: set[str] = set(reserved_names)
+    hidden = 0
+
+    try:
+        from agent.skill_commands import get_skill_commands
+        from tools.skills_tool import SKILLS_DIR
+        _skills_dir = SKILLS_DIR.resolve()
+        _hub_dir = (SKILLS_DIR / ".hub").resolve()
+        skill_cmds = get_skill_commands()
+
+        for cmd_key in sorted(skill_cmds):
+            info = skill_cmds[cmd_key]
+            skill_path = info.get("skill_md_path", "")
+            if not skill_path:
+                continue
+            sp = _P(skill_path).resolve()
+            # Skip skills outside SKILLS_DIR or from the hub
+            if not str(sp).startswith(str(_skills_dir)):
+                continue
+            if str(sp).startswith(str(_hub_dir)):
+                continue
+
+            skill_name = info.get("name", "")
+            if skill_name in _platform_disabled:
+                continue
+
+            raw_name = cmd_key.lstrip("/")
+            # Clamp to 32 chars (Discord limit)
+            discord_name = raw_name[:32]
+            if discord_name in _names_used:
+                continue
+            _names_used.add(discord_name)
+
+            desc = info.get("description", "")
+            if len(desc) > 100:
+                desc = desc[:97] + "..."
+
+            # Determine category from the relative path within SKILLS_DIR.
+            # e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
+            try:
+                rel = sp.parent.relative_to(_skills_dir)
+            except ValueError:
+                continue
+            parts = rel.parts
+            if len(parts) >= 2:
+                cat = parts[0]
+                categories.setdefault(cat, []).append((discord_name, desc, cmd_key))
+            else:
+                uncategorized.append((discord_name, desc, cmd_key))
+    except Exception:
+        pass
+
+    # Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
+    _MAX_GROUPS = 25
+    _MAX_PER_GROUP = 25
+
+    trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
+    group_count = 0
+    for cat in sorted(categories):
+        if group_count >= _MAX_GROUPS:
+            hidden += len(categories[cat])
+            continue
+        entries = categories[cat][:_MAX_PER_GROUP]
+        hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
+        trimmed_categories[cat] = entries
+        group_count += 1
+
+    # Uncategorized skills also count against the 25 top-level limit
+    remaining_slots = _MAX_GROUPS - group_count
+    if len(uncategorized) > remaining_slots:
+        hidden += len(uncategorized) - remaining_slots
+        uncategorized = uncategorized[:remaining_slots]
+
+    return trimmed_categories, uncategorized, hidden
+
+
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

@@ -610,6 +724,10 @@ class SlashCommandCompleter(Completer):
    ) -> None:
        self._skill_commands_provider = skill_commands_provider
        self._command_filter = command_filter
+        # Cached project file list for fuzzy @ completions
+        self._file_cache: list[str] = []
+        self._file_cache_time: float = 0.0
+        self._file_cache_cwd: str = ""

    def _command_allowed(self, slash_command: str) -> bool:
        if self._command_filter is None:
@@ -727,8 +845,7 @@ class SlashCommandCompleter(Completer):
            return None
        return word

-    @staticmethod
-    def _context_completions(word: str, limit: int = 30):
+    def _context_completions(self, word: str, limit: int = 30):
        """Yield Claude Code-style @ context completions.

        Bare ``@`` or ``@partial`` shows static references and matching
@@ -794,46 +911,138 @@ class SlashCommandCompleter(Completer):
                    count += 1
                return

-        # Bare @ or @partial — show matching files/folders from cwd
+        # Bare @ or @partial — fuzzy project-wide file search
        query = word[1:]  # strip the @
-        if not query:
-            search_dir, match_prefix = ".", ""
-        else:
-            expanded = os.path.expanduser(query)
-            if expanded.endswith("/"):
-                search_dir, match_prefix = expanded, ""
-            else:
-                search_dir = os.path.dirname(expanded) or "."
-                match_prefix = os.path.basename(expanded)
+        yield from self._fuzzy_file_completions(word, query, limit)

-        try:
-            entries = os.listdir(search_dir)
-        except OSError:
+    def _get_project_files(self) -> list[str]:
+        """Return cached list of project files (refreshed every 5s)."""
+        cwd = os.getcwd()
+        now = time.monotonic()
+        if (
+            self._file_cache
+            and self._file_cache_cwd == cwd
+            and now - self._file_cache_time < 5.0
+        ):
+            return self._file_cache
+
+        files: list[str] = []
+        # Try rg first (fast, respects .gitignore), then fd, then find.
+        for cmd in [
+            ["rg", "--files", "--sortr=modified", cwd],
+            ["rg", "--files", cwd],
+            ["fd", "--type", "f", "--base-directory", cwd],
+        ]:
+            tool = cmd[0]
+            if not shutil.which(tool):
+                continue
+            try:
+                proc = subprocess.run(
+                    cmd, capture_output=True, text=True, timeout=2,
+                    cwd=cwd,
+                )
+                if proc.returncode == 0 and proc.stdout.strip():
+                    raw = proc.stdout.strip().split("\n")
+                    # Store relative paths
+                    for p in raw[:5000]:
+                        rel = os.path.relpath(p, cwd) if os.path.isabs(p) else p
+                        files.append(rel)
+                    break
+            except (subprocess.TimeoutExpired, OSError):
+                continue
+
+        self._file_cache = files
+        self._file_cache_time = now
+        self._file_cache_cwd = cwd
+        return files
+
+    @staticmethod
+    def _score_path(filepath: str, query: str) -> int:
+        """Score a file path against a fuzzy query. Higher = better match."""
+        if not query:
+            return 1  # show everything when query is empty
+
+        filename = os.path.basename(filepath)
+        lower_file = filename.lower()
+        lower_path = filepath.lower()
+        lower_q = query.lower()
+
+        # Exact filename match
+        if lower_file == lower_q:
+            return 100
+        # Filename starts with query
+        if lower_file.startswith(lower_q):
+            return 80
+        # Filename contains query as substring
+        if lower_q in lower_file:
+            return 60
+        # Full path contains query
+        if lower_q in lower_path:
+            return 40
+        # Initials / abbreviation match: e.g. "fo" matches "file_operations"
+        # Check if query chars appear in order in filename
+        qi = 0
+        for c in lower_file:
+            if qi < len(lower_q) and c == lower_q[qi]:
+                qi += 1
+        if qi == len(lower_q):
+            # Bonus if matches land on word boundaries (after _, -, /, .)
+            boundary_hits = 0
+            qi = 0
+            prev = "_"  # treat start as boundary
+            for c in lower_file:
+                if qi < len(lower_q) and c == lower_q[qi]:
+                    if prev in "_-./":
+                        boundary_hits += 1
+                    qi += 1
+                prev = c
+            if boundary_hits >= len(lower_q) * 0.5:
+                return 35
+            return 25
+        return 0
+
+    def _fuzzy_file_completions(self, word: str, query: str, limit: int = 20):
+        """Yield fuzzy file completions for bare @query."""
+        files = self._get_project_files()
+
+        if not query:
+            # No query — show recently modified files (already sorted by mtime)
+            for fp in files[:limit]:
+                is_dir = fp.endswith("/")
+                filename = os.path.basename(fp)
+                kind = "folder" if is_dir else "file"
+                meta = "dir" if is_dir else _file_size_label(
+                    os.path.join(os.getcwd(), fp)
+                )
+                yield Completion(
+                    f"@{kind}:{fp}",
+                    start_position=-len(word),
+                    display=filename,
+                    display_meta=meta,
+                )
            return

-        count = 0
-        prefix_lower = match_prefix.lower()
-        for entry in sorted(entries):
-            if match_prefix and not entry.lower().startswith(prefix_lower):
-                continue
-            if entry.startswith("."):
-                continue  # skip hidden files in bare @ mode
-            if count >= limit:
-                break
-            full_path = os.path.join(search_dir, entry)
-            is_dir = os.path.isdir(full_path)
-            display_path = os.path.relpath(full_path)
-            suffix = "/" if is_dir else ""
+        # Score and rank
+        scored = []
+        for fp in files:
+            s = self._score_path(fp, query)
+            if s > 0:
+                scored.append((s, fp))
+        scored.sort(key=lambda x: (-x[0], x[1]))
+
+        for _, fp in scored[:limit]:
+            is_dir = fp.endswith("/")
+            filename = os.path.basename(fp)
            kind = "folder" if is_dir else "file"
-            meta = "dir" if is_dir else _file_size_label(full_path)
-            completion = f"@{kind}:{display_path}{suffix}"
-            yield Completion(
-                completion,
-                start_position=-len(word),
-                display=entry + suffix,
-                display_meta=meta,
+            meta = "dir" if is_dir else _file_size_label(
+                os.path.join(os.getcwd(), fp)
+            )
+            yield Completion(
+                f"@{kind}:{fp}",
+                start_position=-len(word),
+                display=filename,
+                display_meta=f"{fp}  {meta}" if meta else fp,
            )
-            count += 1

    def _model_completions(self, sub_text: str, sub_lower: str):
        """Yield completions for /model from config aliases + built-in aliases."""
@@ -931,6 +1140,22 @@ class SlashCommandCompleter(Completer):
                    display_meta=f"⚡ {short_desc}",
                )

+        # Plugin-registered slash commands
+        try:
+            from hermes_cli.plugins import get_plugin_commands
+            for cmd_name, cmd_info in get_plugin_commands().items():
+                if cmd_name.startswith(word):
+                    desc = str(cmd_info.get("description", "Plugin command"))
+                    short_desc = desc[:50] + ("..." if len(desc) > 50 else "")
+                    yield Completion(
+                        self._completion_text(cmd_name, word),
+                        start_position=-len(word),
+                        display=f"/{cmd_name}",
+                        display_meta=f"🔌 {short_desc}",
+                    )
+        except Exception:
+            pass
+

 # ---------------------------------------------------------------------------
 # Inline auto-suggest (ghost text) for slash commands
@@ -0,0 +1,315 @@
+"""Shell completion script generation for hermes CLI.
+
+Walks the live argparse parser tree to generate accurate, always-up-to-date
+completion scripts — no hardcoded subcommand lists, no extra dependencies.
+
+Supports bash, zsh, and fish.
+"""
+
+from __future__ import annotations
+
+import argparse
+from typing import Any
+
+
+def _walk(parser: argparse.ArgumentParser) -> dict[str, Any]:
+    """Recursively extract subcommands and flags from a parser.
+
+    Uses _SubParsersAction._choices_actions to get canonical names (no aliases)
+    along with their help text.
+    """
+    flags: list[str] = []
+    subcommands: dict[str, Any] = {}
+
+    for action in parser._actions:
+        if isinstance(action, argparse._SubParsersAction):
+            # _choices_actions has one entry per canonical name; aliases are
+            # omitted, which keeps completion lists clean.
+            seen: set[str] = set()
+            for pseudo in action._choices_actions:
+                name = pseudo.dest
+                if name in seen:
+                    continue
+                seen.add(name)
+                subparser = action.choices.get(name)
+                if subparser is None:
+                    continue
+                info = _walk(subparser)
+                info["help"] = _clean(pseudo.help or "")
+                subcommands[name] = info
+        elif action.option_strings:
+            flags.extend(o for o in action.option_strings if o.startswith("-"))
+
+    return {"flags": flags, "subcommands": subcommands}
+
+
+def _clean(text: str, maxlen: int = 60) -> str:
+    """Strip shell-unsafe characters and truncate."""
+    return text.replace("'", "").replace('"', "").replace("\\", "")[:maxlen]
+
+
+# ---------------------------------------------------------------------------
+# Bash
+# ---------------------------------------------------------------------------
+
+def generate_bash(parser: argparse.ArgumentParser) -> str:
+    tree = _walk(parser)
+    top_cmds = " ".join(sorted(tree["subcommands"]))
+
+    cases: list[str] = []
+    for cmd in sorted(tree["subcommands"]):
+        info = tree["subcommands"][cmd]
+        if cmd == "profile" and info["subcommands"]:
+            # Profile subcommand: complete actions, then profile names for
+            # actions that accept a profile argument.
+            subcmds = " ".join(sorted(info["subcommands"]))
+            profile_actions = "use delete show alias rename export"
+            cases.append(
+                f"        profile)\n"
+                f"            case \"$prev\" in\n"
+                f"                profile)\n"
+                f"                    COMPREPLY=($(compgen -W \"{subcmds}\" -- \"$cur\"))\n"
+                f"                    return\n"
+                f"                    ;;\n"
+                f"                {profile_actions.replace(' ', '|')})\n"
+                f"                    COMPREPLY=($(compgen -W \"$(_hermes_profiles)\" -- \"$cur\"))\n"
+                f"                    return\n"
+                f"                    ;;\n"
+                f"            esac\n"
+                f"            ;;"
+            )
+        elif info["subcommands"]:
+            subcmds = " ".join(sorted(info["subcommands"]))
+            cases.append(
+                f"        {cmd})\n"
+                f"            COMPREPLY=($(compgen -W \"{subcmds}\" -- \"$cur\"))\n"
+                f"            return\n"
+                f"            ;;"
+            )
+        elif info["flags"]:
+            flags = " ".join(info["flags"])
+            cases.append(
+                f"        {cmd})\n"
+                f"            COMPREPLY=($(compgen -W \"{flags}\" -- \"$cur\"))\n"
+                f"            return\n"
+                f"            ;;"
+            )
+
+    cases_str = "\n".join(cases)
+
+    return f"""# Hermes Agent bash completion
+# Add to ~/.bashrc:
+#   eval "$(hermes completion bash)"
+
+_hermes_profiles() {{
+    local profiles_dir="$HOME/.hermes/profiles"
+    local profiles="default"
+    if [ -d "$profiles_dir" ]; then
+        profiles="$profiles $(ls "$profiles_dir" 2>/dev/null)"
+    fi
+    echo "$profiles"
+}}
+
+_hermes_completion() {{
+    local cur prev
+    COMPREPLY=()
+    cur="${{COMP_WORDS[COMP_CWORD]}}"
+    prev="${{COMP_WORDS[COMP_CWORD-1]}}"
+
+    # Complete profile names after -p / --profile
+    if [[ "$prev" == "-p" || "$prev" == "--profile" ]]; then
+        COMPREPLY=($(compgen -W "$(_hermes_profiles)" -- "$cur"))
+        return
+    fi
+
+    if [[ $COMP_CWORD -ge 2 ]]; then
+        case "${{COMP_WORDS[1]}}" in
+{cases_str}
+        esac
+    fi
+
+    if [[ $COMP_CWORD -eq 1 ]]; then
+        COMPREPLY=($(compgen -W "{top_cmds}" -- "$cur"))
+    fi
+}}
+
+complete -F _hermes_completion hermes
+"""
+
+
+# ---------------------------------------------------------------------------
+# Zsh
+# ---------------------------------------------------------------------------
+
+def generate_zsh(parser: argparse.ArgumentParser) -> str:
+    tree = _walk(parser)
+
+    top_cmds_lines: list[str] = []
+    for cmd in sorted(tree["subcommands"]):
+        help_text = _clean(tree["subcommands"][cmd].get("help", ""))
+        top_cmds_lines.append(f"                '{cmd}:{help_text}'")
+    top_cmds_str = "\n".join(top_cmds_lines)
+
+    sub_cases: list[str] = []
+    for cmd in sorted(tree["subcommands"]):
+        info = tree["subcommands"][cmd]
+        if not info["subcommands"]:
+            continue
+        if cmd == "profile":
+            # Profile subcommand: complete actions, then profile names for
+            # actions that accept a profile argument.
+            sub_lines: list[str] = []
+            for sc in sorted(info["subcommands"]):
+                sh = _clean(info["subcommands"][sc].get("help", ""))
+                sub_lines.append(f"                        '{sc}:{sh}'")
+            sub_str = "\n".join(sub_lines)
+            sub_cases.append(
+                f"                profile)\n"
+                f"                    case ${{line[2]}} in\n"
+                f"                        use|delete|show|alias|rename|export)\n"
+                f"                            _hermes_profiles\n"
+                f"                            ;;\n"
+                f"                        *)\n"
+                f"                            local -a profile_cmds\n"
+                f"                            profile_cmds=(\n"
+                f"{sub_str}\n"
+                f"                            )\n"
+                f"                            _describe 'profile command' profile_cmds\n"
+                f"                            ;;\n"
+                f"                    esac\n"
+                f"                    ;;"
+            )
+        else:
+            sub_lines = []
+            for sc in sorted(info["subcommands"]):
+                sh = _clean(info["subcommands"][sc].get("help", ""))
+                sub_lines.append(f"                    '{sc}:{sh}'")
+            sub_str = "\n".join(sub_lines)
+            safe = cmd.replace("-", "_")
+            sub_cases.append(
+                f"                {cmd})\n"
+                f"                    local -a {safe}_cmds\n"
+                f"                    {safe}_cmds=(\n"
+                f"{sub_str}\n"
+                f"                    )\n"
+                f"                    _describe '{cmd} command' {safe}_cmds\n"
+                f"                    ;;"
+            )
+    sub_cases_str = "\n".join(sub_cases)
+
+    return f"""#compdef hermes
+# Hermes Agent zsh completion
+# Add to ~/.zshrc:
+#   eval "$(hermes completion zsh)"
+
+_hermes_profiles() {{
+    local -a profiles
+    profiles=(default)
+    if [[ -d "$HOME/.hermes/profiles" ]]; then
+        profiles+=("${{(@f)$(ls $HOME/.hermes/profiles 2>/dev/null)}}")
+    fi
+    _describe 'profile' profiles
+}}
+
+_hermes() {{
+    local context state line
+    typeset -A opt_args
+
+    _arguments -C \\
+        '(-h --help){{-h,--help}}[Show help and exit]' \\
+        '(-V --version){{-V,--version}}[Show version and exit]' \\
+        '(-p --profile){{-p,--profile}}[Profile name]:profile:_hermes_profiles' \\
+        '1:command:->commands' \\
+        '*::arg:->args'
+
+    case $state in
+        commands)
+            local -a subcmds
+            subcmds=(
+{top_cmds_str}
+            )
+            _describe 'hermes command' subcmds
+            ;;
+        args)
+            case ${{line[1]}} in
+{sub_cases_str}
+            esac
+            ;;
+    esac
+}}
+
+_hermes "$@"
+"""
+
+
+# ---------------------------------------------------------------------------
+# Fish
+# ---------------------------------------------------------------------------
+
+def generate_fish(parser: argparse.ArgumentParser) -> str:
+    tree = _walk(parser)
+    top_cmds = sorted(tree["subcommands"])
+    top_cmds_str = " ".join(top_cmds)
+
+    lines: list[str] = [
+        "# Hermes Agent fish completion",
+        "# Add to your config:",
+        "#   hermes completion fish | source",
+        "",
+        "# Helper: list available profiles",
+        "function __hermes_profiles",
+        "    echo default",
+        "    if test -d $HOME/.hermes/profiles",
+        "        ls $HOME/.hermes/profiles 2>/dev/null",
+        "    end",
+        "end",
+        "",
+        "# Disable file completion by default",
+        "complete -c hermes -f",
+        "",
+        "# Complete profile names after -p / --profile",
+        "complete -c hermes -f -s p -l profile"
+        " -d 'Profile name' -xa '(__hermes_profiles)'",
+        "",
+        "# Top-level subcommands",
+    ]
+
+    for cmd in top_cmds:
+        info = tree["subcommands"][cmd]
+        help_text = _clean(info.get("help", ""))
+        lines.append(
+            f"complete -c hermes -f "
+            f"-n 'not __fish_seen_subcommand_from {top_cmds_str}' "
+            f"-a {cmd} -d '{help_text}'"
+        )
+
+    lines.append("")
+    lines.append("# Subcommand completions")
+
+    profile_name_actions = {"use", "delete", "show", "alias", "rename", "export"}
+
+    for cmd in top_cmds:
+        info = tree["subcommands"][cmd]
+        if not info["subcommands"]:
+            continue
+        lines.append(f"# {cmd}")
+        for sc in sorted(info["subcommands"]):
+            sinfo = info["subcommands"][sc]
+            sh = _clean(sinfo.get("help", ""))
+            lines.append(
+                f"complete -c hermes -f "
+                f"-n '__fish_seen_subcommand_from {cmd}' "
+                f"-a {sc} -d '{sh}'"
+            )
+        # For profile subcommand, complete profile names for relevant actions
+        if cmd == "profile":
+            for action in sorted(profile_name_actions):
+                lines.append(
+                    f"complete -c hermes -f "
+                    f"-n '__fish_seen_subcommand_from {action}; "
+                    f"and __fish_seen_subcommand_from profile' "
+                    f"-a '(__hermes_profiles)' -d 'Profile name'"
+                )
+
+    lines.append("")
+    return "\n".join(lines)
@@ -23,7 +23,6 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

-from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled

 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
@@ -45,6 +44,9 @@ _EXTRA_ENV_KEYS = frozenset({
    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
+    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",
+    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
+    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
@@ -238,13 +240,41 @@ def _secure_dir(path):
        pass


+def _is_container() -> bool:
+    """Detect if we're running inside a Docker/Podman/LXC container.
+
+    When Hermes runs in a container with volume-mounted config files, forcing
+    0o600 permissions breaks multi-process setups where the gateway and
+    dashboard run as different UIDs or the volume mount requires broader
+    permissions.
+    """
+    # Explicit opt-out
+    if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
+        return True
+    # Docker / Podman marker file
+    if os.path.exists("/.dockerenv"):
+        return True
+    # LXC / cgroup-based detection
+    try:
+        with open("/proc/1/cgroup", "r") as f:
+            cgroup_content = f.read()
+        if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
+            return True
+    except (OSError, IOError):
+        pass
+    return False
+
+
 def _secure_file(path):
    """Set file to owner-only read/write (0600). No-op on Windows.

    Skipped in managed mode — the NixOS activation script sets
    group-readable permissions (0640) on config files.
+
+    Skipped in containers — Docker/Podman volume mounts often need broader
+    permissions.  Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
    """
-    if is_managed():
+    if is_managed() or _is_container():
        return
    try:
        if os.path.exists(str(path)):
@@ -389,8 +419,7 @@ DEFAULT_CONFIG = {
        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
        "camofox": {
            # When true, Hermes sends a stable profile-scoped userId to Camofox
-            # so the server can map it to a persistent browser profile directory.
-            # Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
+            # so the server maps it to a persistent Firefox profile automatically.
            # When false (default), each session gets a random userId (ephemeral).
            "managed_persistence": False,
        },
@@ -416,6 +445,27 @@ DEFAULT_CONFIG = {
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed

    },
+
+    # AWS Bedrock provider configuration.
+    # Only used when model.provider is "bedrock".
+    "bedrock": {
+        "region": "",  # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1)
+        "discovery": {
+            "enabled": True,           # Auto-discover models via ListFoundationModels
+            "provider_filter": [],     # Only show models from these providers (e.g. ["anthropic", "amazon"])
+            "refresh_interval": 3600,  # Cache discovery results for this many seconds
+        },
+        "guardrail": {
+            # Amazon Bedrock Guardrails — content filtering and safety policies.
+            # Create a guardrail in the Bedrock console, then set the ID and version here.
+            # See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html
+            "guardrail_identifier": "",  # e.g. "abc123def456"
+            "guardrail_version": "",     # e.g. "1" or "DRAFT"
+            "stream_processing_mode": "async",  # "sync" or "async"
+            "trace": "disabled",         # "enabled", "disabled", or "enabled_full"
+        },
+    },
+
    "smart_model_routing": {
        "enabled": False,
        "max_simple_chars": 160,
@@ -507,6 +557,11 @@ DEFAULT_CONFIG = {
        "platforms": {},  # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
    },

+    # Web dashboard settings
+    "dashboard": {
+        "theme": "default",  # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
+    },
+
    # Privacy settings
    "privacy": {
        "redact_pii": False,  # When True, hash user IDs and strip phone numbers from LLM context
@@ -514,7 +569,7 @@ DEFAULT_CONFIG = {
    
    # Text-to-speech configuration
    "tts": {
-        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
+        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
            "voice": "en-US-AriaNeural",
            # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@@ -528,6 +583,12 @@ DEFAULT_CONFIG = {
            "voice": "alloy",
            # Voices: alloy, echo, fable, onyx, nova, shimmer
        },
+        "xai": {
+            "voice_id": "eve",
+            "language": "en",
+            "sample_rate": 24000,
+            "bit_rate": 128000,
+        },
        "mistral": {
            "model": "voxtral-mini-tts-2603",
            "voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8",  # Paul - Neutral
@@ -635,6 +696,7 @@ DEFAULT_CONFIG = {
        "allowed_channels": "",        # If set, bot ONLY responds in these channel IDs (whitelist)
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
+        "channel_prompts": {},         # Per-channel ephemeral system prompts (forum parents apply to child threads)
    },

    # WhatsApp platform settings (gateway mode)
@@ -645,6 +707,21 @@ DEFAULT_CONFIG = {
        # Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
    },

+    # Telegram platform settings (gateway mode)
+    "telegram": {
+        "channel_prompts": {},         # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
+    },
+
+    # Slack platform settings (gateway mode)
+    "slack": {
+        "channel_prompts": {},         # Per-channel ephemeral system prompts
+    },
+
+    # Mattermost platform settings (gateway mode)
+    "mattermost": {
+        "channel_prompts": {},         # Per-channel ephemeral system prompts
+    },
+
    # Approval mode for dangerous commands:
    #   manual — always prompt the user (default)
    #   smart  — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
@@ -700,7 +777,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 17,
+    "_config_version": 18,
 }

 # =============================================================================
@@ -768,6 +845,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "XAI_API_KEY": {
+        "description": "xAI API key",
+        "prompt": "xAI API key",
+        "url": "https://console.x.ai/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "XAI_BASE_URL": {
+        "description": "xAI base URL override",
+        "prompt": "xAI base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -909,6 +1002,30 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "HERMES_GEMINI_CLIENT_ID": {
+        "description": "Google OAuth client ID for google-gemini-cli (optional; defaults to Google's public gemini-cli client)",
+        "prompt": "Google OAuth client ID (optional — leave empty to use the public default)",
+        "url": "https://console.cloud.google.com/apis/credentials",
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
+    "HERMES_GEMINI_CLIENT_SECRET": {
+        "description": "Google OAuth client secret for google-gemini-cli (optional)",
+        "prompt": "Google OAuth client secret (optional)",
+        "url": "https://console.cloud.google.com/apis/credentials",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "HERMES_GEMINI_PROJECT_ID": {
+        "description": "GCP project ID for paid Gemini tiers (free tier auto-provisions)",
+        "prompt": "GCP project ID for Gemini OAuth (leave empty for free tier)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "OPENCODE_ZEN_API_KEY": {
        "description": "OpenCode Zen API key (pay-as-you-go access to curated models)",
        "prompt": "OpenCode Zen API key",
@@ -956,6 +1073,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "OLLAMA_API_KEY": {
+        "description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)",
+        "prompt": "Ollama Cloud API key",
+        "url": "https://ollama.com/settings",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "OLLAMA_BASE_URL": {
+        "description": "Ollama Cloud base URL override (default: https://ollama.com/v1)",
+        "prompt": "Ollama base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "XIAOMI_API_KEY": {
        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
        "prompt": "Xiaomi MiMo API Key",
@@ -971,6 +1104,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "AWS_REGION": {
+        "description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
+        "prompt": "AWS Region",
+        "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html",
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
+    "AWS_PROFILE": {
+        "description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)",
+        "prompt": "AWS Profile",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },

    # ── Tool API keys ──
    "EXA_API_KEY": {
@@ -1168,6 +1317,12 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "messaging",
    },
+    "TELEGRAM_PROXY": {
+        "description": "Proxy URL for Telegram connections (overrides HTTPS_PROXY). Supports http://, https://, socks5://",
+        "prompt": "Telegram proxy URL (optional)",
+        "password": False,
+        "category": "messaging",
+    },
    "DISCORD_BOT_TOKEN": {
        "description": "Discord bot token from Developer Portal",
        "prompt": "Discord bot token",
@@ -1331,6 +1486,53 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "messaging",
    },
+    "BLUEBUBBLES_ALLOW_ALL_USERS": {
+        "description": "Allow all BlueBubbles users without allowlist",
+        "prompt": "Allow All BlueBubbles Users",
+        "category": "messaging",
+    },
+    "QQ_APP_ID": {
+        "description": "QQ Bot App ID from QQ Open Platform (q.qq.com)",
+        "prompt": "QQ App ID",
+        "url": "https://q.qq.com",
+        "category": "messaging",
+    },
+    "QQ_CLIENT_SECRET": {
+        "description": "QQ Bot Client Secret from QQ Open Platform",
+        "prompt": "QQ Client Secret",
+        "password": True,
+        "category": "messaging",
+    },
+    "QQ_ALLOWED_USERS": {
+        "description": "Comma-separated QQ user IDs allowed to use the bot",
+        "prompt": "QQ Allowed Users",
+        "category": "messaging",
+    },
+    "QQ_GROUP_ALLOWED_USERS": {
+        "description": "Comma-separated QQ group IDs allowed to interact with the bot",
+        "prompt": "QQ Group Allowed Users",
+        "category": "messaging",
+    },
+    "QQ_ALLOW_ALL_USERS": {
+        "description": "Allow all QQ users without an allowlist (true/false)",
+        "prompt": "Allow All QQ Users",
+        "category": "messaging",
+    },
+    "QQ_HOME_CHANNEL": {
+        "description": "Default QQ channel/group for cron delivery and notifications",
+        "prompt": "QQ Home Channel",
+        "category": "messaging",
+    },
+    "QQ_HOME_CHANNEL_NAME": {
+        "description": "Display name for the QQ home channel",
+        "prompt": "QQ Home Channel Name",
+        "category": "messaging",
+    },
+    "QQ_SANDBOX": {
+        "description": "Enable QQ sandbox mode for development testing (true/false)",
+        "prompt": "QQ Sandbox Mode",
+        "category": "messaging",
+    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -1379,6 +1581,22 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "GATEWAY_PROXY_URL": {
+        "description": "URL of a remote Hermes API server to forward messages to (proxy mode). When set, the gateway handles platform I/O only — all agent work is delegated to the remote server. Use for Docker E2EE containers that relay to a host agent. Also configurable via gateway.proxy_url in config.yaml.",
+        "prompt": "Remote Hermes API server URL (e.g. http://192.168.1.100:8642)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
+    "GATEWAY_PROXY_KEY": {
+        "description": "Bearer token for authenticating with the remote Hermes API server (proxy mode). Must match the API_SERVER_KEY on the remote host.",
+        "prompt": "Remote API server auth key",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
    "WEBHOOK_ENABLED": {
        "description": "Enable the webhook platform adapter for receiving events from GitHub, GitLab, etc.",
        "prompt": "Enable webhooks (true/false)",
@@ -1402,13 +1620,8 @@ OPTIONAL_ENV_VARS = {
    },

    # ── Agent settings ──
-    "MESSAGING_CWD": {
-        "description": "Working directory for terminal commands via messaging",
-        "prompt": "Messaging working directory (default: home)",
-        "url": None,
-        "password": False,
-        "category": "setting",
-    },
+    # NOTE: MESSAGING_CWD was removed here — use terminal.cwd in config.yaml
+    # instead.  The gateway reads TERMINAL_CWD (bridged from terminal.cwd).
    "SUDO_PASSWORD": {
        "description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting",
        "prompt": "Sudo password",
@@ -1456,14 +1669,8 @@ OPTIONAL_ENV_VARS = {
    },
 }

-if not _managed_nous_tools_enabled():
-    for _hidden_var in (
-        "FIRECRAWL_GATEWAY_URL",
-        "TOOL_GATEWAY_DOMAIN",
-        "TOOL_GATEWAY_SCHEME",
-        "TOOL_GATEWAY_USER_TOKEN",
-    ):
-        OPTIONAL_ENV_VARS.pop(_hidden_var, None)
+# Tool Gateway env vars are always visible — they're useful for
+# self-hosted / custom gateway setups regardless of subscription state.


 def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
@@ -1887,6 +2094,52 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
    sys.stderr.write("\n".join(lines) + "\n\n")


+def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> None:
+    """Warn if MESSAGING_CWD or TERMINAL_CWD is set in .env instead of config.yaml.
+
+    These env vars are deprecated — the canonical setting is terminal.cwd
+    in config.yaml.  Prints a migration hint to stderr.
+    """
+    import os, sys
+    messaging_cwd = os.environ.get("MESSAGING_CWD")
+    terminal_cwd_env = os.environ.get("TERMINAL_CWD")
+
+    if config is None:
+        try:
+            config = load_config()
+        except Exception:
+            return
+
+    terminal_cfg = config.get("terminal", {})
+    config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "."
+    # Only warn if config.yaml doesn't have an explicit path
+    config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "")
+
+    lines: list[str] = []
+    if messaging_cwd:
+        lines.append(
+            f"  \033[33m⚠\033[0m MESSAGING_CWD={messaging_cwd} found in .env — "
+            f"this is deprecated."
+        )
+    if terminal_cwd_env and not config_has_explicit_cwd:
+        # TERMINAL_CWD in env but not from config bridge — likely from .env
+        lines.append(
+            f"  \033[33m⚠\033[0m TERMINAL_CWD={terminal_cwd_env} found in .env — "
+            f"this is deprecated."
+        )
+    if lines:
+        hint_path = os.environ.get("HERMES_HOME", "~/.hermes")
+        lines.insert(0, "\033[33m⚠ Deprecated .env settings detected:\033[0m")
+        lines.append(
+            f"  \033[2mMove to config.yaml instead:  "
+            f"terminal:\\n    cwd: /your/project/path\033[0m"
+        )
+        lines.append(
+            f"  \033[2mThen remove the old entries from {hint_path}/.env\033[0m"
+        )
+        sys.stderr.write("\n".join(lines) + "\n\n")
+
+
 def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]:
    """
    Migrate config to latest version, prompting for new required fields.
@@ -2700,6 +2953,47 @@ def sanitize_env_file() -> int:
    return fixes


+def _check_non_ascii_credential(key: str, value: str) -> str:
+    """Warn and strip non-ASCII characters from credential values.
+
+    API keys and tokens must be pure ASCII — they are sent as HTTP header
+    values which httpx/httpcore encode as ASCII.  Non-ASCII characters
+    (commonly introduced by copy-pasting from rich-text editors or PDFs
+    that substitute lookalike Unicode glyphs for ASCII letters) cause
+    ``UnicodeEncodeError: 'ascii' codec can't encode character`` at
+    request time.
+
+    Returns the sanitized (ASCII-only) value.  Prints a warning if any
+    non-ASCII characters were found and removed.
+    """
+    try:
+        value.encode("ascii")
+        return value  # all ASCII — nothing to do
+    except UnicodeEncodeError:
+        pass
+
+    # Build a readable list of the offending characters
+    bad_chars: list[str] = []
+    for i, ch in enumerate(value):
+        if ord(ch) > 127:
+            bad_chars.append(f"  position {i}: {ch!r} (U+{ord(ch):04X})")
+    sanitized = value.encode("ascii", errors="ignore").decode("ascii")
+
+    import sys
+    print(
+        f"\n  Warning: {key} contains non-ASCII characters that will break API requests.\n"
+        f"  This usually happens when copy-pasting from a PDF, rich-text editor,\n"
+        f"  or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
+        f"\n"
+        + "\n".join(f"  {line}" for line in bad_chars[:5])
+        + ("\n  ... and more" if len(bad_chars) > 5 else "")
+        + f"\n\n  The non-ASCII characters have been stripped automatically.\n"
+        f"  If authentication fails, re-copy the key from the provider's dashboard.\n",
+        file=sys.stderr,
+    )
+    return sanitized
+
+
 def save_env_value(key: str, value: str):
    """Save or update a value in ~/.hermes/.env."""
    if is_managed():
@@ -2708,6 +3002,8 @@ def save_env_value(key: str, value: str):
    if not _ENV_VAR_NAME_RE.match(key):
        raise ValueError(f"Invalid environment variable name: {key!r}")
    value = value.replace("\n", "").replace("\r", "")
+    # API keys / tokens must be ASCII — strip non-ASCII with a warning.
+    value = _check_non_ascii_credential(key, value)
    ensure_hermes_home()
    env_path = get_env_path()
    
@@ -2738,12 +3034,25 @@ def save_env_value(key: str, value: str):
        lines.append(f"{key}={value}\n")
    
    fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
+    # Preserve original permissions so Docker volume mounts aren't clobbered.
+    original_mode = None
+    if env_path.exists():
+        try:
+            original_mode = stat.S_IMODE(env_path.stat().st_mode)
+        except OSError:
+            pass
    try:
        with os.fdopen(fd, 'w', **write_kw) as f:
            f.writelines(lines)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, env_path)
+        # Restore original permissions before _secure_file may tighten them.
+        if original_mode is not None:
+            try:
+                os.chmod(env_path, original_mode)
+            except OSError:
+                pass
    except BaseException:
        try:
            os.unlink(tmp_path)
@@ -2754,13 +3063,6 @@ def save_env_value(key: str, value: str):

    os.environ[key] = value

-    # Restrict .env permissions to owner-only (contains API keys)
-    if not _IS_WINDOWS:
-        try:
-            os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
-        except OSError:
-            pass
-

 def remove_env_value(key: str) -> bool:
    """Remove a key from ~/.hermes/.env and os.environ.
@@ -2789,12 +3091,23 @@ def remove_env_value(key: str) -> bool:

    if found:
        fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
+        # Preserve original permissions so Docker volume mounts aren't clobbered.
+        original_mode = None
+        try:
+            original_mode = stat.S_IMODE(env_path.stat().st_mode)
+        except OSError:
+            pass
        try:
            with os.fdopen(fd, 'w', **write_kw) as f:
                f.writelines(new_lines)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp_path, env_path)
+            if original_mode is not None:
+                try:
+                    os.chmod(env_path, original_mode)
+                except OSError:
+                    pass
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -166,6 +166,7 @@ def curses_radiolist(
    selected: int = 0,
    *,
    cancel_returns: int | None = None,
+    description: str | None = None,
 ) -> int:
    """Curses single-select radio list. Returns the selected index.

@@ -174,6 +175,9 @@ def curses_radiolist(
        items: Display labels for each row.
        selected: Index that starts selected (pre-selected).
        cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
+        description: Optional multi-line text shown between the title and
+            the item list.  Useful for context that should survive the
+            curses screen clear.
    """
    if cancel_returns is None:
        cancel_returns = selected
@@ -181,6 +185,10 @@ def curses_radiolist(
    if not sys.stdin.isatty():
        return cancel_returns

+    desc_lines: list[str] = []
+    if description:
+        desc_lines = description.splitlines()
+
    try:
        import curses
        result_holder: list = [None]
@@ -199,22 +207,35 @@ def curses_radiolist(
                stdscr.clear()
                max_y, max_x = stdscr.getmaxyx()

+                row = 0
+
                # Header
                try:
                    hattr = curses.A_BOLD
                    if curses.has_colors():
                        hattr |= curses.color_pair(2)
-                    stdscr.addnstr(0, 0, title, max_x - 1, hattr)
+                    stdscr.addnstr(row, 0, title, max_x - 1, hattr)
+                    row += 1
+
+                    # Description lines
+                    for dline in desc_lines:
+                        if row >= max_y - 1:
+                            break
+                        stdscr.addnstr(row, 0, dline, max_x - 1, curses.A_NORMAL)
+                        row += 1
+
                    stdscr.addnstr(
-                        1, 0,
+                        row, 0,
                        "  \u2191\u2193 navigate  ENTER/SPACE select  ESC cancel",
                        max_x - 1, curses.A_DIM,
                    )
+                    row += 1
                except curses.error:
                    pass

                # Scrollable item list
-                visible_rows = max_y - 4
+                items_start = row + 1
+                visible_rows = max_y - items_start - 1
                if cursor < scroll_offset:
                    scroll_offset = cursor
                elif cursor >= scroll_offset + visible_rows:
@@ -223,7 +244,7 @@ def curses_radiolist(
                for draw_i, i in enumerate(
                    range(scroll_offset, min(len(items), scroll_offset + visible_rows))
                ):
-                    y = draw_i + 3
+                    y = draw_i + items_start
                    if y >= max_y - 1:
                        break
                    radio = "\u25cf" if i == selected else "\u25cb"
@@ -27,6 +27,110 @@ _DPASTE_COM_URL = "https://dpaste.com/api/"
 # paste.rs caps at ~1 MB; we stay under that with headroom.
 _MAX_LOG_BYTES = 512_000

+# Auto-delete pastes after this many seconds (6 hours).
+_AUTO_DELETE_SECONDS = 21600
+
+
+# ---------------------------------------------------------------------------
+# Privacy / delete helpers
+# ---------------------------------------------------------------------------
+
+_PRIVACY_NOTICE = """\
+⚠️  This will upload the following to a public paste service:
+  • System info (OS, Python version, Hermes version, provider, which API keys
+    are configured — NOT the actual keys)
+  • Recent log lines (agent.log, errors.log, gateway.log — may contain
+    conversation fragments and file paths)
+  • Full agent.log and gateway.log (up to 512 KB each — likely contains
+    conversation content, tool outputs, and file paths)
+
+Pastes auto-delete after 6 hours.
+"""
+
+_GATEWAY_PRIVACY_NOTICE = (
+    "⚠️ **Privacy notice:** This uploads system info + recent log tails "
+    "(may contain conversation fragments) to a public paste service. "
+    "Full logs are NOT included from the gateway — use `hermes debug share` "
+    "from the CLI for full log uploads.\n"
+    "Pastes auto-delete after 6 hours."
+)
+
+
+def _extract_paste_id(url: str) -> Optional[str]:
+    """Extract the paste ID from a paste.rs or dpaste.com URL.
+
+    Returns the ID string, or None if the URL doesn't match a known service.
+    """
+    url = url.strip().rstrip("/")
+    for prefix in ("https://paste.rs/", "http://paste.rs/"):
+        if url.startswith(prefix):
+            return url[len(prefix):]
+    return None
+
+
+def delete_paste(url: str) -> bool:
+    """Delete a paste from paste.rs.  Returns True on success.
+
+    Only paste.rs supports unauthenticated DELETE.  dpaste.com pastes
+    expire automatically but cannot be deleted via API.
+    """
+    paste_id = _extract_paste_id(url)
+    if not paste_id:
+        raise ValueError(
+            f"Cannot delete: only paste.rs URLs are supported.  Got: {url}"
+        )
+
+    target = f"{_PASTE_RS_URL}{paste_id}"
+    req = urllib.request.Request(
+        target, method="DELETE",
+        headers={"User-Agent": "hermes-agent/debug-share"},
+    )
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return 200 <= resp.status < 300
+
+
+def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS):
+    """Spawn a detached process to delete paste.rs pastes after *delay_seconds*.
+
+    The child process is fully detached (``start_new_session=True``) so it
+    survives the parent exiting (important for CLI mode).  Only paste.rs
+    URLs are attempted — dpaste.com pastes auto-expire on their own.
+    """
+    import subprocess
+
+    paste_rs_urls = [u for u in urls if _extract_paste_id(u)]
+    if not paste_rs_urls:
+        return
+
+    # Build a tiny inline Python script.  No imports beyond stdlib.
+    url_list = ", ".join(f'"{u}"' for u in paste_rs_urls)
+    script = (
+        "import time, urllib.request; "
+        f"time.sleep({delay_seconds}); "
+        f"[urllib.request.urlopen(urllib.request.Request(u, method='DELETE', "
+        f"headers={{'User-Agent': 'hermes-agent/auto-delete'}}), timeout=15) "
+        f"for u in [{url_list}]]"
+    )
+
+    try:
+        subprocess.Popen(
+            [sys.executable, "-c", script],
+            start_new_session=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+    except Exception:
+        pass  # Best-effort; manual delete still available.
+
+
+def _delete_hint(url: str) -> str:
+    """Return a one-liner delete command for the given paste URL."""
+    paste_id = _extract_paste_id(url)
+    if paste_id:
+        return f"hermes debug delete {url}"
+    # dpaste.com — no API delete, expires on its own.
+    return "(auto-expires per dpaste.com policy)"
+

 def _upload_paste_rs(content: str) -> str:
    """Upload to paste.rs.  Returns the paste URL.
@@ -250,6 +354,9 @@ def run_debug_share(args):
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)

+    if not local_only:
+        print(_PRIVACY_NOTICE)
+
    print("Collecting debug report...")

    # Capture dump once — prepended to every paste for context.
@@ -315,22 +422,56 @@ def run_debug_share(args):
    if failures:
        print(f"\n  (failed to upload: {', '.join(failures)})")

+    # Schedule auto-deletion after 6 hours
+    _schedule_auto_delete(list(urls.values()))
+    print(f"\n⏱  Pastes will auto-delete in 6 hours.")
+
+    # Manual delete fallback
+    print(f"To delete now:  hermes debug delete <url>")
+
    print(f"\nShare these links with the Hermes team for support.")


+def run_debug_delete(args):
+    """Delete one or more paste URLs uploaded by /debug."""
+    urls = getattr(args, "urls", [])
+    if not urls:
+        print("Usage: hermes debug delete <url> [<url> ...]")
+        print("  Deletes paste.rs pastes uploaded by 'hermes debug share'.")
+        return
+
+    for url in urls:
+        try:
+            ok = delete_paste(url)
+            if ok:
+                print(f"  ✓ Deleted: {url}")
+            else:
+                print(f"  ✗ Failed to delete: {url} (unexpected response)")
+        except ValueError as exc:
+            print(f"  ✗ {exc}")
+        except Exception as exc:
+            print(f"  ✗ Could not delete {url}: {exc}")
+
+
 def run_debug(args):
    """Route debug subcommands."""
    subcmd = getattr(args, "debug_command", None)
    if subcmd == "share":
        run_debug_share(args)
+    elif subcmd == "delete":
+        run_debug_delete(args)
    else:
        # Default: show help
-        print("Usage: hermes debug share [--lines N] [--expire N] [--local]")
+        print("Usage: hermes debug <command>")
        print()
        print("Commands:")
        print("  share    Upload debug report to a paste service and print URL")
+        print("  delete   Delete a previously uploaded paste")
        print()
-        print("Options:")
+        print("Options (share):")
        print("  --lines N    Number of log lines to include (default: 200)")
        print("  --expire N   Paste expiry in days (default: 7)")
        print("  --local      Print report locally instead of uploading")
+        print()
+        print("Options (delete):")
+        print("  <url> ...    One or more paste URLs to delete")
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
 from hermes_constants import display_hermes_home
@@ -42,6 +43,7 @@ _PROVIDER_ENV_HINTS = (
    "ZAI_API_KEY",
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
+    "KIMI_CN_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -371,7 +373,11 @@ def run_doctor(args):
    print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))

    try:
-        from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
+        from hermes_cli.auth import (
+            get_nous_auth_status,
+            get_codex_auth_status,
+            get_gemini_oauth_auth_status,
+        )

        nous_status = get_nous_auth_status()
        if nous_status.get("logged_in"):
@@ -386,6 +392,20 @@ def run_doctor(args):
            check_warn("OpenAI Codex auth", "(not logged in)")
            if codex_status.get("error"):
                check_info(codex_status["error"])
+
+        gemini_status = get_gemini_oauth_auth_status()
+        if gemini_status.get("logged_in"):
+            email = gemini_status.get("email") or ""
+            project = gemini_status.get("project_id") or ""
+            pieces = []
+            if email:
+                pieces.append(email)
+            if project:
+                pieces.append(f"project={project}")
+            suffix = f" ({', '.join(pieces)})" if pieces else ""
+            check_ok("Google Gemini OAuth", f"(logged in{suffix})")
+        else:
+            check_warn("Google Gemini OAuth", "(not logged in)")
    except Exception as e:
        check_warn("Auth provider status", f"(could not check: {e})")

@@ -512,7 +532,87 @@ def run_doctor(args):
            pass

    _check_gateway_service_linger(issues)
-    
+
+    # =========================================================================
+    # Check: Command installation (hermes bin symlink)
+    # =========================================================================
+    if sys.platform != "win32":
+        print()
+        print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
+
+        # Determine the venv entry point location
+        _venv_bin = None
+        for _venv_name in ("venv", ".venv"):
+            _candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
+            if _candidate.exists():
+                _venv_bin = _candidate
+                break
+
+        # Determine the expected command link directory (mirrors install.sh logic)
+        _prefix = os.environ.get("PREFIX", "")
+        _is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
+        if _is_termux_env and _prefix:
+            _cmd_link_dir = Path(_prefix) / "bin"
+            _cmd_link_display = "$PREFIX/bin"
+        else:
+            _cmd_link_dir = Path.home() / ".local" / "bin"
+            _cmd_link_display = "~/.local/bin"
+        _cmd_link = _cmd_link_dir / "hermes"
+
+        if _venv_bin is None:
+            check_warn(
+                "Venv entry point not found",
+                "(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
+            )
+            manual_issues.append(
+                f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
+            )
+        else:
+            check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
+
+            # Check the symlink at the command link location
+            if _cmd_link.is_symlink():
+                _target = _cmd_link.resolve()
+                _expected = _venv_bin.resolve()
+                if _target == _expected:
+                    check_ok(f"{_cmd_link_display}/hermes → correct target")
+                else:
+                    check_warn(
+                        f"{_cmd_link_display}/hermes points to wrong target",
+                        f"(→ {_target}, expected → {_expected})"
+                    )
+                    if should_fix:
+                        _cmd_link.unlink()
+                        _cmd_link.symlink_to(_venv_bin)
+                        check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                        fixed_count += 1
+                    else:
+                        issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
+            elif _cmd_link.exists():
+                # It's a regular file, not a symlink — possibly a wrapper script
+                check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
+            else:
+                check_fail(
+                    f"{_cmd_link_display}/hermes not found",
+                    "(hermes command may not work outside the venv)"
+                )
+                if should_fix:
+                    _cmd_link_dir.mkdir(parents=True, exist_ok=True)
+                    _cmd_link.symlink_to(_venv_bin)
+                    check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                    fixed_count += 1
+
+                    # Check if the link dir is on PATH
+                    _path_dirs = os.environ.get("PATH", "").split(os.pathsep)
+                    if str(_cmd_link_dir) not in _path_dirs:
+                        check_warn(
+                            f"{_cmd_link_display} is not on your PATH",
+                            "(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
+                        )
+                        manual_issues.append(f"Add {_cmd_link_display} to your PATH")
+                else:
+                    issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
+
    # =========================================================================
    # Check: External tools
    # =========================================================================
@@ -729,10 +829,11 @@ def run_doctor(args):
        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
-        ("AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+        ("Vercel AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
-        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         "https://opencode.ai/zen/go/v1/models", "OPENCODE_GO_BASE_URL", True),
+        # OpenCode Go has no shared /models endpoint; skip the health check.
+        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         None,                                  "OPENCODE_GO_BASE_URL", False),
    ]
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
@@ -749,7 +850,7 @@ def run_doctor(args):
            print(f"  Checking {_pname} API...", end="", flush=True)
            try:
                import httpx
-                _base = os.getenv(_base_env, "")
+                _base = os.getenv(_base_env, "") if _base_env else ""
                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
                if not _base and _key.startswith("sk-kimi-"):
                    _base = "https://api.kimi.com/coding/v1"
@@ -777,6 +878,31 @@ def run_doctor(args):
            except Exception as _e:
                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)}           ")

+    # -- AWS Bedrock --
+    # Bedrock uses the AWS SDK credential chain, not API keys.
+    try:
+        from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
+        if has_aws_credentials():
+            _auth_var = resolve_aws_auth_env_var()
+            _region = resolve_bedrock_region()
+            _label = "AWS Bedrock".ljust(20)
+            print(f"  Checking AWS Bedrock...", end="", flush=True)
+            try:
+                import boto3
+                _br_client = boto3.client("bedrock", region_name=_region)
+                _br_resp = _br_client.list_foundation_models()
+                _model_count = len(_br_resp.get("modelSummaries", []))
+                print(f"\r  {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)}           ")
+            except ImportError:
+                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)}           ")
+                issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
+            except Exception as _e:
+                _err_name = type(_e).__name__
+                print(f"\r  {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)}           ")
+                issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
+    except ImportError:
+        pass  # bedrock_adapter not available — skip silently
+
    # =========================================================================
    # Check: Submodules
    # =========================================================================
@@ -131,6 +131,7 @@ def _configured_platforms() -> list[str]:
        "wecom": "WECOM_BOT_ID",
        "wecom_callback": "WECOM_CALLBACK_CORP_ID",
        "weixin": "WEIXIN_ACCOUNT_ID",
+        "qqbot": "QQ_APP_ID",
    }
    return [name for name, env in checks.items() if os.getenv(env)]

@@ -8,11 +8,40 @@ from pathlib import Path
 from dotenv import load_dotenv


+# Env var name suffixes that indicate credential values.  These are the
+# only env vars whose values we sanitize on load — we must not silently
+# alter arbitrary user env vars, but credentials are known to require
+# pure ASCII (they become HTTP header values).
+_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
+
+
+def _sanitize_loaded_credentials() -> None:
+    """Strip non-ASCII characters from credential env vars in os.environ.
+
+    Called after dotenv loads so the rest of the codebase never sees
+    non-ASCII API keys.  Only touches env vars whose names end with
+    known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
+    """
+    for key, value in list(os.environ.items()):
+        if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
+            continue
+        try:
+            value.encode("ascii")
+        except UnicodeEncodeError:
+            os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
+
+
 def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    try:
        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
    except UnicodeDecodeError:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
+    # Strip non-ASCII characters from credential env vars that were just
+    # loaded.  API keys must be pure ASCII since they're sent as HTTP
+    # header values (httpx encodes headers as ASCII).  Non-ASCII chars
+    # typically come from copy-pasting keys from PDFs or rich-text editors
+    # that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v).
+    _sanitize_loaded_credentials()


 def _sanitize_env_file_if_needed(path: Path) -> None:
@@ -222,7 +222,7 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
                    current_cmd = ""
        else:
            result = subprocess.run(
-                ["ps", "eww", "-ax", "-o", "pid=,command="],
+                ["ps", "-A", "eww", "-o", "pid=,command="],
                capture_output=True,
                text=True,
                timeout=10,
@@ -715,7 +715,9 @@ def _detect_venv_dir() -> Path | None:
    """Detect the active virtualenv directory.

    Checks ``sys.prefix`` first (works regardless of the directory name),
-    then falls back to probing common directory names under PROJECT_ROOT.
+    then ``VIRTUAL_ENV`` env var (covers uv-managed environments where
+    sys.prefix == sys.base_prefix), then falls back to probing common
+    directory names under PROJECT_ROOT.
    Returns ``None`` when no virtualenv can be found.
    """
    # If we're running inside a virtualenv, sys.prefix points to it.
@@ -724,6 +726,15 @@ def _detect_venv_dir() -> Path | None:
        if venv.is_dir():
            return venv

+    # uv and some other tools set VIRTUAL_ENV without changing sys.prefix.
+    # This catches `uv run` where sys.prefix == sys.base_prefix but the
+    # environment IS a venv.  (#8620)
+    _virtual_env = os.environ.get("VIRTUAL_ENV")
+    if _virtual_env:
+        venv = Path(_virtual_env)
+        if venv.is_dir():
+            return venv
+
    # Fallback: check common virtualenv directory names under the project root.
    for candidate in (".venv", "venv"):
        venv = PROJECT_ROOT / candidate
@@ -1128,7 +1139,62 @@ def systemd_restart(system: bool = False):

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
-        print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
+        # SIGUSR1 sent — the gateway will drain active agents, exit with
+        # code 75, and systemd will restart it after RestartSec (30s).
+        # Wait for the old process to die and the new one to become active
+        # so the CLI doesn't return while the service is still restarting.
+        import time
+        scope_label = _service_scope_label(system).capitalize()
+        svc = get_service_name()
+        scope_cmd = _systemctl_cmd(system)
+
+        # Phase 1: wait for old process to exit (drain + shutdown)
+        print(f"⏳ {scope_label} service draining active work...")
+        deadline = time.time() + 90
+        while time.time() < deadline:
+            try:
+                os.kill(pid, 0)
+                time.sleep(1)
+            except (ProcessLookupError, PermissionError):
+                break  # old process is gone
+        else:
+            print(f"⚠ Old process (PID {pid}) still alive after 90s")
+
+        # Phase 2: wait for systemd to start the new process
+        print(f"⏳ Waiting for {svc} to restart...")
+        deadline = time.time() + 60
+        while time.time() < deadline:
+            try:
+                result = subprocess.run(
+                    scope_cmd + ["is-active", svc],
+                    capture_output=True, text=True, timeout=5,
+                )
+                if result.stdout.strip() == "active":
+                    # Verify it's a NEW process, not the old one somehow
+                    new_pid = get_running_pid()
+                    if new_pid and new_pid != pid:
+                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                        return
+            except (subprocess.TimeoutExpired, FileNotFoundError):
+                pass
+            time.sleep(2)
+
+        # Timed out — check final state
+        try:
+            result = subprocess.run(
+                scope_cmd + ["is-active", svc],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip() == "active":
+                print(f"✓ {scope_label} service restarted")
+                return
+        except Exception:
+            pass
+        print(
+            f"⚠ {scope_label} service did not become active within 60s.\n"
+            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        )
        return
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
@@ -1913,6 +1979,29 @@ _PLATFORMS = [
             "help": "Phone number or Apple ID to deliver cron results and notifications to."},
        ],
    },
+    {
+        "key": "qqbot",
+        "label": "QQ Bot",
+        "emoji": "🐧",
+        "token_var": "QQ_APP_ID",
+        "setup_instructions": [
+            "1. Register a QQ Bot application at q.qq.com",
+            "2. Note your App ID and App Secret from the application page",
+            "3. Enable the required intents (C2C, Group, Guild messages)",
+            "4. Configure sandbox or publish the bot",
+        ],
+        "vars": [
+            {"name": "QQ_APP_ID", "prompt": "QQ Bot App ID", "password": False,
+             "help": "Your QQ Bot App ID from q.qq.com."},
+            {"name": "QQ_CLIENT_SECRET", "prompt": "QQ Bot App Secret", "password": True,
+             "help": "Your QQ Bot App Secret from q.qq.com."},
+            {"name": "QQ_ALLOWED_USERS", "prompt": "Allowed user OpenIDs (comma-separated, leave empty for open access)", "password": False,
+             "is_allowlist": True,
+             "help": "Optional — restrict DM access to specific user OpenIDs."},
+            {"name": "QQ_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
+             "help": "OpenID to deliver cron results and notifications to."},
+        ],
+    },
 ]


@@ -2841,6 +2930,15 @@ def gateway_command(args):

    elif subcmd == "start":
        system = getattr(args, 'system', False)
+        start_all = getattr(args, 'all', False)
+
+        if start_all:
+            # Kill all stale gateway processes across all profiles before starting
+            killed = kill_gateway_processes(all_profiles=True)
+            if killed:
+                print(f"✓ Killed {killed} stale gateway process(es) across all profiles")
+                _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
+
        if is_termux():
            print("Gateway service start is not supported on Termux because there is no system service manager.")
            print("Run manually: hermes gateway")
@@ -2926,7 +3024,39 @@ def gateway_command(args):
        # Try service first, fall back to killing and restarting
        service_available = False
        system = getattr(args, 'system', False)
+        restart_all = getattr(args, 'all', False)
        service_configured = False
+
+        if restart_all:
+            # --all: stop every gateway process across all profiles, then start fresh
+            service_stopped = False
+            if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+                try:
+                    systemd_stop(system=system)
+                    service_stopped = True
+                except subprocess.CalledProcessError:
+                    pass
+            elif is_macos() and get_launchd_plist_path().exists():
+                try:
+                    launchd_stop()
+                    service_stopped = True
+                except subprocess.CalledProcessError:
+                    pass
+            killed = kill_gateway_processes(all_profiles=True)
+            total = killed + (1 if service_stopped else 0)
+            if total:
+                print(f"✓ Stopped {total} gateway process(es) across all profiles")
+            _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
+
+            # Start the current profile's service fresh
+            print("Starting gateway...")
+            if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+                systemd_start(system=system)
+            elif is_macos() and get_launchd_plist_path().exists():
+                launchd_start()
+            else:
+                run_gateway(verbose=0)
+            return
        
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
            service_configured = True
@@ -1118,6 +1118,8 @@ def select_provider_and_model(args=None):
        _model_flow_openai_codex(config, current_model)
    elif selected_provider == "qwen-oauth":
        _model_flow_qwen_oauth(config, current_model)
+    elif selected_provider == "google-gemini-cli":
+        _model_flow_google_gemini_cli(config, current_model)
    elif selected_provider == "copilot-acp":
        _model_flow_copilot_acp(config, current_model)
    elif selected_provider == "copilot":
@@ -1139,7 +1141,9 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
-    elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
+    elif selected_provider == "bedrock":
+        _model_flow_bedrock(config, current_model)
+    elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"):
        _model_flow_api_key_provider(config, selected_provider, current_model)

    # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -1275,11 +1279,8 @@ def _model_flow_nous(config, current_model="", args=None):
        AuthError, format_auth_error,
        _login_nous, PROVIDER_REGISTRY,
    )
-    from hermes_cli.config import get_env_value, save_config, save_env_value
-    from hermes_cli.nous_subscription import (
-        apply_nous_provider_defaults,
-        get_nous_subscription_explainer_lines,
-    )
+    from hermes_cli.config import get_env_value, load_config, save_config, save_env_value
+    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
    import argparse

    state = get_provider_auth_state("nous")
@@ -1298,9 +1299,12 @@ def _model_flow_nous(config, current_model="", args=None):
                insecure=bool(getattr(args, "insecure", False)),
            )
            _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
-            print()
-            for line in get_nous_subscription_explainer_lines():
-                print(line)
+            # Offer Tool Gateway enablement for paid subscribers
+            try:
+                _refreshed = load_config() or {}
+                prompt_enable_tool_gateway(_refreshed)
+            except Exception:
+                pass
        except SystemExit:
            print("Login cancelled or failed.")
            return
@@ -1408,18 +1412,10 @@ def _model_flow_nous(config, current_model="", args=None):
        if get_env_value("OPENAI_BASE_URL"):
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
-        changed_defaults = apply_nous_provider_defaults(config)
        save_config(config)
        print(f"Default model set to: {selected} (via Nous Portal)")
-        if "tts" in changed_defaults:
-            print("TTS provider set to: OpenAI TTS via your Nous subscription")
-        else:
-            current_tts = str(config.get("tts", {}).get("provider") or "edge")
-            if current_tts.lower() not in {"", "edge"}:
-                print(f"Keeping your existing TTS provider: {current_tts}")
-        print()
-        for line in get_nous_subscription_explainer_lines():
-            print(line)
+        # Offer Tool Gateway enablement for paid subscribers
+        prompt_enable_tool_gateway(config)
    else:
        print("No change.")

@@ -1526,6 +1522,76 @@ def _model_flow_qwen_oauth(_config, current_model=""):
        print("No change.")


+def _model_flow_google_gemini_cli(_config, current_model=""):
+    """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.
+
+    Flow:
+      1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth).
+      2. If creds missing, run PKCE browser OAuth via agent.google_oauth.
+      3. Resolve project context (env -> config -> auto-discover -> free tier).
+      4. Prompt user to pick a model.
+      5. Save to ~/.hermes/config.yaml.
+    """
+    from hermes_cli.auth import (
+        DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
+        get_gemini_oauth_auth_status,
+        resolve_gemini_oauth_runtime_credentials,
+        _prompt_model_selection,
+        _save_model_choice,
+        _update_config_for_provider,
+    )
+    from hermes_cli.models import _PROVIDER_MODELS
+
+    print()
+    print("⚠  Google considers using the Gemini CLI OAuth client with third-party")
+    print("   software a policy violation. Some users have reported account")
+    print("   restrictions. You can use your own API key via 'gemini' provider")
+    print("   for the lowest-risk experience.")
+    print()
+    try:
+        proceed = input("Continue with OAuth login? [y/N]: ").strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        print("Cancelled.")
+        return
+    if proceed not in {"y", "yes"}:
+        print("Cancelled.")
+        return
+
+    status = get_gemini_oauth_auth_status()
+    if not status.get("logged_in"):
+        try:
+            from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow
+
+            env_project = resolve_project_id_from_env()
+            start_oauth_flow(force_relogin=True, project_id=env_project)
+        except Exception as exc:
+            print(f"OAuth login failed: {exc}")
+            return
+
+    # Verify creds resolve + trigger project discovery
+    try:
+        creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False)
+        project_id = creds.get("project_id", "")
+        if project_id:
+            print(f"  Using GCP project: {project_id}")
+        else:
+            print("  No GCP project configured — free tier will be auto-provisioned on first request.")
+    except Exception as exc:
+        print(f"Failed to resolve Gemini credentials: {exc}")
+        return
+
+    models = list(_PROVIDER_MODELS.get("google-gemini-cli") or [])
+    default = current_model or (models[0] if models else "gemini-2.5-flash")
+    selected = _prompt_model_selection(models, current_model=default)
+    if selected:
+        _save_model_choice(selected)
+        _update_config_for_provider("google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL)
+        print(f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)")
+    else:
+        print("No change.")
+
+
+

 def _model_flow_custom(config):
    """Custom endpoint: collect URL, API key, and model name.
@@ -1566,6 +1632,27 @@ def _model_flow_custom(config):

    effective_key = api_key or current_key

+    # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
+    # in the base URL for OpenAI-compatible chat completions.  Prompt the
+    # user if the URL looks like a local server without /v1.
+    _url_lower = effective_url.rstrip("/").lower()
+    _looks_local = any(h in _url_lower for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000"))
+    if _looks_local and not _url_lower.endswith("/v1"):
+        print()
+        print(f"  Hint: Did you mean to add /v1 at the end?")
+        print(f"  Most local model servers (Ollama, vLLM, llama.cpp) require it.")
+        print(f"  e.g. {effective_url.rstrip('/')}/v1")
+        try:
+            _add_v1 = input("  Add /v1? [Y/n]: ").strip().lower()
+        except (KeyboardInterrupt, EOFError):
+            _add_v1 = "n"
+        if _add_v1 in ("", "y", "yes"):
+            effective_url = effective_url.rstrip("/") + "/v1"
+            if base_url:
+                base_url = effective_url
+            print(f"  Updated URL: {effective_url}")
+        print()
+
    from hermes_cli.models import probe_api_models

    probe = probe_api_models(effective_key, effective_url)
@@ -1618,6 +1705,10 @@ def _model_flow_custom(config):
            model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()

        context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
+
+        # Prompt for a display name — shown in the provider menu on future runs
+        default_name = _auto_provider_name(effective_url)
+        display_name = input(f"Display name [{default_name}]: ").strip() or default_name
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return
@@ -1673,15 +1764,37 @@ def _model_flow_custom(config):
        print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")

    # Auto-save to custom_providers so it appears in the menu next time
-    _save_custom_provider(effective_url, effective_key, model_name or "", context_length=context_length)
+    _save_custom_provider(effective_url, effective_key, model_name or "",
+                          context_length=context_length, name=display_name)


-def _save_custom_provider(base_url, api_key="", model="", context_length=None):
+def _auto_provider_name(base_url: str) -> str:
+    """Generate a display name from a custom endpoint URL.
+
+    Returns a human-friendly label like "Local (localhost:11434)" or
+    "RunPod (xyz.runpod.io)".  Used as the default when prompting the
+    user for a display name during custom endpoint setup.
+    """
+    import re
+    clean = base_url.replace("https://", "").replace("http://", "").rstrip("/")
+    clean = re.sub(r"/v1/?$", "", clean)
+    name = clean.split("/")[0]
+    if "localhost" in name or "127.0.0.1" in name:
+        name = f"Local ({name})"
+    elif "runpod" in name.lower():
+        name = f"RunPod ({name})"
+    else:
+        name = name.capitalize()
+    return name
+
+
+def _save_custom_provider(base_url, api_key="", model="", context_length=None,
+                          name=None):
    """Save a custom endpoint to custom_providers in config.yaml.

    Deduplicates by base_url — if the URL already exists, updates the
    model name and context_length but doesn't add a duplicate entry.
-    Auto-generates a display name from the URL hostname.
+    Uses *name* when provided, otherwise auto-generates from the URL.
    """
    from hermes_cli.config import load_config, save_config

@@ -1709,20 +1822,9 @@ def _save_custom_provider(base_url, api_key="", model="", context_length=None):
                save_config(cfg)
            return  # already saved, updated if needed

-    # Auto-generate a name from the URL
-    import re
-    clean = base_url.replace("https://", "").replace("http://", "").rstrip("/")
-    # Remove /v1 suffix for cleaner names
-    clean = re.sub(r"/v1/?$", "", clean)
-    # Use hostname:port as the name
-    name = clean.split("/")[0]
-    # Capitalize for readability
-    if "localhost" in name or "127.0.0.1" in name:
-        name = f"Local ({name})"
-    elif "runpod" in name.lower():
-        name = f"RunPod ({name})"
-    else:
-        name = name.capitalize()
+    # Use provided name or auto-generate from URL
+    if not name:
+        name = _auto_provider_name(base_url)

    entry = {"name": name, "base_url": base_url}
    if api_key:
@@ -2410,6 +2512,252 @@ def _model_flow_kimi(config, current_model=""):
        print("No change.")


+def _model_flow_bedrock_api_key(config, region, current_model=""):
+    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
+
+    For developers who don't have an AWS account but received a Bedrock API Key
+    from their AWS admin. Works like any OpenAI-compatible endpoint.
+    """
+    from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
+    from hermes_cli.config import load_config, save_config, get_env_value, save_env_value
+    from hermes_cli.models import _PROVIDER_MODELS
+
+    mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
+
+    # Prompt for API key
+    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
+    if existing_key:
+        print(f"  Bedrock API Key: {existing_key[:12]}... ✓")
+    else:
+        print(f"  Endpoint: {mantle_base_url}")
+        print()
+        try:
+            import getpass
+            api_key = getpass.getpass("  Bedrock API Key: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return
+        if not api_key:
+            print("  Cancelled.")
+            return
+        save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
+        existing_key = api_key
+        print("  ✓ API key saved.")
+    print()
+
+    # Model selection — use static list (mantle doesn't need boto3 for discovery)
+    model_list = _PROVIDER_MODELS.get("bedrock", [])
+    print(f"  Showing {len(model_list)} curated models")
+
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("  Model ID: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        _save_model_choice(selected)
+
+        # Save as custom provider pointing to bedrock-mantle
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "custom"
+        model["base_url"] = mantle_base_url
+        model.pop("api_mode", None)  # chat_completions is the default
+
+        # Also save region in bedrock config for reference
+        bedrock_cfg = cfg.get("bedrock", {})
+        if not isinstance(bedrock_cfg, dict):
+            bedrock_cfg = {}
+        bedrock_cfg["region"] = region
+        cfg["bedrock"] = bedrock_cfg
+
+        # Save the API key env var name so hermes knows where to find it
+        save_env_value("OPENAI_API_KEY", existing_key)
+        save_env_value("OPENAI_BASE_URL", mantle_base_url)
+
+        save_config(cfg)
+        deactivate_provider()
+
+        print(f"  Default model set to: {selected} (via Bedrock API Key, {region})")
+        print(f"  Endpoint: {mantle_base_url}")
+    else:
+        print("  No change.")
+
+
+def _model_flow_bedrock(config, current_model=""):
+    """AWS Bedrock provider: verify credentials, pick region, discover models.
+
+    Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint.
+    Auth is handled by the AWS SDK default credential chain (env vars, profile,
+    instance role), so no API key prompt is needed.
+    """
+    from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
+    from hermes_cli.config import load_config, save_config
+    from hermes_cli.models import _PROVIDER_MODELS
+
+    # 1. Check for AWS credentials
+    try:
+        from agent.bedrock_adapter import (
+            has_aws_credentials,
+            resolve_aws_auth_env_var,
+            resolve_bedrock_region,
+            discover_bedrock_models,
+        )
+    except ImportError:
+        print("  ✗ boto3 is not installed. Install it with:")
+        print("    pip install boto3")
+        print()
+        return
+
+    if not has_aws_credentials():
+        print("  ⚠ No AWS credentials detected via environment variables.")
+        print("  Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
+        print()
+
+    auth_var = resolve_aws_auth_env_var()
+    if auth_var:
+        print(f"  AWS credentials: {auth_var} ✓")
+    else:
+        print("  AWS credentials: boto3 default chain (instance role / SSO)")
+    print()
+
+    # 2. Region selection
+    current_region = resolve_bedrock_region()
+    try:
+        region_input = input(f"  AWS Region [{current_region}]: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        print()
+        return
+    region = region_input or current_region
+
+    # 2b. Authentication mode
+    print("  Choose authentication method:")
+    print()
+    print("    1. IAM credential chain (recommended)")
+    print("       Works with EC2 instance roles, SSO, env vars, aws configure")
+    print("    2. Bedrock API Key")
+    print("       Enter your Bedrock API Key directly — also supports")
+    print("       team scenarios where an admin distributes keys")
+    print()
+    try:
+        auth_choice = input("  Choice [1]: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        print()
+        return
+
+    if auth_choice == "2":
+        _model_flow_bedrock_api_key(config, region, current_model)
+        return
+
+    # 3. Model discovery — try live API first, fall back to static list
+    print(f"  Discovering models in {region}...")
+    live_models = discover_bedrock_models(region)
+
+    if live_models:
+        _EXCLUDE_PREFIXES = (
+            "stability.", "cohere.embed", "twelvelabs.", "us.stability.",
+            "us.cohere.embed", "us.twelvelabs.", "global.cohere.embed",
+            "global.twelvelabs.",
+        )
+        _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
+        filtered = []
+        for m in live_models:
+            mid = m["id"]
+            if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
+                continue
+            if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
+                continue
+            filtered.append(m)
+
+        # Deduplicate: prefer inference profiles (us.*, global.*) over bare
+        # foundation model IDs.
+        profile_base_ids = set()
+        for m in filtered:
+            mid = m["id"]
+            if mid.startswith(("us.", "global.")):
+                base = mid.split(".", 1)[1] if "." in mid[3:] else mid
+                profile_base_ids.add(base)
+
+        deduped = []
+        for m in filtered:
+            mid = m["id"]
+            if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
+                continue
+            deduped.append(m)
+
+        _RECOMMENDED = [
+            "us.anthropic.claude-sonnet-4-6",
+            "us.anthropic.claude-opus-4-6",
+            "us.anthropic.claude-haiku-4-5",
+            "us.amazon.nova-pro",
+            "us.amazon.nova-lite",
+            "us.amazon.nova-micro",
+            "deepseek.v3",
+            "us.meta.llama4-maverick",
+            "us.meta.llama4-scout",
+        ]
+
+        def _sort_key(m):
+            mid = m["id"]
+            for i, rec in enumerate(_RECOMMENDED):
+                if mid.startswith(rec):
+                    return (0, i, mid)
+            if mid.startswith("global."):
+                return (1, 0, mid)
+            return (2, 0, mid)
+
+        deduped.sort(key=_sort_key)
+        model_list = [m["id"] for m in deduped]
+        print(f"  Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)")
+    else:
+        model_list = _PROVIDER_MODELS.get("bedrock", [])
+        if model_list:
+            print(f"  Using {len(model_list)} curated models (live discovery unavailable)")
+        else:
+            print("  No models found. Check IAM permissions for bedrock:ListFoundationModels.")
+            return
+
+    # 4. Model selection
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("  Model ID: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        _save_model_choice(selected)
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "bedrock"
+        model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
+        model.pop("api_mode", None)  # bedrock_converse is auto-detected
+
+        bedrock_cfg = cfg.get("bedrock", {})
+        if not isinstance(bedrock_cfg, dict):
+            bedrock_cfg = {}
+        bedrock_cfg["region"] = region
+        cfg["bedrock"] = bedrock_cfg
+
+        save_config(cfg)
+        deactivate_provider()
+
+        print(f"  Default model set to: {selected} (via AWS Bedrock, {region})")
+    else:
+        print("  No change.")
+
+
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
    """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
    from hermes_cli.auth import (
@@ -2471,34 +2819,43 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
    #   1. models.dev registry (cached, filtered for agentic/tool-capable models)
    #   2. Curated static fallback list (offline insurance)
    #   3. Live /models endpoint probe (small providers without models.dev data)
-    curated = _PROVIDER_MODELS.get(provider_id, [])
-
-    # Try models.dev first — returns tool-capable models, filtered for noise
-    mdev_models: list = []
-    try:
-        from agent.models_dev import list_agentic_models
-        mdev_models = list_agentic_models(provider_id)
-    except Exception:
-        pass
-
-    if mdev_models:
-        model_list = mdev_models
-        print(f"  Found {len(model_list)} model(s) from models.dev registry")
-    elif curated and len(curated) >= 8:
-        # Curated list is substantial — use it directly, skip live probe
-        model_list = curated
-        print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
-    else:
+    #
+    # Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
+    if provider_id == "ollama-cloud":
+        from hermes_cli.models import fetch_ollama_cloud_models
        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
-        live_models = fetch_api_models(api_key_for_probe, effective_base)
-        if live_models and len(live_models) >= len(curated):
-            model_list = live_models
-            print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
-        else:
+        model_list = fetch_ollama_cloud_models(api_key=api_key_for_probe, base_url=effective_base)
+        if model_list:
+            print(f"  Found {len(model_list)} model(s) from Ollama Cloud")
+    else:
+        curated = _PROVIDER_MODELS.get(provider_id, [])
+
+        # Try models.dev first — returns tool-capable models, filtered for noise
+        mdev_models: list = []
+        try:
+            from agent.models_dev import list_agentic_models
+            mdev_models = list_agentic_models(provider_id)
+        except Exception:
+            pass
+
+        if mdev_models:
+            model_list = mdev_models
+            print(f"  Found {len(model_list)} model(s) from models.dev registry")
+        elif curated and len(curated) >= 8:
+            # Curated list is substantial — use it directly, skip live probe
            model_list = curated
-            if model_list:
-                print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
-        # else: no defaults either, will fall through to raw input
+            print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
+        else:
+            api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+            live_models = fetch_api_models(api_key_for_probe, effective_base)
+            if live_models and len(live_models) >= len(curated):
+                model_list = live_models
+                print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
+            else:
+                model_list = curated
+                if model_list:
+                    print(f"  Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
+            # else: no defaults either, will fall through to raw input

    if provider_id in {"opencode-zen", "opencode-go"}:
        model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
@@ -4021,7 +4378,40 @@ def cmd_update(args):
                                    capture_output=True, text=True, timeout=15,
                                )
                                if restart.returncode == 0:
-                                    restarted_services.append(svc_name)
+                                    # Verify the service actually survived the
+                                    # restart.  systemctl restart returns 0 even
+                                    # if the new process crashes immediately.
+                                    import time as _time
+                                    _time.sleep(3)
+                                    verify = subprocess.run(
+                                        scope_cmd + ["is-active", svc_name],
+                                        capture_output=True, text=True, timeout=5,
+                                    )
+                                    if verify.stdout.strip() == "active":
+                                        restarted_services.append(svc_name)
+                                    else:
+                                        # Retry once — transient startup failures
+                                        # (stale module cache, import race) often
+                                        # resolve on the second attempt.
+                                        print(f"  ⚠ {svc_name} died after restart, retrying...")
+                                        retry = subprocess.run(
+                                            scope_cmd + ["restart", svc_name],
+                                            capture_output=True, text=True, timeout=15,
+                                        )
+                                        _time.sleep(3)
+                                        verify2 = subprocess.run(
+                                            scope_cmd + ["is-active", svc_name],
+                                            capture_output=True, text=True, timeout=5,
+                                        )
+                                        if verify2.stdout.strip() == "active":
+                                            restarted_services.append(svc_name)
+                                            print(f"  ✓ {svc_name} recovered on retry")
+                                        else:
+                                            print(
+                                                f"  ✗ {svc_name} failed to stay running after restart.\n"
+                                                f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
+                                                f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
+                                            )
                                else:
                                    print(f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}")
                    except (FileNotFoundError, subprocess.TimeoutExpired):
@@ -4109,6 +4499,8 @@ def _coalesce_session_name_args(argv: list) -> list:
        "status", "cron", "doctor", "config", "pairing", "skills", "tools",
        "mcp", "sessions", "insights", "version", "update", "uninstall",
        "profile", "dashboard",
+        "honcho", "claw", "plugins", "acp",
+        "webhook", "memory", "dump", "debug", "backup", "import", "completion", "logs",
    }
    _SESSION_FLAGS = {"-c", "--continue", "-r", "--resume"}

@@ -4404,17 +4796,20 @@ def cmd_dashboard(args):
        host=args.host,
        port=args.port,
        open_browser=not args.no_open,
+        allow_public=getattr(args, "insecure", False),
    )


-def cmd_completion(args):
+def cmd_completion(args, parser=None):
    """Print shell completion script."""
-    from hermes_cli.profiles import generate_bash_completion, generate_zsh_completion
+    from hermes_cli.completion import generate_bash, generate_zsh, generate_fish
    shell = getattr(args, "shell", "bash")
    if shell == "zsh":
-        print(generate_zsh_completion())
+        print(generate_zsh(parser))
+    elif shell == "fish":
+        print(generate_fish(parser))
    else:
-        print(generate_bash_completion())
+        print(generate_bash(parser))


 def cmd_logs(args):
@@ -4559,7 +4954,7 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
+        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
        default=None,
        help="Inference provider (default: auto)"
    )
@@ -4696,6 +5091,7 @@ For more help on a command:
    # gateway start
    gateway_start = gateway_subparsers.add_parser("start", help="Start the installed systemd/launchd background service")
    gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
+    gateway_start.add_argument("--all", action="store_true", help="Kill ALL stale gateway processes across all profiles before starting")
    
    # gateway stop
    gateway_stop = gateway_subparsers.add_parser("stop", help="Stop gateway service")
@@ -4705,6 +5101,7 @@ For more help on a command:
    # gateway restart
    gateway_restart = gateway_subparsers.add_parser("restart", help="Restart gateway service")
    gateway_restart.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
+    gateway_restart.add_argument("--all", action="store_true", help="Kill ALL gateway processes across all profiles before restarting")
    
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
@@ -5018,6 +5415,7 @@ Examples:
    hermes debug share --lines 500  Include more log lines
    hermes debug share --expire 30  Keep paste for 30 days
    hermes debug share --local      Print report locally (no upload)
+    hermes debug delete <url>       Delete a previously uploaded paste
 """,
    )
    debug_sub = debug_parser.add_subparsers(dest="debug_command")
@@ -5037,6 +5435,14 @@ Examples:
        "--local", action="store_true",
        help="Print the report locally instead of uploading",
    )
+    delete_parser = debug_sub.add_parser(
+        "delete",
+        help="Delete a paste uploaded by 'hermes debug share'",
+    )
+    delete_parser.add_argument(
+        "urls", nargs="*", default=[],
+        help="One or more paste URLs to delete (e.g. https://paste.rs/abc123)",
+    )
    debug_parser.set_defaults(func=cmd_debug)

    # =========================================================================
@@ -5194,6 +5600,25 @@ Examples:
    skills_uninstall = skills_subparsers.add_parser("uninstall", help="Remove a hub-installed skill")
    skills_uninstall.add_argument("name", help="Skill name to remove")

+    skills_reset = skills_subparsers.add_parser(
+        "reset",
+        help="Reset a bundled skill — clears 'user-modified' tracking so updates work again",
+        description=(
+            "Clear a bundled skill's entry from the sync manifest (~/.hermes/skills/.bundled_manifest) "
+            "so future 'hermes update' runs stop marking it as user-modified. Pass --restore to also "
+            "replace the current copy with the bundled version."
+        ),
+    )
+    skills_reset.add_argument("name", help="Skill name to reset (e.g. google-workspace)")
+    skills_reset.add_argument(
+        "--restore", action="store_true",
+        help="Also delete the current copy and re-copy the bundled version",
+    )
+    skills_reset.add_argument(
+        "--yes", "-y", action="store_true",
+        help="Skip confirmation prompt when using --restore",
+    )
+
    skills_publish = skills_subparsers.add_parser("publish", help="Publish a skill to a registry")
    skills_publish.add_argument("skill_path", help="Path to skill directory")
    skills_publish.add_argument("--to", default="github", choices=["github", "clawhub"], help="Target registry")
@@ -5317,6 +5742,18 @@ Examples:
    memory_sub.add_parser("setup", help="Interactive provider selection and configuration")
    memory_sub.add_parser("status", help="Show current memory provider config")
    memory_sub.add_parser("off", help="Disable external provider (built-in only)")
+    _reset_parser = memory_sub.add_parser(
+        "reset",
+        help="Erase all built-in memory (MEMORY.md and USER.md)",
+    )
+    _reset_parser.add_argument(
+        "--yes", "-y", action="store_true",
+        help="Skip confirmation prompt",
+    )
+    _reset_parser.add_argument(
+        "--target", choices=["all", "memory", "user"], default="all",
+        help="Which store to reset: 'all' (default), 'memory', or 'user'",
+    )

    def cmd_memory(args):
        sub = getattr(args, "memory_command", None)
@@ -5329,6 +5766,44 @@ Examples:
            save_config(config)
            print("\n  ✓ Memory provider: built-in only")
            print("  Saved to config.yaml\n")
+        elif sub == "reset":
+            from hermes_constants import get_hermes_home, display_hermes_home
+            mem_dir = get_hermes_home() / "memories"
+            target = getattr(args, "target", "all")
+            files_to_reset = []
+            if target in ("all", "memory"):
+                files_to_reset.append(("MEMORY.md", "agent notes"))
+            if target in ("all", "user"):
+                files_to_reset.append(("USER.md", "user profile"))
+
+            # Check what exists
+            existing = [(f, desc) for f, desc in files_to_reset if (mem_dir / f).exists()]
+            if not existing:
+                print(f"\n  Nothing to reset — no memory files found in {display_hermes_home()}/memories/\n")
+                return
+
+            print(f"\n  This will permanently erase the following memory files:")
+            for f, desc in existing:
+                path = mem_dir / f
+                size = path.stat().st_size
+                print(f"    ◆ {f} ({desc}) — {size:,} bytes")
+
+            if not getattr(args, "yes", False):
+                try:
+                    answer = input("\n  Type 'yes' to confirm: ").strip().lower()
+                except (EOFError, KeyboardInterrupt):
+                    print("\n  Cancelled.\n")
+                    return
+                if answer != "yes":
+                    print("  Cancelled.\n")
+                    return
+
+            for f, desc in existing:
+                (mem_dir / f).unlink()
+                print(f"  ✓ Deleted {f} ({desc})")
+
+            print(f"\n  Memory reset complete. New sessions will start with a blank slate.")
+            print(f"  Files were in: {display_hermes_home()}/memories/\n")
        else:
            from hermes_cli.memory_setup import memory_command
            memory_command(args)
@@ -5448,6 +5923,12 @@ Examples:
    mcp_cfg_p = mcp_sub.add_parser("configure", aliases=["config"], help="Toggle tool selection")
    mcp_cfg_p.add_argument("name", help="Server name to configure")

+    mcp_login_p = mcp_sub.add_parser(
+        "login",
+        help="Force re-authentication for an OAuth-based MCP server",
+    )
+    mcp_login_p.add_argument("name", help="Server name to re-authenticate")
+
    def cmd_mcp(args):
        from hermes_cli.mcp_config import mcp_command
        mcp_command(args)
@@ -5894,13 +6375,13 @@ Examples:
    # =========================================================================
    completion_parser = subparsers.add_parser(
        "completion",
-        help="Print shell completion script (bash or zsh)",
+        help="Print shell completion script (bash, zsh, or fish)",
    )
    completion_parser.add_argument(
-        "shell", nargs="?", default="bash", choices=["bash", "zsh"],
+        "shell", nargs="?", default="bash", choices=["bash", "zsh", "fish"],
        help="Shell type (default: bash)",
    )
-    completion_parser.set_defaults(func=cmd_completion)
+    completion_parser.set_defaults(func=lambda args: cmd_completion(args, parser))

    # =========================================================================
    # dashboard command
@@ -5913,6 +6394,10 @@ Examples:
    dashboard_parser.add_argument("--port", type=int, default=9119, help="Port (default 9119)")
    dashboard_parser.add_argument("--host", default="127.0.0.1", help="Host (default 127.0.0.1)")
    dashboard_parser.add_argument("--no-open", action="store_true", help="Don't open browser automatically")
+    dashboard_parser.add_argument(
+        "--insecure", action="store_true",
+        help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
@@ -5987,7 +6472,42 @@ Examples:
        sys.exit(1)

    _processed_argv = _coalesce_session_name_args(sys.argv[1:])
-    args = parser.parse_args(_processed_argv)
+
+    # ── Defensive subparser routing (bpo-9338 workaround) ───────────
+    # On some Python versions (notably <3.11), argparse fails to route
+    # subcommand tokens when the parent parser has nargs='?' optional
+    # arguments (--continue).  The symptom: "unrecognized arguments: model"
+    # even though 'model' is a registered subcommand.
+    #
+    # Fix: when argv contains a token matching a known subcommand, set
+    # subparsers.required=True to force deterministic routing.  If that
+    # fails (e.g. 'hermes -c model' where 'model' is consumed as the
+    # session name for --continue), fall back to the default behaviour.
+    import io as _io
+    _known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    _has_cmd_token = any(t in _known_cmds for t in _processed_argv if not t.startswith("-"))
+
+    if _has_cmd_token:
+        subparsers.required = True
+        _saved_stderr = sys.stderr
+        try:
+            sys.stderr = _io.StringIO()
+            args = parser.parse_args(_processed_argv)
+            sys.stderr = _saved_stderr
+        except SystemExit as exc:
+            sys.stderr = _saved_stderr
+            # Help/version flags (exit code 0) already printed output —
+            # re-raise immediately to avoid a second parse_args printing
+            # the same help text again (#10230).
+            if exc.code == 0:
+                raise
+            # Subcommand name was consumed as a flag value (e.g. -c model).
+            # Fall back to optional subparsers so argparse handles it normally.
+            subparsers.required = False
+            args = parser.parse_args(_processed_argv)
+    else:
+        subparsers.required = False
+        args = parser.parse_args(_processed_argv)

    # Handle --version flag
    if args.version:
@@ -279,8 +279,8 @@ def cmd_mcp_add(args):
        _info(f"Starting OAuth flow for '{name}'...")
        oauth_ok = False
        try:
-            from tools.mcp_oauth import build_oauth_auth
-            oauth_auth = build_oauth_auth(name, url)
+            from tools.mcp_oauth_manager import get_manager
+            oauth_auth = get_manager().get_or_build_provider(name, url, None)
            if oauth_auth:
                server_config["auth"] = "oauth"
                _success("OAuth configured (tokens will be acquired on first connection)")
@@ -428,10 +428,12 @@ def cmd_mcp_remove(args):
    _remove_mcp_server(name)
    _success(f"Removed '{name}' from config")

-    # Clean up OAuth tokens if they exist
+    # Clean up OAuth tokens if they exist — route through MCPOAuthManager so
+    # any provider instance cached in the current process (e.g. from an
+    # earlier `hermes mcp test` in the same session) is evicted too.
    try:
-        from tools.mcp_oauth import remove_oauth_tokens
-        remove_oauth_tokens(name)
+        from tools.mcp_oauth_manager import get_manager
+        get_manager().remove(name)
        _success("Cleaned up OAuth tokens")
    except Exception:
        pass
@@ -577,6 +579,63 @@ def _interpolate_value(value: str) -> str:
    return re.sub(r"\$\{(\w+)\}", _replace, value)


+# ─── hermes mcp login ────────────────────────────────────────────────────────
+
+def cmd_mcp_login(args):
+    """Force re-authentication for an OAuth-based MCP server.
+
+    Deletes cached tokens (both on disk and in the running process's
+    MCPOAuthManager cache) and triggers a fresh OAuth flow via the
+    existing probe path.
+
+    Use this when:
+      - Tokens are stuck in a bad state (server revoked, refresh token
+        consumed by an external process, etc.)
+      - You want to re-authenticate to change scopes or account
+      - A tool call returned ``needs_reauth: true``
+    """
+    name = args.name
+    servers = _get_mcp_servers()
+
+    if name not in servers:
+        _error(f"Server '{name}' not found in config.")
+        if servers:
+            _info(f"Available servers: {', '.join(servers)}")
+        return
+
+    server_config = servers[name]
+    url = server_config.get("url")
+    if not url:
+        _error(f"Server '{name}' has no URL — not an OAuth-capable server")
+        return
+    if server_config.get("auth") != "oauth":
+        _error(f"Server '{name}' is not configured for OAuth (auth={server_config.get('auth')})")
+        _info("Use `hermes mcp remove` + `hermes mcp add` to reconfigure auth.")
+        return
+
+    # Wipe both disk and in-memory cache so the next probe forces a fresh
+    # OAuth flow.
+    try:
+        from tools.mcp_oauth_manager import get_manager
+        mgr = get_manager()
+        mgr.remove(name)
+    except Exception as exc:
+        _warning(f"Could not clear existing OAuth state: {exc}")
+
+    print()
+    _info(f"Starting OAuth flow for '{name}'...")
+
+    # Probe triggers the OAuth flow (browser redirect + callback capture).
+    try:
+        tools = _probe_single_server(name, server_config)
+        if tools:
+            _success(f"Authenticated — {len(tools)} tool(s) available")
+        else:
+            _success("Authenticated (server reported no tools)")
+    except Exception as exc:
+        _error(f"Authentication failed: {exc}")
+
+
 # ─── hermes mcp configure ────────────────────────────────────────────────────

 def cmd_mcp_configure(args):
@@ -696,6 +755,7 @@ def mcp_command(args):
        "test": cmd_mcp_test,
        "configure": cmd_mcp_configure,
        "config": cmd_mcp_configure,
+        "login": cmd_mcp_login,
    }

    handler = handlers.get(action)
@@ -713,4 +773,5 @@ def mcp_command(args):
        _info("hermes mcp list                               List servers")
        _info("hermes mcp test <name>                        Test connection")
        _info("hermes mcp configure <name>                   Toggle tools")
+        _info("hermes mcp login <name>                       Re-authenticate OAuth")
        print()
@@ -58,9 +58,11 @@ def _prompt(label: str, default: str | None = None, secret: bool = False) -> str
 def _install_dependencies(provider_name: str) -> None:
    """Install pip dependencies declared in plugin.yaml."""
    import subprocess
-    from pathlib import Path as _Path
+    from plugins.memory import find_provider_dir

-    plugin_dir = _Path(__file__).parent.parent / "plugins" / "memory" / provider_name
+    plugin_dir = find_provider_dir(provider_name)
+    if not plugin_dir:
+        return
    yaml_path = plugin_dir / "plugin.yaml"
    if not yaml_path.exists():
        return
@@ -324,6 +326,9 @@ def cmd_setup(args) -> None:
                val = _prompt(desc, default=str(effective_default) if effective_default else None)
                if val:
                    provider_config[key] = val
+                    # Also write to .env if this field has an env_var
+                    if env_var and env_var not in env_writes:
+                        env_writes[env_var] = val

    # Write activation key to config.yaml
    config["memory"]["provider"] = name
@@ -409,12 +414,13 @@ def cmd_status(args) -> None:
                    else:
                        print(f"  Status:    not available ✗")
                        schema = p.get_config_schema() if hasattr(p, "get_config_schema") else []
-                        secrets = [f for f in schema if f.get("secret")]
-                        if secrets:
+                        # Check all fields that have env_var (both secret and non-secret)
+                        required_fields = [f for f in schema if f.get("env_var")]
+                        if required_fields:
                            print(f"  Missing:")
-                            for s in secrets:
-                                env_var = s.get("env_var", "")
-                                url = s.get("url", "")
+                            for f in required_fields:
+                                env_var = f.get("env_var", "")
+                                url = f.get("url", "")
                                is_set = bool(os.environ.get(env_var))
                                mark = "✓" if is_set else "✗"
                                line = f"    {mark} {env_var}"
@@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
+    "ollama-cloud",
    "custom",
 })

@@ -274,6 +274,11 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
    is_global = False
    explicit_provider = ""

+    # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
+    # A single Unicode dash before a flag keyword becomes "--"
+    import re as _re
+    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
+
    # Extract --global
    if "--global" in raw_args:
        is_global = True
@@ -452,6 +457,7 @@ def switch_model(
        ModelSwitchResult with all information the caller needs.
    """
    from hermes_cli.models import (
+        copilot_model_api_mode,
        detect_provider_for_model,
        validate_requested_model,
        opencode_model_api_mode,
@@ -705,14 +711,38 @@ def switch_model(
            error_message=msg,
        )

+    # Apply auto-correction if validation found a closer match
+    if validation.get("corrected_model"):
+        new_model = validation["corrected_model"]
+
+    # --- Copilot api_mode override ---
+    if target_provider in {"copilot", "github-copilot"}:
+        api_mode = copilot_model_api_mode(new_model, api_key=api_key)
+
    # --- OpenCode api_mode override ---
-    if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}:
+    if target_provider in {"opencode-zen", "opencode-go", "opencode"}:
        api_mode = opencode_model_api_mode(target_provider, new_model)

    # --- Determine api_mode if not already set ---
    if not api_mode:
        api_mode = determine_api_mode(target_provider, base_url)

+    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
+    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
+    # trailing /v1 so the SDK constructs the correct path (e.g.
+    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
+    # Mirrors the same logic in hermes_cli.runtime_provider.resolve_runtime_provider;
+    # without it, /model switches into an anthropic_messages-routed OpenCode
+    # model (e.g. `/model minimax-m2.7` on opencode-go, `/model claude-sonnet-4-6`
+    # on opencode-zen) hit a double /v1 and returned OpenCode's website 404 page.
+    if (
+        api_mode == "anthropic_messages"
+        and target_provider in {"opencode-zen", "opencode-go"}
+        and isinstance(base_url, str)
+        and base_url
+    ):
+        base_url = re.sub(r"/v1/?$", "", base_url)
+
    # --- Get capabilities (legacy) ---
    capabilities = get_model_capabilities(target_provider, new_model)

@@ -782,7 +812,8 @@ def list_authenticated_providers(
    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS

    results: List[dict] = []
-    seen_slugs: set = set()
+    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
+    seen_mdev_ids: set = set()  # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)

    data = fetch_models_dev()

@@ -792,9 +823,18 @@ def list_authenticated_providers(
    # "nous" shares OpenRouter's curated list if not separately defined
    if "nous" not in curated:
        curated["nous"] = curated["openrouter"]
+    # Ollama Cloud uses dynamic discovery (no static curated list)
+    if "ollama-cloud" not in curated:
+        from hermes_cli.models import fetch_ollama_cloud_models
+        curated["ollama-cloud"] = fetch_ollama_cloud_models()

    # --- 1. Check Hermes-mapped providers ---
    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
+        # Skip aliases that map to the same models.dev provider (e.g.
+        # kimi-coding and kimi-coding-cn both → kimi-for-coding).
+        # The first one with valid credentials wins (#10526).
+        if mdev_id in seen_mdev_ids:
+            continue
        pdata = data.get(mdev_id)
        if not isinstance(pdata, dict):
            continue
@@ -833,7 +873,8 @@ def list_authenticated_providers(
            "total_models": total,
            "source": "built-in",
        })
-        seen_slugs.add(slug)
+        seen_slugs.add(slug.lower())
+        seen_mdev_ids.add(mdev_id)

    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
    from hermes_cli.providers import HERMES_OVERLAYS
@@ -845,12 +886,12 @@ def list_authenticated_providers(
    _mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}

    for pid, overlay in HERMES_OVERLAYS.items():
-        if pid in seen_slugs:
+        if pid.lower() in seen_slugs:
            continue

        # Resolve Hermes slug — e.g. "github-copilot" → "copilot"
        hermes_slug = _mdev_to_hermes.get(pid, pid)
-        if hermes_slug in seen_slugs:
+        if hermes_slug.lower() in seen_slugs:
            continue

        # Check if credentials exist
@@ -931,8 +972,8 @@ def list_authenticated_providers(
            "total_models": total,
            "source": "hermes",
        })
-        seen_slugs.add(pid)
-        seen_slugs.add(hermes_slug)
+        seen_slugs.add(pid.lower())
+        seen_slugs.add(hermes_slug.lower())

    # --- 2b. Cross-check canonical provider list ---
    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
@@ -944,7 +985,7 @@ def list_authenticated_providers(
        _canon_provs = []

    for _cp in _canon_provs:
-        if _cp.slug in seen_slugs:
+        if _cp.slug.lower() in seen_slugs:
            continue

        # Check credentials via PROVIDER_REGISTRY (auth.py)
@@ -991,7 +1032,7 @@ def list_authenticated_providers(
            "total_models": _cp_total,
            "source": "canonical",
        })
-        seen_slugs.add(_cp.slug)
+        seen_slugs.add(_cp.slug.lower())

    # --- 3. User-defined endpoints from config ---
    if user_providers and isinstance(user_providers, dict):
@@ -1064,7 +1105,7 @@ def list_authenticated_providers(
                groups[slug]["models"].append(default_model)

        for slug, grp in groups.items():
-            if slug in seen_slugs:
+            if slug.lower() in seen_slugs:
                continue
            results.append({
                "slug": slug,
@@ -1076,11 +1117,9 @@ def list_authenticated_providers(
                "source": "user-config",
                "api_url": grp["api_url"],
            })
-            seen_slugs.add(slug)
+            seen_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))

    return results
-
-
@@ -11,7 +11,9 @@ import json
 import os
 import urllib.request
 import urllib.error
+import time
 from difflib import get_close_matches
+from pathlib import Path
 from typing import Any, NamedTuple, Optional

 COPILOT_BASE_URL = "https://api.githubcopilot.com"
@@ -24,11 +26,13 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("anthropic/claude-opus-4.6",       "recommended"),
+    ("anthropic/claude-opus-4.7",       "recommended"),
+    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
    ("qwen/qwen3.6-plus",               ""),
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
+    ("openrouter/elephant-alpha",       "free"),
    ("openai/gpt-5.4",                  ""),
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2-pro",               ""),
@@ -43,6 +47,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
    ("z-ai/glm-5.1",                    ""),
+    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
    ("moonshotai/kimi-k2.5",            ""),
    ("x-ai/grok-4.20",                  ""),
@@ -71,6 +76,7 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "xiaomi/mimo-v2-pro",
+        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
@@ -88,6 +94,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
        "z-ai/glm-5.1",
+        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "moonshotai/kimi-k2.5",
        "x-ai/grok-4.20-beta",
@@ -97,6 +104,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
+        "openrouter/elephant-alpha",
    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
@@ -129,26 +137,23 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemma-4-31b-it",
        "gemma-4-26b-it",
    ],
+    "google-gemini-cli": [
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+    ],
    "zai": [
        "glm-5.1",
        "glm-5",
+        "glm-5v-turbo",
        "glm-5-turbo",
        "glm-4.7",
        "glm-4.5",
        "glm-4.5-flash",
    ],
    "xai": [
-        "grok-4.20-0309-reasoning",
-        "grok-4.20-0309-non-reasoning",
-        "grok-4.20-multi-agent-0309",
+        "grok-4.20-reasoning",
        "grok-4-1-fast-reasoning",
-        "grok-4-1-fast-non-reasoning",
-        "grok-4-fast-reasoning",
-        "grok-4-fast-non-reasoning",
-        "grok-4-0709",
-        "grok-code-fast-1",
-        "grok-3",
-        "grok-3-mini",
    ],
    "kimi-coding": [
        "kimi-for-coding",
@@ -183,6 +188,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "MiniMax-M2",
    ],
    "anthropic": [
+        "claude-opus-4-7",
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-5-20251101",
@@ -244,6 +250,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "big-pickle",
    ],
    "opencode-go": [
+        "glm-5.1",
        "glm-5",
        "kimi-k2.5",
        "mimo-v2-pro",
@@ -298,6 +305,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
    ],
+    # AWS Bedrock — static fallback list used when dynamic discovery is
+    # unavailable (no boto3, no credentials, or API error).  The agent
+    # prefers live discovery via ListFoundationModels + ListInferenceProfiles.
+    # Use inference profile IDs (us.*) since most models require them.
+    "bedrock": [
+        "us.anthropic.claude-sonnet-4-6",
+        "us.anthropic.claude-opus-4-6-v1",
+        "us.anthropic.claude-haiku-4-5-20251001-v1:0",
+        "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
+        "us.amazon.nova-pro-v1:0",
+        "us.amazon.nova-lite-v1:0",
+        "us.amazon.nova-micro-v1:0",
+        "deepseek.v3.2",
+        "us.meta.llama4-maverick-17b-instruct-v1:0",
+        "us.meta.llama4-scout-17b-instruct-v1:0",
+    ],
 }

 # ---------------------------------------------------------------------------
@@ -512,31 +535,35 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
+    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
    ProviderEntry("copilot-acp",    "GitHub Copilot ACP",       "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
    ProviderEntry("huggingface",    "Hugging Face",             "Hugging Face Inference Providers (20+ open models)"),
    ProviderEntry("gemini",         "Google AI Studio",         "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
+    ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)",   "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
    ProviderEntry("deepseek",       "DeepSeek",                 "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
    ProviderEntry("xai",            "xAI",                      "xAI (Grok models — direct API)"),
    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
-    ProviderEntry("kimi-coding",    "Kimi / Moonshot",          "Kimi / Moonshot (Moonshot AI direct API)"),
+    ProviderEntry("kimi-coding",    "Kimi / Kimi Coding Plan",  "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
-    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
-    ProviderEntry("ai-gateway",     "AI Gateway",               "AI Gateway (Vercel — 200+ models, pay-per-use)"),
+    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, pay-per-use)"),
+    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
 ]

 # Derived dicts — used throughout the codebase
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider

+
 _PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
@@ -577,14 +604,22 @@ _PROVIDER_ALIASES = {
    "qwen": "alibaba",
    "alibaba-cloud": "alibaba",
    "qwen-portal": "qwen-oauth",
+    "gemini-cli": "google-gemini-cli",
+    "gemini-oauth": "google-gemini-cli",
    "hf": "huggingface",
    "hugging-face": "huggingface",
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
+    "aws": "bedrock",
+    "aws-bedrock": "bedrock",
+    "amazon-bedrock": "bedrock",
+    "amazon": "bedrock",
    "grok": "xai",
    "x-ai": "xai",
    "x.ai": "xai",
+    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
+    "ollama_cloud": "ollama-cloud",
 }


@@ -1021,7 +1056,7 @@ def detect_provider_for_model(
            return (resolved_provider, default_models[0])

    # Aggregators list other providers' models — never auto-switch TO them
-    _AGGREGATORS = {"nous", "openrouter"}
+    _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}

    # If the model belongs to the current provider's catalog, don't suggest switching
    current_models = _PROVIDER_MODELS.get(current_provider, [])
@@ -1038,7 +1073,8 @@ def detect_provider_for_model(
            break

    if direct_match:
-        # Check if we have credentials for this provider
+        # Check if we have credentials for this provider — env vars,
+        # credential pool, or auth store entries.
        has_creds = False
        try:
            from hermes_cli.auth import PROVIDER_REGISTRY
@@ -1051,16 +1087,28 @@ def detect_provider_for_model(
                        break
        except Exception:
            pass
+        # Also check credential pool and auth store — covers OAuth,
+        # Claude Code tokens, and other non-env-var credentials (#10300).
+        if not has_creds:
+            try:
+                from agent.credential_pool import load_pool
+                pool = load_pool(direct_match)
+                if pool.has_credentials():
+                    has_creds = True
+            except Exception:
+                pass
+        if not has_creds:
+            try:
+                from hermes_cli.auth import _load_auth_store
+                store = _load_auth_store()
+                if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
+                    has_creds = True
+            except Exception:
+                pass

-        if has_creds:
-            return (direct_match, name)
-
-        # No direct creds — try to find this model on OpenRouter instead
-        or_slug = _find_openrouter_slug(name)
-        if or_slug:
-            return ("openrouter", or_slug)
-        # Still return the direct provider — credential resolution will
-        # give a clear error rather than silently using the wrong provider
+        # Always return the direct provider match.  If credentials are
+        # missing, the client init will give a clear error rather than
+        # silently routing through the wrong provider (#10300).
        return (direct_match, name)

    # --- Step 2: check OpenRouter catalog ---
@@ -1250,6 +1298,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
        live = _fetch_ai_gateway_models()
        if live:
            return live
+    if normalized == "ollama-cloud":
+        live = fetch_ollama_cloud_models(force_refresh=force_refresh)
+        if live:
+            return live
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:
@@ -1534,6 +1586,11 @@ def copilot_model_api_mode(
    primary signal.  Falls back to the catalog's ``supported_endpoints``
    only for models not covered by the pattern check.
    """
+    # Fetch the catalog once so normalize + endpoint check share it
+    # (avoids two redundant network calls for non-GPT-5 models).
+    if catalog is None and api_key:
+        catalog = fetch_github_model_catalog(api_key=api_key)
+
    normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
    if not normalized:
        return "chat_completions"
@@ -1543,9 +1600,6 @@ def copilot_model_api_mode(
        return "codex_responses"

    # Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
-    if catalog is None and api_key:
-        catalog = fetch_github_model_catalog(api_key=api_key)
-
    if catalog:
        catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
        if isinstance(catalog_entry, dict):
@@ -1760,6 +1814,125 @@ def fetch_api_models(
    return probe_api_models(api_key, base_url, timeout=timeout).get("models")


+# ---------------------------------------------------------------------------
+# Ollama Cloud — merged model discovery with disk cache
+# ---------------------------------------------------------------------------
+
+
+
+_OLLAMA_CLOUD_CACHE_TTL = 3600  # 1 hour
+
+
+def _ollama_cloud_cache_path() -> Path:
+    """Return the path for the Ollama Cloud model cache."""
+    from hermes_constants import get_hermes_home
+    return get_hermes_home() / "ollama_cloud_models_cache.json"
+
+
+def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
+    """Load cached Ollama Cloud models from disk.
+
+    Args:
+        ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
+    """
+    try:
+        cache_path = _ollama_cloud_cache_path()
+        if not cache_path.exists():
+            return None
+        with open(cache_path, encoding="utf-8") as f:
+            data = json.load(f)
+        if not isinstance(data, dict):
+            return None
+        models = data.get("models")
+        if not (isinstance(models, list) and models):
+            return None
+        if not ignore_ttl:
+            cached_at = data.get("cached_at", 0)
+            if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
+                return None  # stale
+        return data
+    except Exception:
+        pass
+    return None
+
+
+def _save_ollama_cloud_cache(models: list[str]) -> None:
+    """Persist the merged Ollama Cloud model list to disk."""
+    try:
+        from utils import atomic_json_write
+        cache_path = _ollama_cloud_cache_path()
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
+    except Exception:
+        pass
+
+
+def fetch_ollama_cloud_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    *,
+    force_refresh: bool = False,
+) -> list[str]:
+    """Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.
+
+    Resolution order:
+      1. Disk cache (if fresh, < 1 hour, and not force_refresh)
+      2. Live ``/v1/models`` endpoint (primary — freshest source)
+      3. models.dev registry (secondary — fills gaps for unlisted models)
+      4. Merge: live models first, then models.dev additions (deduped)
+
+    Returns a list of model IDs (never None — empty list on total failure).
+    """
+    # 1. Check disk cache
+    if not force_refresh:
+        cached = _load_ollama_cloud_cache()
+        if cached is not None:
+            return cached["models"]
+
+    # 2. Live API probe
+    if not api_key:
+        api_key = os.getenv("OLLAMA_API_KEY", "")
+    if not base_url:
+        base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"
+
+    live_models: list[str] = []
+    if api_key:
+        result = fetch_api_models(api_key, base_url, timeout=8.0)
+        if result:
+            live_models = result
+
+    # 3. models.dev registry
+    mdev_models: list[str] = []
+    try:
+        from agent.models_dev import list_agentic_models
+        mdev_models = list_agentic_models("ollama-cloud")
+    except Exception:
+        pass
+
+    # 4. Merge: live first, then models.dev additions (deduped, order-preserving)
+    if live_models or mdev_models:
+        seen: set[str] = set()
+        merged: list[str] = []
+        for m in live_models:
+            if m and m not in seen:
+                seen.add(m)
+                merged.append(m)
+        for m in mdev_models:
+            if m and m not in seen:
+                seen.add(m)
+                merged.append(m)
+        if merged:
+            _save_ollama_cloud_cache(merged)
+            return merged
+
+    # Total failure — return stale cache if available (ignore TTL)
+    stale = _load_ollama_cloud_cache(ignore_ttl=True)
+    if stale is not None:
+        return stale["models"]
+
+    return []
+
+
 def validate_requested_model(
    model_name: str,
    provider: Optional[str],
@@ -1818,6 +1991,17 @@ def validate_requested_model(
                    "message": None,
                }

+            # Auto-correct if the top match is very similar (e.g. typo)
+            auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
+            if auto:
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "corrected_model": auto[0],
+                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
+                }
+
            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
            suggestion_text = ""
            if suggestions:
@@ -1869,6 +2053,16 @@ def validate_requested_model(
                    "recognized": True,
                    "message": None,
                }
+            # Auto-correct if the top match is very similar (e.g. typo)
+            auto = get_close_matches(requested_for_lookup, codex_models, n=1, cutoff=0.9)
+            if auto:
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "corrected_model": auto[0],
+                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
+                }
            suggestions = get_close_matches(requested_for_lookup, codex_models, n=3, cutoff=0.5)
            suggestion_text = ""
            if suggestions:
@@ -1901,6 +2095,18 @@ def validate_requested_model(
            # the user may have access to models not shown in the public
            # listing (e.g. Z.AI Pro/Max plans can use glm-5 on coding
            # endpoints even though it's not in /models).  Warn but allow.
+
+            # Auto-correct if the top match is very similar (e.g. typo)
+            auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
+            if auto:
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "corrected_model": auto[0],
+                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
+                }
+
            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
            suggestion_text = ""
            if suggestions:
@@ -1919,6 +2125,42 @@ def validate_requested_model(

    # api_models is None — couldn't reach API.  Accept and persist,
    # but warn so typos don't silently break things.
+
+    # Bedrock: use our own discovery instead of HTTP /models endpoint.
+    # Bedrock's bedrock-runtime URL doesn't support /models — it uses the
+    # AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
+    if normalized == "bedrock":
+        try:
+            from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
+            region = resolve_bedrock_region()
+            discovered = discover_bedrock_models(region)
+            discovered_ids = {m["id"] for m in discovered}
+            if requested in discovered_ids:
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "message": None,
+                }
+            # Not in discovered list — still accept (user may have custom
+            # inference profiles or cross-account access), but warn.
+            suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
+            suggestion_text = ""
+            if suggestions:
+                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": False,
+                "message": (
+                    f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
+                    f"It may still work with custom inference profiles or cross-account access."
+                    f"{suggestion_text}"
+                ),
+            }
+        except Exception:
+            pass  # Fall through to generic warning
+
    provider_label = _PROVIDER_LABELS.get(normalized, normalized)
    return {
        "accepted": True,
@@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
        "openai": "OpenAI TTS",
        "elevenlabs": "ElevenLabs",
        "edge": "Edge TTS",
+        "xai": "xAI TTS",
        "mistral": "Mistral Voxtral TTS",
        "neutts": "NeuTTS",
    }
@@ -257,6 +258,15 @@ def get_nous_subscription_features(
        terminal_cfg.get("modal_mode")
    )

+    # use_gateway flags — when True, the user explicitly opted into the
+    # Tool Gateway via `hermes model`, so direct credentials should NOT
+    # prevent gateway routing.
+    web_use_gateway = bool(web_cfg.get("use_gateway"))
+    tts_use_gateway = bool(tts_cfg.get("use_gateway"))
+    browser_use_gateway = bool(browser_cfg.get("use_gateway"))
+    image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
+    image_use_gateway = bool(image_gen_cfg.get("use_gateway"))
+
    direct_exa = bool(get_env_value("EXA_API_KEY"))
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
@@ -269,6 +279,21 @@ def get_nous_subscription_features(
    direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
    direct_modal = has_direct_modal_credentials()

+    # When use_gateway is set, suppress direct credentials for managed detection
+    if web_use_gateway:
+        direct_firecrawl = False
+        direct_exa = False
+        direct_parallel = False
+        direct_tavily = False
+    if image_use_gateway:
+        direct_fal = False
+    if tts_use_gateway:
+        direct_openai_tts = False
+        direct_elevenlabs = False
+    if browser_use_gateway:
+        direct_browser_use = False
+        direct_browserbase = False
+
    managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
    managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
@@ -439,37 +464,7 @@ def get_nous_subscription_features(
    )


-def get_nous_subscription_explainer_lines() -> list[str]:
-    if not managed_nous_tools_enabled():
-        return []

-    return [
-        "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
-        "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
-        "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
-    ]
-
-
-def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
-    """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`."""
-    if not managed_nous_tools_enabled():
-        return set()
-
-    features = get_nous_subscription_features(config)
-    if not features.provider_is_nous:
-        return set()
-
-    tts_cfg = config.get("tts")
-    if not isinstance(tts_cfg, dict):
-        tts_cfg = {}
-        config["tts"] = tts_cfg
-
-    current_tts = str(tts_cfg.get("provider") or "edge").strip().lower()
-    if current_tts not in {"", "edge"}:
-        return set()
-
-    tts_cfg["provider"] = "openai"
-    return {"tts"}


 def apply_nous_managed_defaults(
@@ -529,3 +524,255 @@ def apply_nous_managed_defaults(
        changed.add("image_gen")

    return changed
+
+
+# ---------------------------------------------------------------------------
+# Tool Gateway offer — single Y/n prompt after model selection
+# ---------------------------------------------------------------------------
+
+_GATEWAY_TOOL_LABELS = {
+    "web": "Web search & extract (Firecrawl)",
+    "image_gen": "Image generation (FAL)",
+    "tts": "Text-to-speech (OpenAI TTS)",
+    "browser": "Browser automation (Browser Use)",
+}
+
+
+def _get_gateway_direct_credentials() -> Dict[str, bool]:
+    """Return a dict of tool_key -> has_direct_credentials."""
+    return {
+        "web": bool(
+            get_env_value("FIRECRAWL_API_KEY")
+            or get_env_value("FIRECRAWL_API_URL")
+            or get_env_value("PARALLEL_API_KEY")
+            or get_env_value("TAVILY_API_KEY")
+            or get_env_value("EXA_API_KEY")
+        ),
+        "image_gen": bool(get_env_value("FAL_KEY")),
+        "tts": bool(
+            resolve_openai_audio_api_key()
+            or get_env_value("ELEVENLABS_API_KEY")
+        ),
+        "browser": bool(
+            get_env_value("BROWSER_USE_API_KEY")
+            or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
+        ),
+    }
+
+
+_GATEWAY_DIRECT_LABELS = {
+    "web": "Firecrawl/Exa/Parallel/Tavily key",
+    "image_gen": "FAL key",
+    "tts": "OpenAI/ElevenLabs key",
+    "browser": "Browser Use/Browserbase key",
+}
+
+_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
+
+
+def get_gateway_eligible_tools(
+    config: Optional[Dict[str, object]] = None,
+) -> tuple[list[str], list[str], list[str]]:
+    """Return (unconfigured, has_direct, already_managed) tool key lists.
+
+    - unconfigured: tools with no direct credentials (easy switch)
+    - has_direct: tools where the user has their own API keys
+    - already_managed: tools already routed through the gateway
+
+    All lists are empty when the user is not a paid Nous subscriber or
+    is not using Nous as their provider.
+    """
+    if not managed_nous_tools_enabled():
+        return [], [], []
+
+    if config is None:
+        from hermes_cli.config import load_config
+        config = load_config() or {}
+
+    # Quick provider check without the heavy get_nous_subscription_features call
+    model_cfg = config.get("model")
+    if not isinstance(model_cfg, dict) or str(model_cfg.get("provider") or "").strip().lower() != "nous":
+        return [], [], []
+
+    direct = _get_gateway_direct_credentials()
+
+    # Check which tools the user has explicitly opted into the gateway for.
+    # This is distinct from managed_by_nous which fires implicitly when
+    # no direct keys exist — we only skip the prompt for tools where
+    # use_gateway was explicitly set.
+    opted_in = {
+        "web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")),
+        "image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")),
+        "tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")),
+        "browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")),
+    }
+
+    unconfigured: list[str] = []
+    has_direct: list[str] = []
+    already_managed: list[str] = []
+    for key in _ALL_GATEWAY_KEYS:
+        if opted_in.get(key):
+            already_managed.append(key)
+        elif direct.get(key):
+            has_direct.append(key)
+        else:
+            unconfigured.append(key)
+    return unconfigured, has_direct, already_managed
+
+
+def apply_gateway_defaults(
+    config: Dict[str, object],
+    tool_keys: list[str],
+) -> set[str]:
+    """Apply Tool Gateway config for the given tool keys.
+
+    Sets ``use_gateway: true`` in each tool's config section so the
+    runtime prefers the gateway even when direct API keys are present.
+
+    Returns the set of tools that were actually changed.
+    """
+    changed: set[str] = set()
+
+    web_cfg = config.get("web")
+    if not isinstance(web_cfg, dict):
+        web_cfg = {}
+        config["web"] = web_cfg
+
+    tts_cfg = config.get("tts")
+    if not isinstance(tts_cfg, dict):
+        tts_cfg = {}
+        config["tts"] = tts_cfg
+
+    browser_cfg = config.get("browser")
+    if not isinstance(browser_cfg, dict):
+        browser_cfg = {}
+        config["browser"] = browser_cfg
+
+    if "web" in tool_keys:
+        web_cfg["backend"] = "firecrawl"
+        web_cfg["use_gateway"] = True
+        changed.add("web")
+
+    if "tts" in tool_keys:
+        tts_cfg["provider"] = "openai"
+        tts_cfg["use_gateway"] = True
+        changed.add("tts")
+
+    if "browser" in tool_keys:
+        browser_cfg["cloud_provider"] = "browser-use"
+        browser_cfg["use_gateway"] = True
+        changed.add("browser")
+
+    if "image_gen" in tool_keys:
+        image_cfg = config.get("image_gen")
+        if not isinstance(image_cfg, dict):
+            image_cfg = {}
+            config["image_gen"] = image_cfg
+        image_cfg["use_gateway"] = True
+        changed.add("image_gen")
+
+    return changed
+
+
+def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
+    """If eligible tools exist, prompt the user to enable the Tool Gateway.
+
+    Uses prompt_choice() with a description parameter so the curses TUI
+    shows the tool context alongside the choices.
+
+    Returns the set of tools that were enabled, or empty set if the user
+    declined or no tools were eligible.
+    """
+    unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config)
+    if not unconfigured and not has_direct:
+        return set()
+
+    try:
+        from hermes_cli.setup import prompt_choice
+    except Exception:
+        return set()
+
+    # Build description lines showing full status of all gateway tools
+    desc_parts: list[str] = [
+        "",
+        "  The Tool Gateway gives you access to web search, image generation,",
+        "  text-to-speech, and browser automation through your Nous subscription.",
+        "  No need to sign up for separate API keys — just pick the tools you want.",
+        "",
+    ]
+    if already_managed:
+        for k in already_managed:
+            desc_parts.append(f"  ✓ {_GATEWAY_TOOL_LABELS[k]} — using Tool Gateway")
+    if unconfigured:
+        for k in unconfigured:
+            desc_parts.append(f"  ○ {_GATEWAY_TOOL_LABELS[k]} — not configured")
+    if has_direct:
+        for k in has_direct:
+            desc_parts.append(f"  ○ {_GATEWAY_TOOL_LABELS[k]} — using {_GATEWAY_DIRECT_LABELS[k]}")
+
+    # Build short choice labels — detail is in the description above
+    choices: list[str] = []
+    choice_keys: list[str] = []  # maps choice index -> action
+
+    if unconfigured and has_direct:
+        choices.append("Enable for all tools (existing keys kept, not used)")
+        choice_keys.append("all")
+
+        choices.append("Enable only for tools without existing keys")
+        choice_keys.append("unconfigured")
+
+        choices.append("Skip")
+        choice_keys.append("skip")
+
+    elif unconfigured:
+        choices.append("Enable Tool Gateway")
+        choice_keys.append("unconfigured")
+
+        choices.append("Skip")
+        choice_keys.append("skip")
+
+    else:
+        choices.append("Enable Tool Gateway (existing keys kept, not used)")
+        choice_keys.append("all")
+
+        choices.append("Skip")
+        choice_keys.append("skip")
+
+    description = "\n".join(desc_parts) if desc_parts else None
+    # Default to "Enable" when user has no direct keys (new user),
+    # default to "Skip" when they have existing keys to preserve.
+    default_idx = 0 if not has_direct else len(choices) - 1
+
+    try:
+        idx = prompt_choice(
+            "Your Nous subscription includes the Tool Gateway.",
+            choices,
+            default_idx,
+            description=description,
+        )
+    except (KeyboardInterrupt, EOFError, OSError, SystemExit):
+        return set()
+
+    action = choice_keys[idx]
+    if action == "skip":
+        return set()
+
+    if action == "all":
+        # Apply to switchable tools + ensure already-managed tools also
+        # have use_gateway persisted in config for consistency.
+        to_apply = list(_ALL_GATEWAY_KEYS)
+    else:
+        to_apply = unconfigured
+
+    changed = apply_gateway_defaults(config, to_apply)
+    if changed:
+        from hermes_cli.config import save_config
+        save_config(config)
+        # Only report the tools that actually switched (not already-managed ones)
+        newly_switched = changed - set(already_managed)
+        for key in sorted(newly_switched):
+            label = _GATEWAY_TOOL_LABELS.get(key, key)
+            print(f"  ✓ {label}: enabled via Nous subscription")
+        if already_managed and not newly_switched:
+            print("  (all tools already using Tool Gateway)")
+    return changed
@@ -35,6 +35,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
    ("wecom",          PlatformInfo(label="💬 WeCom",           default_toolset="hermes-wecom")),
    ("wecom_callback", PlatformInfo(label="💬 WeCom Callback",  default_toolset="hermes-wecom-callback")),
    ("weixin",         PlatformInfo(label="💬 Weixin",          default_toolset="hermes-weixin")),
+    ("qqbot",          PlatformInfo(label="💬 QQBot",           default_toolset="hermes-qqbot")),
    ("webhook",        PlatformInfo(label="🔗 Webhook",         default_toolset="hermes-webhook")),
    ("api_server",     PlatformInfo(label="🌐 API Server",      default_toolset="hermes-api-server")),
 ])
@@ -112,6 +112,7 @@ class LoadedPlugin:
    module: Optional[types.ModuleType] = None
    tools_registered: List[str] = field(default_factory=list)
    hooks_registered: List[str] = field(default_factory=list)
+    commands_registered: List[str] = field(default_factory=list)
    enabled: bool = False
    error: Optional[str] = None

@@ -211,6 +212,84 @@ class PluginContext:
        }
        logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)

+    # -- slash command registration -------------------------------------------
+
+    def register_command(
+        self,
+        name: str,
+        handler: Callable,
+        description: str = "",
+    ) -> None:
+        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.
+
+        The handler signature is ``fn(raw_args: str) -> str | None``.
+        It may also be an async callable — the gateway dispatch handles both.
+
+        Unlike ``register_cli_command()`` (which creates ``hermes <subcommand>``
+        terminal commands), this registers in-session slash commands that users
+        invoke during a conversation.
+
+        Names conflicting with built-in commands are rejected with a warning.
+        """
+        clean = name.lower().strip().lstrip("/").replace(" ", "-")
+        if not clean:
+            logger.warning(
+                "Plugin '%s' tried to register a command with an empty name.",
+                self.manifest.name,
+            )
+            return
+
+        # Reject if it conflicts with a built-in command
+        try:
+            from hermes_cli.commands import resolve_command
+            if resolve_command(clean) is not None:
+                logger.warning(
+                    "Plugin '%s' tried to register command '/%s' which conflicts "
+                    "with a built-in command. Skipping.",
+                    self.manifest.name, clean,
+                )
+                return
+        except Exception:
+            pass  # If commands module isn't available, skip the check
+
+        self._manager._plugin_commands[clean] = {
+            "handler": handler,
+            "description": description or "Plugin command",
+            "plugin": self.manifest.name,
+        }
+        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)
+
+    # -- tool dispatch -------------------------------------------------------
+
+    def dispatch_tool(self, tool_name: str, args: dict, **kwargs) -> str:
+        """Dispatch a tool call through the registry, with parent agent context.
+
+        This is the public interface for plugin slash commands that need to call
+        tools like ``delegate_task`` without reaching into framework internals.
+        The parent agent (if available) is resolved automatically — plugins never
+        need to access the agent directly.
+
+        Args:
+            tool_name: Registry name of the tool (e.g. ``"delegate_task"``).
+            args: Tool arguments dict (same as what the model would pass).
+            **kwargs: Extra keyword args forwarded to the registry dispatch.
+
+        Returns:
+            JSON string from the tool handler (same format as model tool calls).
+        """
+        from tools.registry import registry
+
+        # Wire up parent agent context when available (CLI mode).
+        # In gateway mode _cli_ref is None — tools degrade gracefully
+        # (workspace hints fall back to TERMINAL_CWD, no spinner).
+        if "parent_agent" not in kwargs:
+            cli = self._manager._cli_ref
+            agent = getattr(cli, "agent", None) if cli else None
+            if agent is not None:
+                kwargs["parent_agent"] = agent
+
+        return registry.dispatch(tool_name, args, **kwargs)
+
    # -- context engine registration -----------------------------------------

    def register_context_engine(self, engine) -> None:
@@ -262,6 +341,53 @@ class PluginContext:
        self._manager._hooks.setdefault(hook_name, []).append(callback)
        logger.debug("Plugin %s registered hook: %s", self.manifest.name, hook_name)

+    # -- skill registration -------------------------------------------------
+
+    def register_skill(
+        self,
+        name: str,
+        path: Path,
+        description: str = "",
+    ) -> None:
+        """Register a read-only skill provided by this plugin.
+
+        The skill becomes resolvable as ``'<plugin_name>:<name>'`` via
+        ``skill_view()``.  It does **not** enter the flat
+        ``~/.hermes/skills/`` tree and is **not** listed in the system
+        prompt's ``<available_skills>`` index — plugin skills are
+        opt-in explicit loads only.
+
+        Raises:
+            ValueError: if *name* contains ``':'`` or invalid characters.
+            FileNotFoundError: if *path* does not exist.
+        """
+        from agent.skill_utils import _NAMESPACE_RE
+
+        if ":" in name:
+            raise ValueError(
+                f"Skill name '{name}' must not contain ':' "
+                f"(the namespace is derived from the plugin name "
+                f"'{self.manifest.name}' automatically)."
+            )
+        if not name or not _NAMESPACE_RE.match(name):
+            raise ValueError(
+                f"Invalid skill name '{name}'. Must match [a-zA-Z0-9_-]+."
+            )
+        if not path.exists():
+            raise FileNotFoundError(f"SKILL.md not found at {path}")
+
+        qualified = f"{self.manifest.name}:{name}"
+        self._manager._plugin_skills[qualified] = {
+            "path": path,
+            "plugin": self.manifest.name,
+            "bare_name": name,
+            "description": description,
+        }
+        logger.debug(
+            "Plugin %s registered skill: %s",
+            self.manifest.name, qualified,
+        )
+

 # ---------------------------------------------------------------------------
 # PluginManager
@@ -276,8 +402,11 @@ class PluginManager:
        self._plugin_tool_names: Set[str] = set()
        self._cli_commands: Dict[str, dict] = {}
        self._context_engine = None  # Set by a plugin via register_context_engine()
+        self._plugin_commands: Dict[str, dict] = {}  # Slash commands registered by plugins
        self._discovered: bool = False
        self._cli_ref = None  # Set by CLI after plugin discovery
+        # Plugin skill registry: qualified name → metadata dict.
+        self._plugin_skills: Dict[str, Dict[str, Any]] = {}

    # -----------------------------------------------------------------------
    # Public
@@ -436,6 +565,10 @@ class PluginManager:
                        for h in p.hooks_registered
                    }
                )
+                loaded.commands_registered = [
+                    c for c in self._plugin_commands
+                    if self._plugin_commands[c].get("plugin") == manifest.name
+                ]
                loaded.enabled = True

        except Exception as exc:
@@ -549,11 +682,34 @@ class PluginManager:
                    "enabled": loaded.enabled,
                    "tools": len(loaded.tools_registered),
                    "hooks": len(loaded.hooks_registered),
+                    "commands": len(loaded.commands_registered),
                    "error": loaded.error,
                }
            )
        return result

+    # -----------------------------------------------------------------------
+    # Plugin skill lookups
+    # -----------------------------------------------------------------------
+
+    def find_plugin_skill(self, qualified_name: str) -> Optional[Path]:
+        """Return the ``Path`` to a plugin skill's SKILL.md, or ``None``."""
+        entry = self._plugin_skills.get(qualified_name)
+        return entry["path"] if entry else None
+
+    def list_plugin_skills(self, plugin_name: str) -> List[str]:
+        """Return sorted bare names of all skills registered by *plugin_name*."""
+        prefix = f"{plugin_name}:"
+        return sorted(
+            e["bare_name"]
+            for qn, e in self._plugin_skills.items()
+            if qn.startswith(prefix)
+        )
+
+    def remove_plugin_skill(self, qualified_name: str) -> None:
+        """Remove a stale registry entry (silently ignores missing keys)."""
+        self._plugin_skills.pop(qualified_name, None)
+

 # ---------------------------------------------------------------------------
 # Module-level singleton & convenience functions
@@ -584,11 +740,64 @@ def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:



+def get_pre_tool_call_block_message(
+    tool_name: str,
+    args: Optional[Dict[str, Any]],
+    task_id: str = "",
+    session_id: str = "",
+    tool_call_id: str = "",
+) -> Optional[str]:
+    """Check ``pre_tool_call`` hooks for a blocking directive.
+
+    Plugins that need to enforce policy (rate limiting, security
+    restrictions, approval workflows) can return::
+
+        {"action": "block", "message": "Reason the tool was blocked"}
+
+    from their ``pre_tool_call`` callback.  The first valid block
+    directive wins.  Invalid or irrelevant hook return values are
+    silently ignored so existing observer-only hooks are unaffected.
+    """
+    hook_results = invoke_hook(
+        "pre_tool_call",
+        tool_name=tool_name,
+        args=args if isinstance(args, dict) else {},
+        task_id=task_id,
+        session_id=session_id,
+        tool_call_id=tool_call_id,
+    )
+
+    for result in hook_results:
+        if not isinstance(result, dict):
+            continue
+        if result.get("action") != "block":
+            continue
+        message = result.get("message")
+        if isinstance(message, str) and message:
+            return message
+
+    return None
+
+
 def get_plugin_context_engine():
    """Return the plugin-registered context engine, or None."""
    return get_plugin_manager()._context_engine


+def get_plugin_command_handler(name: str) -> Optional[Callable]:
+    """Return the handler for a plugin-registered slash command, or ``None``."""
+    entry = get_plugin_manager()._plugin_commands.get(name)
+    return entry["handler"] if entry else None
+
+
+def get_plugin_commands() -> Dict[str, dict]:
+    """Return the full plugin commands dict (name → {handler, description, plugin}).
+
+    Safe to call before discovery — returns an empty dict if no plugins loaded.
+    """
+    return get_plugin_manager()._plugin_commands
+
+
 def get_plugin_toolsets() -> List[tuple]:
    """Return plugin toolsets as ``(key, label, description)`` tuples.

@@ -608,7 +817,7 @@ def get_plugin_toolsets() -> List[tuple]:
    toolset_tools: Dict[str, List[str]] = {}
    toolset_plugin: Dict[str, LoadedPlugin] = {}
    for tool_name in manager._plugin_tool_names:
-        entry = registry._tools.get(tool_name)
+        entry = registry.get_entry(tool_name)
        if not entry:
            continue
        ts = entry.toolset
@@ -617,7 +826,7 @@ def get_plugin_toolsets() -> List[tuple]:
    # Map toolsets back to the plugin that registered them
    for _name, loaded in manager._plugins.items():
        for tool_name in loaded.tools_registered:
-            entry = registry._tools.get(tool_name)
+            entry = registry.get_entry(tool_name)
            if entry and entry.toolset in toolset_tools:
                toolset_plugin.setdefault(entry.toolset, loaded)

@@ -64,6 +64,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://portal.qwen.ai/v1",
        base_url_env_var="HERMES_QWEN_BASE_URL",
    ),
+    "google-gemini-cli": HermesOverlay(
+        transport="openai_chat",
+        auth_type="oauth_external",
+        base_url_override="cloudcode-pa://google",
+    ),
    "copilot-acp": HermesOverlay(
        transport="codex_responses",
        auth_type="external_process",
@@ -128,7 +133,7 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_env_var="HF_BASE_URL",
    ),
    "xai": HermesOverlay(
-        transport="openai_chat",
+        transport="codex_responses",
        base_url_override="https://api.x.ai/v1",
        base_url_env_var="XAI_BASE_URL",
    ),
@@ -141,6 +146,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://api.arcee.ai/api/v1",
        base_url_env_var="ARCEE_BASE_URL",
    ),
+    "ollama-cloud": HermesOverlay(
+        transport="openai_chat",
+        base_url_env_var="OLLAMA_BASE_URL",
+    ),
 }


@@ -180,6 +189,7 @@ ALIASES: Dict[str, str] = {
    # xai
    "x-ai": "xai",
    "x.ai": "xai",
+    "grok": "xai",

    # kimi-for-coding (models.dev ID)
    "kimi": "kimi-for-coding",
@@ -227,6 +237,11 @@ ALIASES: Dict[str, str] = {
    "qwen": "alibaba",
    "alibaba-cloud": "alibaba",

+    # google-gemini-cli (OAuth + Code Assist)
+    "gemini-cli": "google-gemini-cli",
+    "gemini-oauth": "google-gemini-cli",
+
+
    # huggingface
    "hf": "huggingface",
    "hugging-face": "huggingface",
@@ -236,6 +251,12 @@ ALIASES: Dict[str, str] = {
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",

+    # bedrock
+    "aws": "bedrock",
+    "aws-bedrock": "bedrock",
+    "amazon-bedrock": "bedrock",
+    "amazon": "bedrock",
+
    # arcee
    "arcee-ai": "arcee",
    "arceeai": "arcee",
@@ -244,7 +265,7 @@ ALIASES: Dict[str, str] = {
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
    "lm_studio": "lmstudio",
-    "ollama": "ollama-cloud",
+    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "vllm": "local",
    "llamacpp": "local",
    "llama.cpp": "local",
@@ -262,6 +283,8 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "xiaomi": "Xiaomi MiMo",
    "local": "Local endpoint",
+    "bedrock": "AWS Bedrock",
+    "ollama-cloud": "Ollama Cloud",
 }


@@ -271,6 +294,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = {
    "openai_chat": "chat_completions",
    "anthropic_messages": "anthropic_messages",
    "codex_responses": "codex_responses",
+    "bedrock_converse": "bedrock_converse",
 }


@@ -388,6 +412,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    if pdef is not None:
        return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")

+    # Direct provider checks for providers not in HERMES_OVERLAYS
+    if provider == "bedrock":
+        return "bedrock_converse"
+
    # URL-based heuristics for custom / unknown providers
    if base_url:
        url_lower = base_url.rstrip("/").lower()
@@ -395,6 +423,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
            return "anthropic_messages"
        if "api.openai.com" in url_lower:
            return "codex_responses"
+        if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
+            return "bedrock_converse"

    return "chat_completions"

@@ -22,6 +22,7 @@ from hermes_cli.auth import (
    resolve_nous_runtime_credentials,
    resolve_codex_runtime_credentials,
    resolve_qwen_runtime_credentials,
+    resolve_gemini_oauth_runtime_credentials,
    resolve_api_key_provider_credentials,
    resolve_external_process_provider_credentials,
    has_usable_secret,
@@ -41,6 +42,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
    tool calls with reasoning (chat/completions returns 400).
    """
    normalized = (base_url or "").strip().lower().rstrip("/")
+    if "api.x.ai" in normalized:
+        return "codex_responses"
    if "api.openai.com" in normalized and "openrouter" not in normalized:
        return "codex_responses"
    return None
@@ -124,7 +127,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
        return "chat_completions"


-_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"}
+_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}


 def _parse_api_mode(raw: Any) -> Optional[str]:
@@ -154,6 +157,9 @@ def _resolve_runtime_from_pool_entry(
    elif provider == "qwen-oauth":
        api_mode = "chat_completions"
        base_url = base_url or DEFAULT_QWEN_BASE_URL
+    elif provider == "google-gemini-cli":
+        api_mode = "chat_completions"
+        base_url = base_url or "cloudcode-pa://google"
    elif provider == "anthropic":
        api_mode = "anthropic_messages"
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
@@ -163,10 +169,13 @@ def _resolve_runtime_from_pool_entry(
        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
    elif provider == "openrouter":
        base_url = base_url or OPENROUTER_BASE_URL
+    elif provider == "xai":
+        api_mode = "codex_responses"
    elif provider == "nous":
        api_mode = "chat_completions"
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
+        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
    else:
        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
        # Honour model.base_url from config.yaml when the configured provider
@@ -287,6 +296,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            # Resolve the API key from the env var name stored in key_env
            key_env = str(entry.get("key_env", "") or "").strip()
            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
+            # Fall back to inline api_key when key_env is absent or unresolvable
+            if not resolved_api_key:
+                resolved_api_key = str(entry.get("api_key", "") or "").strip()

            if requested_norm in {ep_name, name_norm, f"custom:{name_norm}"}:
                # Found match by provider key
@@ -624,6 +636,8 @@ def _resolve_explicit_runtime(
        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
+        elif provider == "xai":
+            api_mode = "codex_responses"
        else:
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if configured_mode:
@@ -794,6 +808,26 @@ def resolve_runtime_provider(
            logger.info("Qwen OAuth credentials failed; "
                        "falling through to next provider.")

+    if provider == "google-gemini-cli":
+        try:
+            creds = resolve_gemini_oauth_runtime_credentials()
+            return {
+                "provider": "google-gemini-cli",
+                "api_mode": "chat_completions",
+                "base_url": creds.get("base_url", ""),
+                "api_key": creds.get("api_key", ""),
+                "source": creds.get("source", "google-oauth"),
+                "expires_at_ms": creds.get("expires_at_ms"),
+                "email": creds.get("email", ""),
+                "project_id": creds.get("project_id", ""),
+                "requested_provider": requested_provider,
+            }
+        except AuthError:
+            if requested_provider != "auto":
+                raise
+            logger.info("Google Gemini OAuth credentials failed; "
+                        "falling through to next provider.")
+
    if provider == "copilot-acp":
        creds = resolve_external_process_provider_credentials(provider)
        return {
@@ -833,6 +867,77 @@ def resolve_runtime_provider(
            "requested_provider": requested_provider,
        }

+    # AWS Bedrock (native Converse API via boto3)
+    if provider == "bedrock":
+        from agent.bedrock_adapter import (
+            has_aws_credentials,
+            resolve_aws_auth_env_var,
+            resolve_bedrock_region,
+            is_anthropic_bedrock_model,
+        )
+        # When the user explicitly selected bedrock (not auto-detected),
+        # trust boto3's credential chain — it handles IMDS, ECS task roles,
+        # Lambda execution roles, SSO, and other implicit sources that our
+        # env-var check can't detect.
+        is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
+        if not is_explicit and not has_aws_credentials():
+            raise AuthError(
+                "No AWS credentials found for Bedrock. Configure one of:\n"
+                "  - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
+                "  - AWS_PROFILE (for SSO / named profiles)\n"
+                "  - IAM instance role (EC2, ECS, Lambda)\n"
+                "Or run 'aws configure' to set up credentials.",
+                code="no_aws_credentials",
+            )
+        # Read bedrock-specific config from config.yaml
+        from hermes_cli.config import load_config as _load_bedrock_config
+        _bedrock_cfg = _load_bedrock_config().get("bedrock", {})
+        # Region priority: config.yaml bedrock.region → env var → us-east-1
+        region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
+        auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
+        # Build guardrail config if configured
+        _gr = _bedrock_cfg.get("guardrail", {})
+        guardrail_config = None
+        if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
+            guardrail_config = {
+                "guardrailIdentifier": _gr["guardrail_identifier"],
+                "guardrailVersion": _gr["guardrail_version"],
+            }
+            if _gr.get("stream_processing_mode"):
+                guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
+            if _gr.get("trace"):
+                guardrail_config["trace"] = _gr["trace"]
+        # Dual-path routing: Claude models use AnthropicBedrock SDK for full
+        # feature parity (prompt caching, thinking budgets, adaptive thinking).
+        # Non-Claude models use the Converse API for multi-model support.
+        _current_model = str(model_cfg.get("default") or "").strip()
+        if is_anthropic_bedrock_model(_current_model):
+            # Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
+            runtime = {
+                "provider": "bedrock",
+                "api_mode": "anthropic_messages",
+                "base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
+                "api_key": "aws-sdk",
+                "source": auth_source,
+                "region": region,
+                "bedrock_anthropic": True,  # Signal to use AnthropicBedrock client
+                "requested_provider": requested_provider,
+            }
+        else:
+            # Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
+            runtime = {
+                "provider": "bedrock",
+                "api_mode": "bedrock_converse",
+                "base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
+                "api_key": "aws-sdk",
+                "source": auth_source,
+                "region": region,
+                "requested_provider": requested_provider,
+            }
+        if guardrail_config:
+            runtime["guardrail_config"] = guardrail_config
+        return runtime
+
    # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
@@ -849,6 +954,8 @@ def resolve_runtime_provider(
        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
+        elif provider == "xai":
+            api_mode = "codex_responses"
        else:
            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
            # Only honor persisted api_mode when it belongs to the same provider family.
@@ -20,10 +20,7 @@ import copy
 from pathlib import Path
 from typing import Optional, Dict, Any

-from hermes_cli.nous_subscription import (
-    apply_nous_provider_defaults,
-    get_nous_subscription_features,
-)
+from hermes_cli.nous_subscription import get_nous_subscription_features
 from tools.tool_backend_helpers import managed_nous_tools_enabled
 from hermes_constants import get_optional_skills_dir

@@ -105,7 +102,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
+    "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -213,20 +210,20 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
        sys.exit(1)


-def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int:
+def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
    """Single-select menu using curses. Delegates to curses_radiolist."""
    from hermes_cli.curses_ui import curses_radiolist
-    return curses_radiolist(question, choices, selected=default, cancel_returns=-1)
+    return curses_radiolist(question, choices, selected=default, cancel_returns=-1, description=description)



-def prompt_choice(question: str, choices: list, default: int = 0) -> int:
+def prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
    """Prompt for a choice from a list with arrow key navigation.

    Escape keeps the current default (skips the question).
    Ctrl+C exits the wizard.
    """
-    idx = _curses_prompt_choice(question, choices, default)
+    idx = _curses_prompt_choice(question, choices, default, description=description)
    if idx >= 0:
        if idx == default:
            print_info("  Skipped (keeping current)")
@@ -433,6 +430,8 @@ def _print_setup_summary(config: dict, hermes_home):
        tool_status.append(("Text-to-Speech (MiniMax)", True, None))
    elif tts_provider == "mistral" and get_env_value("MISTRAL_API_KEY"):
        tool_status.append(("Text-to-Speech (Mistral Voxtral)", True, None))
+    elif tts_provider == "gemini" and (get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")):
+        tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
    elif tts_provider == "neutts":
        try:
            import importlib.util
@@ -776,7 +775,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
-            "ai-gateway": "AI Gateway",
+            "ai-gateway": "Vercel AI Gateway",
            "custom": "your custom endpoint",
        }
        _prov_display = _prov_names.get(selected_provider, selected_provider or "your provider")
@@ -835,14 +834,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")


-    if selected_provider == "nous" and nous_subscription_selected:
-        changed_defaults = apply_nous_provider_defaults(config)
-        current_tts = str(config.get("tts", {}).get("provider") or "edge")
-        if "tts" in changed_defaults:
-            print_success("TTS provider set to: OpenAI TTS via your Nous subscription")
-        else:
-            print_info(f"Keeping your existing TTS provider: {current_tts}")
-
+    # Tool Gateway prompt is already shown by _model_flow_nous() above.
    save_config(config)

    if not quick and selected_provider != "nous":
@@ -920,8 +912,10 @@ def _setup_tts_provider(config: dict):
        "edge": "Edge TTS",
        "elevenlabs": "ElevenLabs",
        "openai": "OpenAI TTS",
+        "xai": "xAI TTS",
        "minimax": "MiniMax TTS",
        "mistral": "Mistral Voxtral TTS",
+        "gemini": "Google Gemini TTS",
        "neutts": "NeuTTS",
    }
    current_label = provider_labels.get(current_provider, current_provider)
@@ -941,12 +935,14 @@ def _setup_tts_provider(config: dict):
            "Edge TTS (free, cloud-based, no setup needed)",
            "ElevenLabs (premium quality, needs API key)",
            "OpenAI TTS (good quality, needs API key)",
+            "xAI TTS (Grok voices, needs API key)",
            "MiniMax TTS (high quality with voice cloning, needs API key)",
            "Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
+            "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
            "NeuTTS (local on-device, free, ~300MB model download)",
        ]
    )
-    providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
+    providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
    choices.append(f"Keep current ({current_label})")
    keep_current_idx = len(choices) - 1
    idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -1012,6 +1008,23 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

+    elif selected == "xai":
+        existing = get_env_value("XAI_API_KEY")
+        if not existing:
+            print()
+            api_key = prompt("xAI API key for TTS", password=True)
+            if api_key:
+                save_env_value("XAI_API_KEY", api_key)
+                print_success("xAI TTS API key saved")
+            else:
+                from hermes_constants import display_hermes_home as _dhh
+                print_warning(
+                    "No xAI API key provided for TTS. Configure XAI_API_KEY via "
+                    f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
+                    "Falling back to Edge TTS."
+                )
+                selected = "edge"
+
    elif selected == "minimax":
        existing = get_env_value("MINIMAX_API_KEY")
        if not existing:
@@ -1036,6 +1049,19 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

+    elif selected == "gemini":
+        existing = get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")
+        if not existing:
+            print()
+            print_info("Get a free API key at https://aistudio.google.com/app/apikey")
+            api_key = prompt("Gemini API key for TTS", password=True)
+            if api_key:
+                save_env_value("GEMINI_API_KEY", api_key)
+                print_success("Gemini TTS API key saved")
+            else:
+                print_warning("No API key provided. Falling back to Edge TTS.")
+                selected = "edge"
+
    # Save the selection
    if "tts" not in config:
        config["tts"] = {}
@@ -1611,9 +1637,19 @@ def _setup_telegram():
            return

    print_info("Create a bot via @BotFather on Telegram")
-    token = prompt("Telegram bot token", password=True)
-    if not token:
-        return
+    import re
+
+    while True:
+        token = prompt("Telegram bot token", password=True)
+        if not token:
+            return
+        if not re.match(r"^\d+:[A-Za-z0-9_-]{30,}$", token):
+            print_error(
+                "Invalid token format. Expected: <numeric_id>:<alphanumeric_hash> "
+                "(e.g., 123456789:ABCdefGHI-jklMNOpqrSTUvwxYZ)"
+            )
+            continue
+        break
    save_env_value("TELEGRAM_BOT_TOKEN", token)
    print_success("Telegram token saved")

@@ -1969,6 +2005,54 @@ def _setup_wecom_callback():
    _gw_setup()


+def _setup_qqbot():
+    """Configure QQ Bot gateway."""
+    print_header("QQ Bot")
+    existing = get_env_value("QQ_APP_ID")
+    if existing:
+        print_info("QQ Bot: already configured")
+        if not prompt_yes_no("Reconfigure QQ Bot?", False):
+            return
+
+    print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).")
+    print_info("   Requires a QQ Bot application at q.qq.com")
+    print_info("   Reference: https://bot.q.qq.com/wiki/develop/api-v2/")
+    print()
+
+    app_id = prompt("QQ Bot App ID")
+    if not app_id:
+        print_warning("App ID is required — skipping QQ Bot setup")
+        return
+    save_env_value("QQ_APP_ID", app_id.strip())
+
+    client_secret = prompt("QQ Bot App Secret", password=True)
+    if not client_secret:
+        print_warning("App Secret is required — skipping QQ Bot setup")
+        return
+    save_env_value("QQ_CLIENT_SECRET", client_secret)
+    print_success("QQ Bot credentials saved")
+
+    print()
+    print_info("🔒 Security: Restrict who can DM your bot")
+    print_info("   Use QQ user OpenIDs (found in event payloads)")
+    print()
+    allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)")
+    if allowed_users:
+        save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", ""))
+        print_success("QQ Bot allowlist configured")
+    else:
+        print_info("⚠️  No allowlist set — anyone can DM the bot!")
+
+    print()
+    print_info("📬 Home Channel: OpenID for cron job delivery and notifications.")
+    home_channel = prompt("Home channel OpenID (leave empty to set later)")
+    if home_channel:
+        save_env_value("QQ_HOME_CHANNEL", home_channel)
+
+    print()
+    print_success("QQ Bot configured!")
+
+
 def _setup_bluebubbles():
    """Configure BlueBubbles iMessage gateway."""
    print_header("BlueBubbles (iMessage)")
@@ -2034,6 +2118,15 @@ def _setup_bluebubbles():
    print_info("   Install: https://docs.bluebubbles.app/helper-bundle/installation")


+def _setup_qqbot():
+    """Configure QQ Bot (Official API v2) via standard platform setup."""
+    from hermes_cli.gateway import _PLATFORMS
+    qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None)
+    if qq_platform:
+        from hermes_cli.gateway import _setup_standard_platform
+        _setup_standard_platform(qq_platform)
+
+
 def _setup_webhooks():
    """Configure webhook integration."""
    print_header("Webhooks")
@@ -2097,6 +2190,7 @@ _GATEWAY_PLATFORMS = [
    ("WeCom Callback (Self-Built App)", "WECOM_CALLBACK_CORP_ID", _setup_wecom_callback),
    ("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
    ("BlueBubbles (iMessage)", "BLUEBUBBLES_SERVER_URL", _setup_bluebubbles),
+    ("QQ Bot", "QQ_APP_ID", _setup_qqbot),
    ("Webhooks (GitHub, GitLab, etc.)", "WEBHOOK_ENABLED", _setup_webhooks),
 ]

@@ -2148,6 +2242,7 @@ def setup_gateway(config: dict):
        or get_env_value("WECOM_BOT_ID")
        or get_env_value("WEIXIN_ACCOUNT_ID")
        or get_env_value("BLUEBUBBLES_SERVER_URL")
+        or get_env_value("QQ_APP_ID")
        or get_env_value("WEBHOOK_ENABLED")
    )
    if any_messaging:
@@ -2169,6 +2264,8 @@ def setup_gateway(config: dict):
            missing_home.append("Slack")
        if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"):
            missing_home.append("BlueBubbles")
+        if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"):
+            missing_home.append("QQBot")

        if missing_home:
            print()
@@ -684,6 +684,51 @@ def do_uninstall(name: str, console: Optional[Console] = None,
        c.print(f"[bold red]Error:[/] {msg}\n")


+def do_reset(name: str, restore: bool = False,
+             console: Optional[Console] = None,
+             skip_confirm: bool = False,
+             invalidate_cache: bool = True) -> None:
+    """Reset a bundled skill's manifest tracking (+ optionally restore from bundled)."""
+    from tools.skills_sync import reset_bundled_skill
+
+    c = console or _console
+
+    if not skip_confirm and restore:
+        c.print(f"\n[bold]Restore '{name}' from bundled source?[/]")
+        c.print("[dim]This will DELETE your current copy and re-copy the bundled version.[/]")
+        try:
+            answer = input("Confirm [y/N]: ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            answer = "n"
+        if answer not in ("y", "yes"):
+            c.print("[dim]Cancelled.[/]\n")
+            return
+
+    result = reset_bundled_skill(name, restore=restore)
+
+    if not result["ok"]:
+        c.print(f"[bold red]Error:[/] {result['message']}\n")
+        return
+
+    c.print(f"[bold green]{result['message']}[/]")
+    synced = result.get("synced") or {}
+    if synced.get("copied"):
+        c.print(f"[dim]Copied: {', '.join(synced['copied'])}[/]")
+    if synced.get("updated"):
+        c.print(f"[dim]Updated: {', '.join(synced['updated'])}[/]")
+    c.print()
+
+    if invalidate_cache:
+        try:
+            from agent.prompt_builder import clear_skills_system_prompt_cache
+            clear_skills_system_prompt_cache(clear_snapshot=True)
+        except Exception:
+            pass
+    else:
+        c.print("[dim]Change will take effect in your next session.[/]")
+        c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n")
+
+
 def do_tap(action: str, repo: str = "", console: Optional[Console] = None) -> None:
    """Manage taps (custom GitHub repo sources)."""
    from tools.skills_hub import TapsManager
@@ -1007,6 +1052,9 @@ def skills_command(args) -> None:
        do_audit(name=getattr(args, "name", None))
    elif action == "uninstall":
        do_uninstall(args.name)
+    elif action == "reset":
+        do_reset(args.name, restore=getattr(args, "restore", False),
+                 skip_confirm=getattr(args, "yes", False))
    elif action == "publish":
        do_publish(
            args.skill_path,
@@ -1029,7 +1077,7 @@ def skills_command(args) -> None:
            return
        do_tap(tap_action, repo=repo)
    else:
-        _console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|publish|snapshot|tap]\n")
+        _console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|reset|publish|snapshot|tap]\n")
        _console.print("Run 'hermes skills <command> --help' for details.\n")


@@ -1175,6 +1223,19 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        do_uninstall(args[0], console=c, skip_confirm=skip_confirm,
                     invalidate_cache=invalidate_cache)

+    elif action == "reset":
+        if not args:
+            c.print("[bold red]Usage:[/] /skills reset <name> [--restore] [--now]\n")
+            c.print("[dim]Clears the bundled-skills manifest entry so future updates stop marking it as user-modified.[/]")
+            c.print("[dim]Pass --restore to also replace the current copy with the bundled version.[/]\n")
+            return
+        name = args[0]
+        restore = "--restore" in args
+        invalidate_cache = "--now" in args
+        # Slash commands can't prompt — --restore in slash mode is implicit consent.
+        do_reset(name, restore=restore, console=c, skip_confirm=True,
+                 invalidate_cache=invalidate_cache)
+
    elif action == "publish":
        if not args:
            c.print("[bold red]Usage:[/] /skills publish <skill-path> [--to github] [--repo owner/repo]\n")
@@ -1231,6 +1292,7 @@ def _print_skills_help(console: Console) -> None:
        "  [cyan]update[/] [name]               Update hub skills with upstream changes\n"
        "  [cyan]audit[/] [name]                Re-scan hub skills for security\n"
        "  [cyan]uninstall[/] <name>            Remove a hub-installed skill\n"
+        "  [cyan]reset[/] <name> [--restore]    Reset bundled-skill tracking (fix 'user-modified' flag)\n"
        "  [cyan]publish[/] <path> --repo <r>   Publish a skill to GitHub via PR\n"
        "  [cyan]snapshot[/] export|import      Export/import skill configurations\n"
        "  [cyan]tap[/] list|add|remove         Manage skill sources\n",
@@ -32,6 +32,12 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      response_border: "#FFD700"         # Response box border (ANSI)
      session_label: "#DAA520"           # Session label color
      session_border: "#8B8682"          # Session ID dim color
+      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
+      voice_status_bg: "#1a1a2e"        # TUI voice status background
+      completion_menu_bg: "#1a1a2e"      # Completion menu background
+      completion_menu_current_bg: "#333355"  # Active completion row background
+      completion_menu_meta_bg: "#1a1a2e"     # Completion meta column background
+      completion_menu_meta_current_bg: "#333355"  # Active completion meta background

    # Spinner: customize the animated spinner during API calls
    spinner:
@@ -87,6 +93,8 @@ BUILT-IN SKINS
 - ``ares``    — Crimson/bronze war-god theme with custom spinner wings
 - ``mono``    — Clean grayscale monochrome
 - ``slate``   — Cool blue developer-focused theme
+- ``daylight`` — Light background theme with dark text and blue accents
+- ``warm-lightmode`` — Warm brown/gold text for light terminal backgrounds

 USER SKINS
 ==========
@@ -304,6 +312,80 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
        },
        "tool_prefix": "┊",
    },
+    "daylight": {
+        "name": "daylight",
+        "description": "Light theme for bright terminals with dark text and cool blue accents",
+        "colors": {
+            "banner_border": "#2563EB",
+            "banner_title": "#0F172A",
+            "banner_accent": "#1D4ED8",
+            "banner_dim": "#475569",
+            "banner_text": "#111827",
+            "ui_accent": "#2563EB",
+            "ui_label": "#0F766E",
+            "ui_ok": "#15803D",
+            "ui_error": "#B91C1C",
+            "ui_warn": "#B45309",
+            "prompt": "#111827",
+            "input_rule": "#93C5FD",
+            "response_border": "#2563EB",
+            "session_label": "#1D4ED8",
+            "session_border": "#64748B",
+            "status_bar_bg": "#E5EDF8",
+            "voice_status_bg": "#E5EDF8",
+            "completion_menu_bg": "#F8FAFC",
+            "completion_menu_current_bg": "#DBEAFE",
+            "completion_menu_meta_bg": "#EEF2FF",
+            "completion_menu_meta_current_bg": "#BFDBFE",
+        },
+        "spinner": {},
+        "branding": {
+            "agent_name": "Hermes Agent",
+            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
+            "goodbye": "Goodbye! ⚕",
+            "response_label": " ⚕ Hermes ",
+            "prompt_symbol": "❯ ",
+            "help_header": "[?] Available Commands",
+        },
+        "tool_prefix": "│",
+    },
+    "warm-lightmode": {
+        "name": "warm-lightmode",
+        "description": "Warm light mode — dark brown/gold text for light terminal backgrounds",
+        "colors": {
+            "banner_border": "#8B6914",
+            "banner_title": "#5C3D11",
+            "banner_accent": "#8B4513",
+            "banner_dim": "#8B7355",
+            "banner_text": "#2C1810",
+            "ui_accent": "#8B4513",
+            "ui_label": "#5C3D11",
+            "ui_ok": "#2E7D32",
+            "ui_error": "#C62828",
+            "ui_warn": "#E65100",
+            "prompt": "#2C1810",
+            "input_rule": "#8B6914",
+            "response_border": "#8B6914",
+            "session_label": "#5C3D11",
+            "session_border": "#A0845C",
+            "status_bar_bg": "#F5F0E8",
+            "voice_status_bg": "#F5F0E8",
+            "completion_menu_bg": "#F5EFE0",
+            "completion_menu_current_bg": "#E8DCC8",
+            "completion_menu_meta_bg": "#F0E8D8",
+            "completion_menu_meta_current_bg": "#DFCFB0",
+        },
+        "spinner": {},
+        "branding": {
+            "agent_name": "Hermes Agent",
+            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
+            "goodbye": "Goodbye! \u2695",
+            "response_label": " \u2695 Hermes ",
+            "prompt_symbol": "\u276f ",
+            "help_header": "(^_^)? Available Commands",
+        },
+        "tool_prefix": "\u250a",
+    },
    "poseidon": {
        "name": "poseidon",
        "description": "Ocean-god theme — deep blue and seafoam",
@@ -626,7 +708,9 @@ def init_skin_from_config(config: dict) -> None:

    Call this once during CLI init with the loaded config dict.
    """
-    display = config.get("display", {})
+    display = config.get("display") or {}
+    if not isinstance(display, dict):
+        display = {}
    skin_name = display.get("skin", "default")
    if isinstance(skin_name, str) and skin_name.strip():
        set_active_skin(skin_name.strip())
@@ -685,6 +769,12 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    label = skin.get_color("ui_label", title)
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")
+    status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
+    voice_bg = skin.get_color("voice_status_bg", status_bg)
+    menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
+    menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
+    menu_meta_bg = skin.get_color("completion_menu_meta_bg", menu_bg)
+    menu_meta_current_bg = skin.get_color("completion_menu_meta_current_bg", menu_current_bg)

    return {
        "input-area": prompt,
@@ -692,13 +782,20 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "prompt": prompt,
        "prompt-working": f"{dim} italic",
        "hint": f"{dim} italic",
+        "status-bar": f"bg:{status_bg} {text}",
+        "status-bar-strong": f"bg:{status_bg} {title} bold",
+        "status-bar-dim": f"bg:{status_bg} {dim}",
+        "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
+        "status-bar-warn": f"bg:{status_bg} {warn} bold",
+        "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
+        "status-bar-critical": f"bg:{status_bg} {error} bold",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
-        "completion-menu": f"bg:#1a1a2e {text}",
-        "completion-menu.completion": f"bg:#1a1a2e {text}",
-        "completion-menu.completion.current": f"bg:#333355 {title}",
-        "completion-menu.meta.completion": f"bg:#1a1a2e {dim}",
-        "completion-menu.meta.completion.current": f"bg:#333355 {label}",
+        "completion-menu": f"bg:{menu_bg} {text}",
+        "completion-menu.completion": f"bg:{menu_bg} {text}",
+        "completion-menu.completion.current": f"bg:{menu_current_bg} {title}",
+        "completion-menu.meta.completion": f"bg:{menu_meta_bg} {dim}",
+        "completion-menu.meta.completion.current": f"bg:{menu_meta_current_bg} {label}",
        "clarify-border": input_rule,
        "clarify-title": f"{title} bold",
        "clarify-question": f"{text} bold",
@@ -716,4 +813,6 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "approval-cmd": f"{dim} italic",
        "approval-choice": dim,
        "approval-selected": f"{title} bold",
+        "voice-status": f"bg:{voice_bg} {label}",
+        "voice-status-recording": f"bg:{voice_bg} {error} bold",
    }
@@ -212,7 +212,7 @@ def show_status(args):
    if managed_nous_tools_enabled():
        features = get_nous_subscription_features(config)
        print()
-        print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD))
+        print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
        if not features.nous_auth_present:
            print("  Nous Portal   ✗ not logged in")
        else:
@@ -230,6 +230,18 @@ def show_status(args):
            else:
                state = "not configured"
            print(f"  {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}")
+    elif nous_logged_in:
+        # Logged into Nous but on the free tier — show upgrade nudge
+        print()
+        print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
+        print("  Your free-tier Nous account does not include Tool Gateway access.")
+        print("  Upgrade your subscription to unlock managed web, image, TTS, and browser tools.")
+        try:
+            portal_url = nous_status.get("portal_base_url", "").rstrip("/")
+            if portal_url:
+                print(f"  Upgrade: {portal_url}")
+        except Exception:
+            pass

    # =========================================================================
    # API-Key Providers
@@ -305,6 +317,7 @@ def show_status(args):
        "WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None),
        "Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
        "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
+        "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"),
    }
    
    for name, (token_var, home_var) in platforms.items():
@@ -63,6 +63,7 @@ CONFIGURABLE_TOOLSETS = [
    ("clarify",         "❓ Clarifying Questions",      "clarify"),
    ("delegation",      "👥 Task Delegation",           "delegate_task"),
    ("cronjob",         "⏰ Cron Jobs",                 "create/list/update/pause/resume/run, with optional attached skills"),
+    ("messaging",       "📨 Cross-Platform Messaging",  "send_message"),
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
 ]
@@ -121,6 +122,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription",
+                "badge": "subscription",
                "tag": "Managed OpenAI TTS billed to your subscription",
                "env_vars": [],
                "tts_provider": "openai",
@@ -130,21 +132,32 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Microsoft Edge TTS",
-                "tag": "Free - no API key needed",
+                "badge": "★ recommended · free",
+                "tag": "Good quality, no API key needed",
                "env_vars": [],
                "tts_provider": "edge",
            },
            {
                "name": "OpenAI TTS",
-                "tag": "Premium - high quality voices",
+                "badge": "paid",
+                "tag": "High quality voices",
                "env_vars": [
                    {"key": "VOICE_TOOLS_OPENAI_KEY", "prompt": "OpenAI API key", "url": "https://platform.openai.com/api-keys"},
                ],
                "tts_provider": "openai",
            },
+            {
+                "name": "xAI TTS",
+                "tag": "Grok voices - requires xAI API key",
+                "env_vars": [
+                    {"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
+                ],
+                "tts_provider": "xai",
+            },
            {
                "name": "ElevenLabs",
-                "tag": "Premium - most natural voices",
+                "badge": "paid",
+                "tag": "Most natural voices",
                "env_vars": [
                    {"key": "ELEVENLABS_API_KEY", "prompt": "ElevenLabs API key", "url": "https://elevenlabs.io/app/settings/api-keys"},
                ],
@@ -152,12 +165,22 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Mistral (Voxtral TTS)",
-                "tag": "Multilingual, native Opus, needs MISTRAL_API_KEY",
+                "badge": "paid",
+                "tag": "Multilingual, native Opus",
                "env_vars": [
                    {"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
                ],
                "tts_provider": "mistral",
            },
+            {
+                "name": "Google Gemini TTS",
+                "badge": "preview",
+                "tag": "30 prebuilt voices, controllable via prompts",
+                "env_vars": [
+                    {"key": "GEMINI_API_KEY", "prompt": "Gemini API key", "url": "https://aistudio.google.com/app/apikey"},
+                ],
+                "tts_provider": "gemini",
+            },
        ],
    },
    "web": {
@@ -168,6 +191,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription",
+                "badge": "subscription",
                "tag": "Managed Firecrawl billed to your subscription",
                "web_backend": "firecrawl",
                "env_vars": [],
@@ -177,7 +201,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Firecrawl Cloud",
-                "tag": "Hosted service - search, extract, and crawl",
+                "badge": "★ recommended",
+                "tag": "Full-featured search, extract, and crawl",
                "web_backend": "firecrawl",
                "env_vars": [
                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
@@ -185,7 +210,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Exa",
-                "tag": "AI-native search and contents",
+                "badge": "paid",
+                "tag": "Neural search with semantic understanding",
                "web_backend": "exa",
                "env_vars": [
                    {"key": "EXA_API_KEY", "prompt": "Exa API key", "url": "https://exa.ai"},
@@ -193,7 +219,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Parallel",
-                "tag": "AI-native search and extract",
+                "badge": "paid",
+                "tag": "AI-powered search and extract",
                "web_backend": "parallel",
                "env_vars": [
                    {"key": "PARALLEL_API_KEY", "prompt": "Parallel API key", "url": "https://parallel.ai"},
@@ -201,7 +228,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Tavily",
-                "tag": "AI-native search, extract, and crawl",
+                "badge": "free tier",
+                "tag": "Search, extract, and crawl — 1000 free searches/mo",
                "web_backend": "tavily",
                "env_vars": [
                    {"key": "TAVILY_API_KEY", "prompt": "Tavily API key", "url": "https://app.tavily.com/home"},
@@ -209,7 +237,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Firecrawl Self-Hosted",
-                "tag": "Free - run your own instance",
+                "badge": "free · self-hosted",
+                "tag": "Run your own Firecrawl instance (Docker)",
                "web_backend": "firecrawl",
                "env_vars": [
                    {"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
@@ -223,18 +252,22 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription",
+                "badge": "subscription",
                "tag": "Managed FAL image generation billed to your subscription",
                "env_vars": [],
                "requires_nous_auth": True,
                "managed_nous_feature": "image_gen",
                "override_env_vars": ["FAL_KEY"],
+                "imagegen_backend": "fal",
            },
            {
                "name": "FAL.ai",
-                "tag": "FLUX 2 Pro with auto-upscaling",
+                "badge": "paid",
+                "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
                "env_vars": [
                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
                ],
+                "imagegen_backend": "fal",
            },
        ],
    },
@@ -244,6 +277,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription (Browser Use cloud)",
+                "badge": "subscription",
                "tag": "Managed Browser Use billed to your subscription",
                "env_vars": [],
                "browser_provider": "browser-use",
@@ -254,14 +288,16 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Local Browser",
-                "tag": "Free headless Chromium (no API key needed)",
+                "badge": "★ recommended · free",
+                "tag": "Headless Chromium, no API key needed",
                "env_vars": [],
                "browser_provider": "local",
                "post_setup": "agent_browser",
            },
            {
                "name": "Browserbase",
-                "tag": "Cloud browser with stealth & proxies",
+                "badge": "paid",
+                "tag": "Cloud browser with stealth and proxies",
                "env_vars": [
                    {"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"},
                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
@@ -271,6 +307,7 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Browser Use",
+                "badge": "paid",
                "tag": "Cloud browser with remote execution",
                "env_vars": [
                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
@@ -280,6 +317,7 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Firecrawl",
+                "badge": "paid",
                "tag": "Cloud browser with remote execution",
                "env_vars": [
                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
@@ -289,7 +327,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Camofox",
-                "tag": "Local anti-detection browser (Firefox/Camoufox)",
+                "badge": "free · local",
+                "tag": "Anti-detection browser (Firefox/Camoufox)",
                "env_vars": [
                    {"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377",
                     "url": "https://github.com/jo-inc/camofox-browser"},
@@ -362,7 +401,7 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    Node.js not found - browser tools require: npm install (in hermes-agent directory)")

    elif post_setup_key == "camofox":
-        camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser"
+        camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
        if not camofox_dir.exists() and shutil.which("npm"):
            _print_info("    Installing Camofox browser server...")
            import subprocess
@@ -376,7 +415,7 @@ def _run_post_setup(post_setup_key: str):
                _print_warning("    npm install failed - run manually: npm install")
        if camofox_dir.exists():
            _print_info("    Start the Camofox server:")
-            _print_info("      npx @askjo/camoufox-browser")
+            _print_info("      npx @askjo/camofox-browser")
            _print_info("    First run downloads the Camoufox engine (~300MB)")
            _print_info("    Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
        elif not shutil.which("npm"):
@@ -426,6 +465,8 @@ def _get_enabled_platforms() -> List[str]:
        enabled.append("slack")
    if get_env_value("WHATSAPP_ENABLED"):
        enabled.append("whatsapp")
+    if get_env_value("QQ_APP_ID"):
+        enabled.append("qqbot")
    return enabled


@@ -836,7 +877,8 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        # Plain text labels only (no ANSI codes in menu items)
        provider_choices = []
        for p in providers:
-            tag = f" ({p['tag']})" if p.get("tag") else ""
+            badge = f" [{p['badge']}]" if p.get("badge") else ""
+            tag = f" — {p['tag']}" if p.get("tag") else ""
            configured = ""
            env_vars = p.get("env_vars", [])
            if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
@@ -846,7 +888,7 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
                    configured = ""
                else:
                    configured = " [configured]"
-            provider_choices.append(f"{p['name']}{tag}{configured}")
+            provider_choices.append(f"{p['name']}{badge}{tag}{configured}")

        # Add skip option
        provider_choices.append("Skip — keep defaults / configure later")
@@ -910,6 +952,106 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
    return 0


+# ─── Image Generation Model Pickers ───────────────────────────────────────────
+#
+# IMAGEGEN_BACKENDS is a per-backend catalog. Each entry exposes:
+#   - config_key:        top-level config.yaml key for this backend's settings
+#   - model_catalog_fn:  returns an OrderedDict-like {model_id: metadata}
+#   - default_model:     fallback when nothing is configured
+#
+# This prepares for future imagegen backends (Replicate, Stability, etc.):
+# each new backend registers its own entry; the FAL provider entry in
+# TOOL_CATEGORIES tags itself with `imagegen_backend: "fal"` to select the
+# right catalog at picker time.
+
+
+def _fal_model_catalog():
+    """Lazy-load the FAL model catalog from the tool module."""
+    from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL
+    return FAL_MODELS, DEFAULT_MODEL
+
+
+IMAGEGEN_BACKENDS = {
+    "fal": {
+        "display": "FAL.ai",
+        "config_key": "image_gen",
+        "catalog_fn": _fal_model_catalog,
+    },
+}
+
+
+def _format_imagegen_model_row(model_id: str, meta: dict, widths: dict) -> str:
+    """Format a single picker row with column-aligned speed / strengths / price."""
+    return (
+        f"{model_id:<{widths['model']}}  "
+        f"{meta.get('speed', ''):<{widths['speed']}}  "
+        f"{meta.get('strengths', ''):<{widths['strengths']}}  "
+        f"{meta.get('price', '')}"
+    )
+
+
+def _configure_imagegen_model(backend_name: str, config: dict) -> None:
+    """Prompt the user to pick a model for the given imagegen backend.
+
+    Writes selection to ``config[backend_config_key]["model"]``. Safe to
+    call even when stdin is not a TTY — curses_radiolist falls back to
+    keeping the current selection.
+    """
+    backend = IMAGEGEN_BACKENDS.get(backend_name)
+    if not backend:
+        return
+
+    catalog, default_model = backend["catalog_fn"]()
+    if not catalog:
+        return
+
+    cfg_key = backend["config_key"]
+    cur_cfg = config.setdefault(cfg_key, {})
+    if not isinstance(cur_cfg, dict):
+        cur_cfg = {}
+        config[cfg_key] = cur_cfg
+    current_model = cur_cfg.get("model") or default_model
+    if current_model not in catalog:
+        current_model = default_model
+
+    model_ids = list(catalog.keys())
+    # Put current model at the top so the cursor lands on it by default.
+    ordered = [current_model] + [m for m in model_ids if m != current_model]
+
+    # Column widths
+    widths = {
+        "model": max(len(m) for m in model_ids),
+        "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
+        "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
+    }
+
+    print()
+    header = (
+        f"  {'Model':<{widths['model']}}  "
+        f"{'Speed':<{widths['speed']}}  "
+        f"{'Strengths':<{widths['strengths']}}  "
+        f"Price"
+    )
+    print(color(header, Colors.CYAN))
+
+    rows = []
+    for mid in ordered:
+        row = _format_imagegen_model_row(mid, catalog[mid], widths)
+        if mid == current_model:
+            row += "  ← currently in use"
+        rows.append(row)
+
+    idx = _prompt_choice(
+        f"  Choose {backend['display']} model:",
+        rows,
+        default=0,
+    )
+
+    chosen = ordered[idx]
+    cur_cfg["model"] = chosen
+    _print_success(f"  Model set to: {chosen}")
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -923,34 +1065,53 @@ def _configure_provider(provider: dict, config: dict):

    # Set TTS provider in config if applicable
    if provider.get("tts_provider"):
-        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
+        tts_cfg = config.setdefault("tts", {})
+        tts_cfg["provider"] = provider["tts_provider"]
+        tts_cfg["use_gateway"] = bool(managed_feature)

    # Set browser cloud provider in config if applicable
    if "browser_provider" in provider:
        bp = provider["browser_provider"]
+        browser_cfg = config.setdefault("browser", {})
        if bp == "local":
-            config.setdefault("browser", {})["cloud_provider"] = "local"
+            browser_cfg["cloud_provider"] = "local"
            _print_success("  Browser set to local mode")
        elif bp:
-            config.setdefault("browser", {})["cloud_provider"] = bp
+            browser_cfg["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
+        browser_cfg["use_gateway"] = bool(managed_feature)

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
-        config.setdefault("web", {})["backend"] = provider["web_backend"]
+        web_cfg = config.setdefault("web", {})
+        web_cfg["backend"] = provider["web_backend"]
+        web_cfg["use_gateway"] = bool(managed_feature)
        _print_success(f"  Web backend set to: {provider['web_backend']}")

+    # For tools without a specific config key (e.g. image_gen), still
+    # track use_gateway so the runtime knows the user's intent.
+    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+        config.setdefault(managed_feature, {})["use_gateway"] = True
+    elif not managed_feature:
+        # User picked a non-gateway provider — find which category this
+        # belongs to and clear use_gateway if it was previously set.
+        for cat_key, cat in TOOL_CATEGORIES.items():
+            if provider in cat.get("providers", []):
+                section = config.get(cat_key)
+                if isinstance(section, dict) and section.get("use_gateway"):
+                    section["use_gateway"] = False
+                break
+
    if not env_vars:
        if provider.get("post_setup"):
            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
-            override_envs = provider.get("override_env_vars", [])
-            if any(get_env_value(env_var) for env_var in override_envs):
-                _print_warning(
-                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
-                )
+        # Imagegen backends prompt for model selection after backend pick.
+        backend = provider.get("imagegen_backend")
+        if backend:
+            _configure_imagegen_model(backend, config)
        return

    # Prompt for each required env var
@@ -985,6 +1146,10 @@ def _configure_provider(provider: dict, config: dict):

    if all_configured:
        _print_success(f"  {provider['name']} configured!")
+        # Imagegen backends prompt for model selection after env vars are in.
+        backend = provider.get("imagegen_backend")
+        if backend:
+            _configure_imagegen_model(backend, config)


 def _configure_simple_requirements(ts_key: str):
@@ -1102,7 +1267,8 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):

        provider_choices = []
        for p in providers:
-            tag = f" ({p['tag']})" if p.get("tag") else ""
+            badge = f" [{p['badge']}]" if p.get("badge") else ""
+            tag = f" — {p['tag']}" if p.get("tag") else ""
            configured = ""
            env_vars = p.get("env_vars", [])
            if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
@@ -1112,7 +1278,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
                    configured = ""
                else:
                    configured = " [configured]"
-            provider_choices.append(f"{p['name']}{tag}{configured}")
+            provider_choices.append(f"{p['name']}{badge}{tag}{configured}")

        default_idx = _detect_active_provider_index(providers, config)

@@ -1155,11 +1321,10 @@ def _reconfigure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
-            override_envs = provider.get("override_env_vars", [])
-            if any(get_env_value(env_var) for env_var in override_envs):
-                _print_warning(
-                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
-                )
+        # Imagegen backends prompt for model selection on reconfig too.
+        backend = provider.get("imagegen_backend")
+        if backend:
+            _configure_imagegen_model(backend, config)
        return

    for var in env_vars:
@@ -1177,6 +1342,11 @@ def _reconfigure_provider(provider: dict, config: dict):
        else:
            _print_info("    Kept current")

+    # Imagegen backends prompt for model selection on reconfig too.
+    backend = provider.get("imagegen_backend")
+    if backend:
+        _configure_imagegen_model(backend, config)
+

 def _reconfigure_simple_requirements(ts_key: str):
    """Reconfigure simple env var requirements."""
@@ -10,8 +10,11 @@ Usage:
 """

 import asyncio
+import hmac
+import importlib.util
 import json
 import logging
+import os
 import secrets
 import sys
 import threading
@@ -47,7 +50,7 @@ from gateway.status import get_running_pid, read_runtime_status
 try:
    from fastapi import FastAPI, HTTPException, Request
    from fastapi.middleware.cors import CORSMiddleware
-    from fastapi.responses import FileResponse, JSONResponse
+    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
    from pydantic import BaseModel
 except ImportError:
@@ -84,6 +87,47 @@ app.add_middleware(
    allow_headers=["*"],
 )

+# ---------------------------------------------------------------------------
+# Endpoints that do NOT require the session token.  Everything else under
+# /api/ is gated by the auth middleware below.  Keep this list minimal —
+# only truly non-sensitive, read-only endpoints belong here.
+# ---------------------------------------------------------------------------
+_PUBLIC_API_PATHS: frozenset = frozenset({
+    "/api/status",
+    "/api/config/defaults",
+    "/api/config/schema",
+    "/api/model/info",
+    "/api/dashboard/themes",
+    "/api/dashboard/plugins",
+    "/api/dashboard/plugins/rescan",
+})
+
+
+def _require_token(request: Request) -> None:
+    """Validate the ephemeral session token.  Raises 401 on mismatch.
+
+    Uses ``hmac.compare_digest`` to prevent timing side-channels.
+    """
+    auth = request.headers.get("authorization", "")
+    expected = f"Bearer {_SESSION_TOKEN}"
+    if not hmac.compare_digest(auth.encode(), expected.encode()):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+
+@app.middleware("http")
+async def auth_middleware(request: Request, call_next):
+    """Require the session token on all /api/ routes except the public list."""
+    path = request.url.path
+    if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
+        auth = request.headers.get("authorization", "")
+        expected = f"Bearer {_SESSION_TOKEN}"
+        if not hmac.compare_digest(auth.encode(), expected.encode()):
+            return JSONResponse(
+                status_code=401,
+                content={"detail": "Unauthorized"},
+            )
+    return await call_next(request)
+

 # ---------------------------------------------------------------------------
 # Config schema — auto-generated from DEFAULT_CONFIG
@@ -96,6 +140,11 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
        "description": "Default model (e.g. anthropic/claude-sonnet-4.6)",
        "category": "general",
    },
+    "model_context_length": {
+        "type": "number",
+        "description": "Context window override (0 = auto-detect from model metadata)",
+        "category": "general",
+    },
    "terminal.backend": {
        "type": "select",
        "description": "Terminal execution backend",
@@ -121,6 +170,11 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
        "description": "CLI visual theme",
        "options": ["default", "ares", "mono", "slate"],
    },
+    "dashboard.theme": {
+        "type": "select",
+        "description": "Web dashboard visual theme",
+        "options": ["default", "midnight", "ember", "mono", "cyberpunk", "rose"],
+    },
    "display.resume_display": {
        "type": "select",
        "description": "How resumed sessions display history",
@@ -179,6 +233,7 @@ _CATEGORY_MERGE: Dict[str, str] = {
    "approvals": "security",
    "human_delay": "display",
    "smart_model_routing": "agent",
+    "dashboard": "display",
 }

 # Display order for tabs — unlisted categories sort alphabetically after these.
@@ -246,6 +301,17 @@ def _build_schema_from_config(

 CONFIG_SCHEMA = _build_schema_from_config(DEFAULT_CONFIG)

+# Inject virtual fields that don't live in DEFAULT_CONFIG but are surfaced
+# by the normalize/denormalize cycle.  Insert model_context_length right after
+# the "model" key so it renders adjacent in the frontend.
+_mcl_entry = _SCHEMA_OVERRIDES["model_context_length"]
+_ordered_schema: Dict[str, Dict[str, Any]] = {}
+for _k, _v in CONFIG_SCHEMA.items():
+    _ordered_schema[_k] = _v
+    if _k == "model":
+        _ordered_schema["model_context_length"] = _mcl_entry
+CONFIG_SCHEMA = _ordered_schema
+

 class ConfigUpdate(BaseModel):
    config: dict
@@ -264,12 +330,68 @@ class EnvVarReveal(BaseModel):
    key: str


+_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
+_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
+
+
+def _probe_gateway_health() -> tuple[bool, dict | None]:
+    """Probe the gateway via its HTTP health endpoint (cross-container).
+
+    Uses ``/health/detailed`` first (returns full state), falling back to
+    the simpler ``/health`` endpoint.  Returns ``(is_alive, body_dict)``.
+
+    Accepts any of these as ``GATEWAY_HEALTH_URL``:
+    - ``http://gateway:8642``                (base URL — recommended)
+    - ``http://gateway:8642/health``         (explicit health path)
+    - ``http://gateway:8642/health/detailed`` (explicit detailed path)
+
+    This is a **blocking** call — run via ``run_in_executor`` from async code.
+    """
+    if not _GATEWAY_HEALTH_URL:
+        return False, None
+
+    # Normalise to base URL so we always probe the right paths regardless of
+    # whether the user included /health or /health/detailed in the env var.
+    base = _GATEWAY_HEALTH_URL.rstrip("/")
+    if base.endswith("/health/detailed"):
+        base = base[: -len("/health/detailed")]
+    elif base.endswith("/health"):
+        base = base[: -len("/health")]
+
+    for path in (f"{base}/health/detailed", f"{base}/health"):
+        try:
+            req = urllib.request.Request(path, method="GET")
+            with urllib.request.urlopen(req, timeout=_GATEWAY_HEALTH_TIMEOUT) as resp:
+                if resp.status == 200:
+                    body = json.loads(resp.read())
+                    return True, body
+        except Exception:
+            continue
+    return False, None
+
+
@app.get("/api/status")
 async def get_status():
    current_ver, latest_ver = check_config_version()

+    # --- Gateway liveness detection ---
+    # Try local PID check first (same-host).  If that fails and a remote
+    # GATEWAY_HEALTH_URL is configured, probe the gateway over HTTP so the
+    # dashboard works when the gateway runs in a separate container.
    gateway_pid = get_running_pid()
    gateway_running = gateway_pid is not None
+    remote_health_body: dict | None = None
+
+    if not gateway_running and _GATEWAY_HEALTH_URL:
+        loop = asyncio.get_event_loop()
+        alive, remote_health_body = await loop.run_in_executor(
+            None, _probe_gateway_health
+        )
+        if alive:
+            gateway_running = True
+            # PID from the remote container (display only — not locally valid)
+            if remote_health_body:
+                gateway_pid = remote_health_body.get("pid")

    gateway_state = None
    gateway_platforms: dict = {}
@@ -286,7 +408,12 @@ async def get_status():
    except Exception:
        configured_gateway_platforms = None

+    # Prefer the detailed health endpoint response (has full state) when the
+    # local runtime status file is absent or stale (cross-container).
    runtime = read_runtime_status()
+    if runtime is None and remote_health_body and remote_health_body.get("gateway_state"):
+        runtime = remote_health_body
+
    if runtime:
        gateway_state = runtime.get("gateway_state")
        gateway_platforms = runtime.get("platforms") or {}
@@ -301,6 +428,17 @@ async def get_status():
        if not gateway_running:
            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
            gateway_platforms = {}
+        elif gateway_running and remote_health_body is not None:
+            # The health probe confirmed the gateway is alive, but the local
+            # runtime status file may be stale (cross-container).  Override
+            # stopped/None state so the dashboard shows the correct badge.
+            if gateway_state in (None, "stopped"):
+                gateway_state = "running"
+
+    # If there was no runtime info at all but the health probe confirmed alive,
+    # ensure we still report the gateway as running (no shared volume scenario).
+    if gateway_running and gateway_state is None and remote_health_body is not None:
+        gateway_state = "running"

    active_sessions = 0
    try:
@@ -329,6 +467,7 @@ async def get_status():
        "latest_config_version": latest_ver,
        "gateway_running": gateway_running,
        "gateway_pid": gateway_pid,
+        "gateway_health_url": _GATEWAY_HEALTH_URL,
        "gateway_state": gateway_state,
        "gateway_platforms": gateway_platforms,
        "gateway_exit_reason": gateway_exit_reason,
@@ -408,11 +547,19 @@ def _normalize_config_for_web(config: Dict[str, Any]) -> Dict[str, Any]:
    or a dict (``{default: ..., provider: ..., base_url: ...}``).  The schema is built
    from DEFAULT_CONFIG where ``model`` is a string, but user configs often have the
    dict form.  Normalize to the string form so the frontend schema matches.
+
+    Also surfaces ``model_context_length`` as a top-level field so the web UI can
+    display and edit it.  A value of 0 means "auto-detect".
    """
    config = dict(config)  # shallow copy
    model_val = config.get("model")
    if isinstance(model_val, dict):
+        # Extract context_length before flattening the dict
+        ctx_len = model_val.get("context_length", 0)
        config["model"] = model_val.get("default", model_val.get("name", ""))
+        config["model_context_length"] = ctx_len if isinstance(ctx_len, int) else 0
+    else:
+        config["model_context_length"] = 0
    return config


@@ -433,6 +580,93 @@ async def get_schema():
    return {"fields": CONFIG_SCHEMA, "category_order": _CATEGORY_ORDER}


+_EMPTY_MODEL_INFO: dict = {
+    "model": "",
+    "provider": "",
+    "auto_context_length": 0,
+    "config_context_length": 0,
+    "effective_context_length": 0,
+    "capabilities": {},
+}
+
+
+@app.get("/api/model/info")
+def get_model_info():
+    """Return resolved model metadata for the currently configured model.
+
+    Calls the same context-length resolution chain the agent uses, so the
+    frontend can display "Auto-detected: 200K" alongside the override field.
+    Also returns model capabilities (vision, reasoning, tools) when available.
+    """
+    try:
+        cfg = load_config()
+        model_cfg = cfg.get("model", "")
+
+        # Extract model name and provider from the config
+        if isinstance(model_cfg, dict):
+            model_name = model_cfg.get("default", model_cfg.get("name", ""))
+            provider = model_cfg.get("provider", "")
+            base_url = model_cfg.get("base_url", "")
+            config_ctx = model_cfg.get("context_length")
+        else:
+            model_name = str(model_cfg) if model_cfg else ""
+            provider = ""
+            base_url = ""
+            config_ctx = None
+
+        if not model_name:
+            return dict(_EMPTY_MODEL_INFO, provider=provider)
+
+        # Resolve auto-detected context length (pass config_ctx=None to get
+        # purely auto-detected value, then separately report the override)
+        try:
+            from agent.model_metadata import get_model_context_length
+            auto_ctx = get_model_context_length(
+                model=model_name,
+                base_url=base_url,
+                provider=provider,
+                config_context_length=None,  # ignore override — we want auto value
+            )
+        except Exception:
+            auto_ctx = 0
+
+        config_ctx_int = 0
+        if isinstance(config_ctx, int) and config_ctx > 0:
+            config_ctx_int = config_ctx
+
+        # Effective is what the agent actually uses
+        effective_ctx = config_ctx_int if config_ctx_int > 0 else auto_ctx
+
+        # Try to get model capabilities from models.dev
+        caps = {}
+        try:
+            from agent.models_dev import get_model_capabilities
+            mc = get_model_capabilities(provider=provider, model=model_name)
+            if mc is not None:
+                caps = {
+                    "supports_tools": mc.supports_tools,
+                    "supports_vision": mc.supports_vision,
+                    "supports_reasoning": mc.supports_reasoning,
+                    "context_window": mc.context_window,
+                    "max_output_tokens": mc.max_output_tokens,
+                    "model_family": mc.model_family,
+                }
+        except Exception:
+            pass
+
+        return {
+            "model": model_name,
+            "provider": provider,
+            "auto_context_length": auto_ctx,
+            "config_context_length": config_ctx_int,
+            "effective_context_length": effective_ctx,
+            "capabilities": caps,
+        }
+    except Exception:
+        _log.exception("GET /api/model/info failed")
+        return dict(_EMPTY_MODEL_INFO)
+
+
 def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
    """Reverse _normalize_config_for_web before saving.

@@ -440,12 +674,24 @@ def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
    to recover model subkeys (provider, base_url, api_mode, etc.) that were
    stripped from the GET response.  The frontend only sees model as a flat
    string; the rest is preserved transparently.
+
+    Also handles ``model_context_length`` — writes it back into the model dict
+    as ``context_length``.  A value of 0 or absent means "auto-detect" (omitted
+    from the dict so get_model_context_length() uses its normal resolution).
    """
    config = dict(config)
    # Remove any _model_meta that might have leaked in (shouldn't happen
    # with the stripped GET response, but be defensive)
    config.pop("_model_meta", None)

+    # Extract and remove model_context_length before processing model
+    ctx_override = config.pop("model_context_length", 0)
+    if not isinstance(ctx_override, int):
+        try:
+            ctx_override = int(ctx_override)
+        except (TypeError, ValueError):
+            ctx_override = 0
+
    model_val = config.get("model")
    if isinstance(model_val, str) and model_val:
        # Read the current disk config to recover model subkeys
@@ -455,7 +701,20 @@ def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
            if isinstance(disk_model, dict):
                # Preserve all subkeys, update default with the new value
                disk_model["default"] = model_val
+                # Write context_length into the model dict (0 = remove/auto)
+                if ctx_override > 0:
+                    disk_model["context_length"] = ctx_override
+                else:
+                    disk_model.pop("context_length", None)
                config["model"] = disk_model
+            else:
+                # Model was previously a bare string — upgrade to dict if
+                # user is setting a context_length override
+                if ctx_override > 0:
+                    config["model"] = {
+                        "default": model_val,
+                        "context_length": ctx_override,
+                    }
        except Exception:
            pass  # can't read disk config — just use the string form
    return config
@@ -471,17 +730,6 @@ async def update_config(body: ConfigUpdate):
        raise HTTPException(status_code=500, detail="Internal server error")


-@app.get("/api/auth/session-token")
-async def get_session_token():
-    """Return the ephemeral session token for this server instance.
-
-    The token protects sensitive endpoints (reveal).  It's served to the SPA
-    which stores it in memory — it's never persisted and dies when the server
-    process exits.  CORS already restricts this to localhost origins.
-    """
-    return {"token": _SESSION_TOKEN}
-
-
@app.get("/api/env")
 async def get_env_vars():
    env_on_disk = load_env()
@@ -535,9 +783,7 @@ async def reveal_env_var(body: EnvVarReveal, request: Request):
    - Audit logging
    """
    # --- Token check ---
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
+    _require_token(request)

    # --- Rate limit ---
    now = time.time()
@@ -808,9 +1054,7 @@ async def list_oauth_providers():
@app.delete("/api/providers/oauth/{provider_id}")
 async def disconnect_oauth_provider(provider_id: str, request: Request):
    """Disconnect an OAuth provider. Token-protected (matches /env/reveal)."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
+    _require_token(request)

    valid_ids = {p["id"] for p in _OAUTH_PROVIDER_CATALOG}
    if provider_id not in valid_ids:
@@ -1382,9 +1626,7 @@ def _codex_full_login_worker(session_id: str) -> None:
@app.post("/api/providers/oauth/{provider_id}/start")
 async def start_oauth_login(provider_id: str, request: Request):
    """Initiate an OAuth login flow. Token-protected."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
+    _require_token(request)
    _gc_oauth_sessions()
    valid = {p["id"] for p in _OAUTH_PROVIDER_CATALOG}
    if provider_id not in valid:
@@ -1416,9 +1658,7 @@ class OAuthSubmitBody(BaseModel):
@app.post("/api/providers/oauth/{provider_id}/submit")
 async def submit_oauth_code(provider_id: str, body: OAuthSubmitBody, request: Request):
    """Submit the auth code for PKCE flows. Token-protected."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
+    _require_token(request)
    if provider_id == "anthropic":
        return await asyncio.get_event_loop().run_in_executor(
            None, _submit_anthropic_pkce, body.session_id, body.code,
@@ -1446,9 +1686,7 @@ async def poll_oauth_session(provider_id: str, session_id: str):
@app.delete("/api/providers/oauth/sessions/{session_id}")
 async def cancel_oauth_session(session_id: str, request: Request):
    """Cancel a pending OAuth session. Token-protected."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
+    _require_token(request)
    with _oauth_sessions_lock:
        sess = _oauth_sessions.pop(session_id, None)
    if sess is None:
@@ -1796,7 +2034,12 @@ async def get_usage_analytics(days: int = 30):


 def mount_spa(application: FastAPI):
-    """Mount the built SPA. Falls back to index.html for client-side routing."""
+    """Mount the built SPA. Falls back to index.html for client-side routing.
+
+    The session token is injected into index.html via a ``<script>`` tag so
+    the SPA can authenticate against protected API endpoints without a
+    separate (unauthenticated) token-dispensing endpoint.
+    """
    if not WEB_DIST.exists():
        @application.get("/{full_path:path}")
        async def no_frontend(full_path: str):
@@ -1806,6 +2049,20 @@ def mount_spa(application: FastAPI):
            )
        return

+    _index_path = WEB_DIST / "index.html"
+
+    def _serve_index():
+        """Return index.html with the session token injected."""
+        html = _index_path.read_text()
+        token_script = (
+            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
+        )
+        html = html.replace("</head>", f"{token_script}</head>", 1)
+        return HTMLResponse(
+            html,
+            headers={"Cache-Control": "no-store, no-cache, must-revalidate"},
+        )
+
    application.mount("/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets")

    @application.get("/{full_path:path}")
@@ -1819,24 +2076,263 @@ def mount_spa(application: FastAPI):
            and file_path.is_file()
        ):
            return FileResponse(file_path)
-        return FileResponse(
-            WEB_DIST / "index.html",
-            headers={"Cache-Control": "no-store, no-cache, must-revalidate"},
-        )
+        return _serve_index()


+# ---------------------------------------------------------------------------
+# Dashboard theme endpoints
+# ---------------------------------------------------------------------------
+
+# Built-in dashboard themes — label + description only.  The actual color
+# definitions live in the frontend (web/src/themes/presets.ts).
+_BUILTIN_DASHBOARD_THEMES = [
+    {"name": "default",   "label": "Hermes Teal",  "description": "Classic dark teal — the canonical Hermes look"},
+    {"name": "midnight",  "label": "Midnight",      "description": "Deep blue-violet with cool accents"},
+    {"name": "ember",     "label": "Ember",          "description": "Warm crimson and bronze — forge vibes"},
+    {"name": "mono",      "label": "Mono",           "description": "Clean grayscale — minimal and focused"},
+    {"name": "cyberpunk", "label": "Cyberpunk",      "description": "Neon green on black — matrix terminal"},
+    {"name": "rose",      "label": "Rosé",           "description": "Soft pink and warm ivory — easy on the eyes"},
+]
+
+
+def _discover_user_themes() -> list:
+    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
+    themes_dir = get_hermes_home() / "dashboard-themes"
+    if not themes_dir.is_dir():
+        return []
+    result = []
+    for f in sorted(themes_dir.glob("*.yaml")):
+        try:
+            data = yaml.safe_load(f.read_text(encoding="utf-8"))
+            if isinstance(data, dict) and data.get("name"):
+                result.append({
+                    "name": data["name"],
+                    "label": data.get("label", data["name"]),
+                    "description": data.get("description", ""),
+                })
+        except Exception:
+            continue
+    return result
+
+
+@app.get("/api/dashboard/themes")
+async def get_dashboard_themes():
+    """Return available themes and the currently active one."""
+    config = load_config()
+    active = config.get("dashboard", {}).get("theme", "default")
+    user_themes = _discover_user_themes()
+    # Merge built-in + user, user themes override built-in by name.
+    seen = set()
+    themes = []
+    for t in _BUILTIN_DASHBOARD_THEMES:
+        seen.add(t["name"])
+        themes.append(t)
+    for t in user_themes:
+        if t["name"] not in seen:
+            themes.append(t)
+            seen.add(t["name"])
+    return {"themes": themes, "active": active}
+
+
+class ThemeSetBody(BaseModel):
+    name: str
+
+
+@app.put("/api/dashboard/theme")
+async def set_dashboard_theme(body: ThemeSetBody):
+    """Set the active dashboard theme (persists to config.yaml)."""
+    config = load_config()
+    if "dashboard" not in config:
+        config["dashboard"] = {}
+    config["dashboard"]["theme"] = body.name
+    save_config(config)
+    return {"ok": True, "theme": body.name}
+
+
+# ---------------------------------------------------------------------------
+# Dashboard plugin system
+# ---------------------------------------------------------------------------
+
+def _discover_dashboard_plugins() -> list:
+    """Scan plugins/*/dashboard/manifest.json for dashboard extensions.
+
+    Checks three plugin sources (same as hermes_cli.plugins):
+    1. User plugins:    ~/.hermes/plugins/<name>/dashboard/manifest.json
+    2. Bundled plugins: <repo>/plugins/<name>/dashboard/manifest.json  (memory/, etc.)
+    3. Project plugins: ./.hermes/plugins/  (only if HERMES_ENABLE_PROJECT_PLUGINS)
+    """
+    plugins = []
+    seen_names: set = set()
+
+    search_dirs = [
+        (get_hermes_home() / "plugins", "user"),
+        (PROJECT_ROOT / "plugins" / "memory", "bundled"),
+        (PROJECT_ROOT / "plugins", "bundled"),
+    ]
+    if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"):
+        search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project"))
+
+    for plugins_root, source in search_dirs:
+        if not plugins_root.is_dir():
+            continue
+        for child in sorted(plugins_root.iterdir()):
+            if not child.is_dir():
+                continue
+            manifest_file = child / "dashboard" / "manifest.json"
+            if not manifest_file.exists():
+                continue
+            try:
+                data = json.loads(manifest_file.read_text(encoding="utf-8"))
+                name = data.get("name", child.name)
+                if name in seen_names:
+                    continue
+                seen_names.add(name)
+                plugins.append({
+                    "name": name,
+                    "label": data.get("label", name),
+                    "description": data.get("description", ""),
+                    "icon": data.get("icon", "Puzzle"),
+                    "version": data.get("version", "0.0.0"),
+                    "tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
+                    "entry": data.get("entry", "dist/index.js"),
+                    "css": data.get("css"),
+                    "has_api": bool(data.get("api")),
+                    "source": source,
+                    "_dir": str(child / "dashboard"),
+                    "_api_file": data.get("api"),
+                })
+            except Exception as exc:
+                _log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc)
+                continue
+    return plugins
+
+
+# Cache discovered plugins per-process (refresh on explicit re-scan).
+_dashboard_plugins_cache: Optional[list] = None
+
+
+def _get_dashboard_plugins(force_rescan: bool = False) -> list:
+    global _dashboard_plugins_cache
+    if _dashboard_plugins_cache is None or force_rescan:
+        _dashboard_plugins_cache = _discover_dashboard_plugins()
+    return _dashboard_plugins_cache
+
+
+@app.get("/api/dashboard/plugins")
+async def get_dashboard_plugins():
+    """Return discovered dashboard plugins."""
+    plugins = _get_dashboard_plugins()
+    # Strip internal fields before sending to frontend.
+    return [
+        {k: v for k, v in p.items() if not k.startswith("_")}
+        for p in plugins
+    ]
+
+
+@app.get("/api/dashboard/plugins/rescan")
+async def rescan_dashboard_plugins():
+    """Force re-scan of dashboard plugins."""
+    plugins = _get_dashboard_plugins(force_rescan=True)
+    return {"ok": True, "count": len(plugins)}
+
+
+@app.get("/dashboard-plugins/{plugin_name}/{file_path:path}")
+async def serve_plugin_asset(plugin_name: str, file_path: str):
+    """Serve static assets from a dashboard plugin directory.
+
+    Only serves files from the plugin's ``dashboard/`` subdirectory.
+    Path traversal is blocked by checking ``resolve().is_relative_to()``.
+    """
+    plugins = _get_dashboard_plugins()
+    plugin = next((p for p in plugins if p["name"] == plugin_name), None)
+    if not plugin:
+        raise HTTPException(status_code=404, detail="Plugin not found")
+
+    base = Path(plugin["_dir"])
+    target = (base / file_path).resolve()
+
+    if not target.is_relative_to(base.resolve()):
+        raise HTTPException(status_code=403, detail="Path traversal blocked")
+    if not target.exists() or not target.is_file():
+        raise HTTPException(status_code=404, detail="File not found")
+
+    # Guess content type
+    suffix = target.suffix.lower()
+    content_types = {
+        ".js": "application/javascript",
+        ".mjs": "application/javascript",
+        ".css": "text/css",
+        ".json": "application/json",
+        ".html": "text/html",
+        ".svg": "image/svg+xml",
+        ".png": "image/png",
+        ".jpg": "image/jpeg",
+        ".woff2": "font/woff2",
+        ".woff": "font/woff",
+    }
+    media_type = content_types.get(suffix, "application/octet-stream")
+    return FileResponse(target, media_type=media_type)
+
+
+def _mount_plugin_api_routes():
+    """Import and mount backend API routes from plugins that declare them.
+
+    Each plugin's ``api`` field points to a Python file that must expose
+    a ``router`` (FastAPI APIRouter).  Routes are mounted under
+    ``/api/plugins/<name>/``.
+    """
+    for plugin in _get_dashboard_plugins():
+        api_file_name = plugin.get("_api_file")
+        if not api_file_name:
+            continue
+        api_path = Path(plugin["_dir"]) / api_file_name
+        if not api_path.exists():
+            _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
+            continue
+        try:
+            spec = importlib.util.spec_from_file_location(
+                f"hermes_dashboard_plugin_{plugin['name']}", api_path,
+            )
+            if spec is None or spec.loader is None:
+                continue
+            mod = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(mod)
+            router = getattr(mod, "router", None)
+            if router is None:
+                _log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
+                continue
+            app.include_router(router, prefix=f"/api/plugins/{plugin['name']}")
+            _log.info("Mounted plugin API routes: /api/plugins/%s/", plugin["name"])
+        except Exception as exc:
+            _log.warning("Failed to load plugin %s API routes: %s", plugin["name"], exc)
+
+
+# Mount plugin API routes before the SPA catch-all.
+_mount_plugin_api_routes()
+
 mount_spa(app)


-def start_server(host: str = "127.0.0.1", port: int = 9119, open_browser: bool = True):
+def start_server(
+    host: str = "127.0.0.1",
+    port: int = 9119,
+    open_browser: bool = True,
+    allow_public: bool = False,
+):
    """Start the web UI server."""
    import uvicorn

-    if host not in ("127.0.0.1", "localhost", "::1"):
-        import logging
-        logging.warning(
-            "Binding to %s — the web UI exposes config and API keys. "
-            "Only bind to non-localhost if you trust all users on the network.", host,
+    _LOCALHOST = ("127.0.0.1", "localhost", "::1")
+    if host not in _LOCALHOST and not allow_public:
+        raise SystemExit(
+            f"Refusing to bind to {host} — the dashboard exposes API keys "
+            f"and config without robust authentication.\n"
+            f"Use --insecure to override (NOT recommended on untrusted networks)."
+        )
+    if host not in _LOCALHOST:
+        _log.warning(
+            "Binding to %s with --insecure — the dashboard has no robust "
+            "authentication. Only use on trusted networks.", host,
        )

    if open_browser:
@@ -78,6 +78,10 @@ def set_session_context(session_id: str) -> None:
    _session_context.session_id = session_id


+def clear_session_context() -> None:
+    """Clear the session ID for the current thread."""
+    _session_context.session_id = None
+

 # ---------------------------------------------------------------------------
 # Record factory — injects session_tag into every LogRecord at creation
@@ -354,6 +358,7 @@ def _add_rotating_handler(
    path.parent.mkdir(parents=True, exist_ok=True)
    handler = _ManagedRotatingFileHandler(
        str(path), maxBytes=max_bytes, backupCount=backup_count,
+        encoding="utf-8",
    )
    handler.setLevel(level)
    handler.setFormatter(formatter)
--- a/Show More
+++ b/Show More