perf(skills-page): lazy-fetch the catalog instead of bundling 34MB into JS (#33809)
PR #33748 grew the live skills index from ~2k skills to ~69k, which made the previous build-time bundling strategy untenable: the skills page's JS chunk was about to balloon from ~1MB to ~35MB. Initial page load on mobile became unusable, search lagged on every keystroke against the 68k-item array, and JSON.parse blocked the main thread at startup.

Three changes:

1. extract-skills.py writes skills.json + skills-meta.json into website/static/api/ instead of website/src/data/. Static-served by Vercel as /docs/api/skills.json (gzipped on the wire), same CDN that already serves skills-index.json.
2. skills/index.tsx drops the static import and fetches both files in parallel on mount. Loading state shows '…' for the count; failures surface a small error pill instead of blanking the page.
3. Search is debounced 150ms and runs against a precomputed lowercase haystack stamped onto each row at load time. Before: array-join + toLowerCase per row per keystroke on a 68k array. After: single .includes() per row, deferred until typing settles.

Validation:

| | before | after |
|---|---|---|
| skills.json location | src/data/ (bundled) | static/api/ (CDN) |
| Largest JS chunk | would be ~35MB at 68k skills | 659 KB |
| Initial page render | wait for full parse | immediate, fetch async |
| Per-keystroke filter | join+lowercase x 68k rows | single includes x 68k rows |
| Debounce | none | 150ms |

Built locally for both en and zh-Hans locales; the 34MB skills.json now lives in build/api/ and is served separately rather than inlined into the page's bundle.

skills.json and skills-meta.json added to .gitignore — they were already build artifacts, but the gitignore only listed skills-index.json before.
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -78,6 +78,12 @@ mini-swe-agent/
|
|||||||
.nix-stamps/
|
.nix-stamps/
|
||||||
result
|
result
|
||||||
website/static/api/skills-index.json
|
website/static/api/skills-index.json
|
||||||
|
# skills.json + skills-meta.json are build artifacts emitted by
|
||||||
|
# website/scripts/extract-skills.py during prebuild — keep them out of
|
||||||
|
# git for the same reason as skills-index.json (large, generated, change
|
||||||
|
# every build).
|
||||||
|
website/static/api/skills.json
|
||||||
|
website/static/api/skills-meta.json
|
||||||
models-dev-upstream/
|
models-dev-upstream/
|
||||||
hermes_cli/tui_dist/*
|
hermes_cli/tui_dist/*
|
||||||
hermes_cli/scripts/
|
hermes_cli/scripts/
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Extract skill metadata into website/src/data/skills.json for the Skills Hub page.
|
"""Extract skill metadata into website/static/api/skills.json for the Skills Hub page.
|
||||||
|
|
||||||
Two data sources:
|
Two data sources:
|
||||||
|
|
||||||
@@ -32,8 +32,12 @@ LOCAL_SKILL_DIRS = [
|
|||||||
]
|
]
|
||||||
UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
|
UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
|
||||||
LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
|
LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
|
||||||
OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
|
# Output to static/api/ so the file is CDN-served at /docs/api/skills.json
|
||||||
META_OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills-meta.json")
|
# rather than bundled into the page's JS chunk. At 50k+ skills the
|
||||||
|
# bundled payload was ~26 MB; lazy-fetch keeps the initial page load
|
||||||
|
# fast and shrinks the JS chunk back to a few hundred KB.
|
||||||
|
OUTPUT = os.path.join(REPO_ROOT, "website", "static", "api", "skills.json")
|
||||||
|
META_OUTPUT = os.path.join(REPO_ROOT, "website", "static", "api", "skills-meta.json")
|
||||||
|
|
||||||
CATEGORY_LABELS = {
|
CATEGORY_LABELS = {
|
||||||
"apple": "Apple",
|
"apple": "Apple",
|
||||||
@@ -531,7 +535,9 @@ def main():
|
|||||||
|
|
||||||
os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
|
os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
|
||||||
with open(OUTPUT, "w", encoding="utf-8") as f:
|
with open(OUTPUT, "w", encoding="utf-8") as f:
|
||||||
json.dump(all_skills, f, indent=2)
|
# Minified — file is served over the wire, not read by humans.
|
||||||
|
# At 50k+ skills the indented version was ~30% larger.
|
||||||
|
json.dump(all_skills, f, separators=(",", ":"), ensure_ascii=False)
|
||||||
|
|
||||||
# Sidecar meta file so the page can render a "Last refreshed" badge
|
# Sidecar meta file so the page can render a "Last refreshed" badge
|
||||||
# without changing the shape of skills.json.
|
# without changing the shape of skills.json.
|
||||||
@@ -547,7 +553,7 @@ def main():
|
|||||||
if index_meta:
|
if index_meta:
|
||||||
meta.update(index_meta)
|
meta.update(index_meta)
|
||||||
with open(META_OUTPUT, "w", encoding="utf-8") as f:
|
with open(META_OUTPUT, "w", encoding="utf-8") as f:
|
||||||
json.dump(meta, f, indent=2)
|
json.dump(meta, f, separators=(",", ":"), ensure_ascii=False)
|
||||||
|
|
||||||
print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
|
print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
|
||||||
print(f" {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
|
print(f" {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
// Runs website/scripts/extract-skills.py and generate-llms-txt.py before
|
// Runs website/scripts/extract-skills.py and generate-llms-txt.py before
|
||||||
// docusaurus build/start so that:
|
// docusaurus build/start so that:
|
||||||
// - website/src/data/skills.json (imported by src/pages/skills/index.tsx)
|
// - website/static/api/skills.json (lazy-fetched by src/pages/skills/index.tsx)
|
||||||
|
// - website/static/api/skills-meta.json (sidecar metadata for the Skills Hub)
|
||||||
// - website/static/llms.txt (agent-friendly short docs index)
|
// - website/static/llms.txt (agent-friendly short docs index)
|
||||||
// - website/static/llms-full.txt (full docs concat for LLM context)
|
// - website/static/llms-full.txt (full docs concat for LLM context)
|
||||||
// all exist without contributors remembering to run Python scripts manually.
|
// all exist without contributors remembering to run Python scripts manually.
|
||||||
@@ -30,7 +31,7 @@ const scriptDir = dirname(fileURLToPath(import.meta.url));
|
|||||||
const websiteDir = resolve(scriptDir, "..");
|
const websiteDir = resolve(scriptDir, "..");
|
||||||
const extractScript = join(scriptDir, "extract-skills.py");
|
const extractScript = join(scriptDir, "extract-skills.py");
|
||||||
const llmsScript = join(scriptDir, "generate-llms-txt.py");
|
const llmsScript = join(scriptDir, "generate-llms-txt.py");
|
||||||
const outputFile = join(websiteDir, "src", "data", "skills.json");
|
const outputFile = join(websiteDir, "static", "api", "skills.json");
|
||||||
const unifiedIndexFile = join(websiteDir, "static", "api", "skills-index.json");
|
const unifiedIndexFile = join(websiteDir, "static", "api", "skills-index.json");
|
||||||
const UNIFIED_INDEX_URL =
|
const UNIFIED_INDEX_URL =
|
||||||
"https://hermes-agent.nousresearch.com/docs/api/skills-index.json";
|
"https://hermes-agent.nousresearch.com/docs/api/skills-index.json";
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
import React, { useState, useMemo, useCallback, useRef, useEffect } from "react";
|
import React, { useState, useMemo, useCallback, useRef, useEffect } from "react";
|
||||||
import Layout from "@theme/Layout";
|
import Layout from "@theme/Layout";
|
||||||
import skills from "../../data/skills.json";
|
|
||||||
import meta from "../../data/skills-meta.json";
|
|
||||||
import styles from "./styles.module.css";
|
import styles from "./styles.module.css";
|
||||||
|
|
||||||
interface Skill {
|
interface Skill {
|
||||||
@@ -21,9 +19,14 @@ interface Skill {
|
|||||||
docsPath?: string;
|
docsPath?: string;
|
||||||
identifier?: string;
|
identifier?: string;
|
||||||
installCmd?: string;
|
installCmd?: string;
|
||||||
|
/** Lowercase pre-joined haystack used by the search filter.
|
||||||
|
* Built once at load time so per-keystroke filtering is a single
|
||||||
|
* `.includes()` per skill instead of array-join + toLowerCase on
|
||||||
|
* every render. Skipped on the wire — added in the loader. */
|
||||||
|
_search?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const allSkills: Skill[] = skills as Skill[];
|
const allSkills: Skill[] = [];
|
||||||
|
|
||||||
interface IndexMeta {
|
interface IndexMeta {
|
||||||
extractedAt?: string;
|
extractedAt?: string;
|
||||||
@@ -32,7 +35,7 @@ interface IndexMeta {
|
|||||||
externalSource?: string;
|
externalSource?: string;
|
||||||
bySource?: Record<string, number>;
|
bySource?: Record<string, number>;
|
||||||
}
|
}
|
||||||
const indexMeta: IndexMeta = meta as IndexMeta;
|
const indexMeta: IndexMeta = {};
|
||||||
|
|
||||||
function formatRelativeTime(iso?: string): string | null {
|
function formatRelativeTime(iso?: string): string | null {
|
||||||
if (!iso) return null;
|
if (!iso) return null;
|
||||||
@@ -398,8 +401,43 @@ function StatCard({ value, label, color }: { value: number; label: string; color
|
|||||||
|
|
||||||
const PAGE_SIZE = 60;
|
const PAGE_SIZE = 60;
|
||||||
|
|
||||||
|
// URLs at which Docusaurus serves the static API JSON. `baseUrl` is `/docs/`, so
|
||||||
|
// `static/api/` ends up at `/docs/api/`. Hardcoding here is fine because the
|
||||||
|
// same `baseUrl` is enforced repo-wide; if it ever changes, this is the only
|
||||||
|
// place that needs to follow.
|
||||||
|
const SKILLS_URL = "/docs/api/skills.json";
|
||||||
|
const META_URL = "/docs/api/skills-meta.json";
|
||||||
|
|
||||||
|
/**
 * Build the lowercase text blob that the search filter scans for one skill.
 *
 * Collects name, description, overview, category label, author and every
 * tag, drops the empty or missing ones, joins the rest with single spaces
 * and lowercases the result. Meant to be computed once per skill at load
 * time so that filtering costs a single `.includes()` per row.
 */
function buildSearchHaystack(s: Skill): string {
  const parts: string[] = [];
  const candidates = [
    s.name,
    s.description,
    s.overview,
    s.categoryLabel,
    s.author,
    ...(s.tags || []),
  ];
  for (const field of candidates) {
    // Skip undefined / empty fields so they add no stray separators.
    if (field) parts.push(field);
  }
  return parts.join(" ").toLowerCase();
}
|
||||||
|
|
||||||
export default function SkillsDashboard() {
|
export default function SkillsDashboard() {
|
||||||
|
// Lazy-loaded data. Was bundled into the JS chunk (~22 MB at 50k skills,
|
||||||
|
// which made the initial page load unusable on mobile). Now fetched on
|
||||||
|
// mount from the same CDN that serves the docs.
|
||||||
|
const [data, setData] = useState<{ skills: Skill[]; meta: IndexMeta } | null>(null);
|
||||||
|
const [loadError, setLoadError] = useState<string | null>(null);
|
||||||
|
|
||||||
const [search, setSearch] = useState("");
|
const [search, setSearch] = useState("");
|
||||||
|
// Debounced copy of `search` — used by the filter. Without the debounce,
|
||||||
|
// typing into the search box ran .filter() over the whole catalog on
|
||||||
|
// every keystroke, which on a 50k-item list felt like the page had
|
||||||
|
// hung. 150ms gives a snappy feel without lagging behind the user.
|
||||||
|
const [debouncedSearch, setDebouncedSearch] = useState("");
|
||||||
const [sourceFilter, setSourceFilter] = useState("all");
|
const [sourceFilter, setSourceFilter] = useState("all");
|
||||||
const [categoryFilter, setCategoryFilter] = useState("all");
|
const [categoryFilter, setCategoryFilter] = useState("all");
|
||||||
const [expandedCard, setExpandedCard] = useState<string | null>(null);
|
const [expandedCard, setExpandedCard] = useState<string | null>(null);
|
||||||
@@ -408,6 +446,42 @@ export default function SkillsDashboard() {
|
|||||||
const searchRef = useRef<HTMLInputElement>(null);
|
const searchRef = useRef<HTMLInputElement>(null);
|
||||||
const gridRef = useRef<HTMLDivElement>(null);
|
const gridRef = useRef<HTMLDivElement>(null);
|
||||||
|
|
||||||
|
// Fetch the skills catalog and its sidecar metadata once, on mount.
useEffect(() => {
  // Abort on unmount so the catalog download is actually stopped when the
  // user navigates away, rather than running to completion and having its
  // result discarded.
  const controller = new AbortController();
  const { signal } = controller;

  (async () => {
    try {
      const [sk, mt] = await Promise.all([
        // The catalog is required: a non-2xx response is a load failure.
        fetch(SKILLS_URL, { signal }).then((r) => {
          if (!r.ok) throw new Error(`skills.json HTTP ${r.status}`);
          return r.json();
        }),
        // Metadata is best-effort: any failure degrades to an empty object.
        fetch(META_URL, { signal })
          .then((r) => (r.ok ? r.json() : {}))
          .catch(() => ({})),
      ]);
      if (signal.aborted) return;
      const skillsArr = Array.isArray(sk) ? (sk as Skill[]) : [];
      // Stamp the precomputed search haystack onto each row.
      for (const s of skillsArr) s._search = buildSearchHaystack(s);
      setData({ skills: skillsArr, meta: mt || {} });
    } catch (err) {
      // A rejection caused by our own abort is expected, not a load error.
      if (signal.aborted) return;
      setLoadError(err instanceof Error ? err.message : String(err));
    }
  })();

  return () => {
    controller.abort();
  };
}, []);
|
||||||
|
|
||||||
|
// Debounce the search input — 150ms feels instant while preventing the
|
||||||
|
// filter from running on every individual keystroke.
|
||||||
|
useEffect(() => {
|
||||||
|
const t = setTimeout(() => setDebouncedSearch(search), 150);
|
||||||
|
return () => clearTimeout(t);
|
||||||
|
}, [search]);
|
||||||
|
|
||||||
|
const allSkillsLocal: Skill[] = data?.skills ?? [];
|
||||||
|
const indexMetaLocal: IndexMeta = data?.meta ?? indexMeta;
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const handler = (e: KeyboardEvent) => {
|
const handler = (e: KeyboardEvent) => {
|
||||||
if (e.key === "/" && document.activeElement?.tagName !== "INPUT") {
|
if (e.key === "/" && document.activeElement?.tagName !== "INPUT") {
|
||||||
@@ -424,15 +498,15 @@ export default function SkillsDashboard() {
|
|||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// Source tabs to show: "all" plus every source present in the loaded
// catalog, kept in SOURCE_ORDER order. The catalog arrives asynchronously,
// so the memo must depend on allSkillsLocal; with an empty dependency list
// it stayed frozen at the initial empty catalog and only "all" was listed.
const sources = useMemo(() => {
  const set = new Set(allSkillsLocal.map((s) => s.source));
  return SOURCE_ORDER.filter((s) => s === "all" || set.has(s));
}, [allSkillsLocal]);
|
||||||
|
|
||||||
const categoryEntries = useMemo(() => {
|
const categoryEntries = useMemo(() => {
|
||||||
const pool =
|
const pool =
|
||||||
sourceFilter === "all"
|
sourceFilter === "all"
|
||||||
? allSkills
|
? allSkillsLocal
|
||||||
: allSkills.filter((s) => s.source === sourceFilter);
|
: allSkillsLocal.filter((s) => s.source === sourceFilter);
|
||||||
const map = new Map<string, { label: string; count: number }>();
|
const map = new Map<string, { label: string; count: number }>();
|
||||||
for (const s of pool) {
|
for (const s of pool) {
|
||||||
const key = s.category || "uncategorized";
|
const key = s.category || "uncategorized";
|
||||||
@@ -452,24 +526,22 @@ export default function SkillsDashboard() {
|
|||||||
}, [sourceFilter]);
|
}, [sourceFilter]);
|
||||||
|
|
||||||
const filtered = useMemo(() => {
|
const filtered = useMemo(() => {
|
||||||
const q = search.toLowerCase().trim();
|
const q = debouncedSearch.toLowerCase().trim();
|
||||||
return allSkills.filter((s) => {
|
return allSkillsLocal.filter((s) => {
|
||||||
if (sourceFilter !== "all" && s.source !== sourceFilter) return false;
|
if (sourceFilter !== "all" && s.source !== sourceFilter) return false;
|
||||||
if (categoryFilter !== "all" && s.category !== categoryFilter) return false;
|
if (categoryFilter !== "all" && s.category !== categoryFilter) return false;
|
||||||
if (q) {
|
if (q) {
|
||||||
const haystack = [s.name, s.description, s.overview, s.categoryLabel, s.author, ...(s.tags || [])]
|
// _search is pre-built in the load effect — single .includes() per row.
|
||||||
.join(" ")
|
return (s._search || "").includes(q);
|
||||||
.toLowerCase();
|
|
||||||
return haystack.includes(q);
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
}, [search, sourceFilter, categoryFilter]);
|
}, [debouncedSearch, sourceFilter, categoryFilter, allSkillsLocal]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
setVisibleCount(PAGE_SIZE);
|
setVisibleCount(PAGE_SIZE);
|
||||||
setExpandedCard(null);
|
setExpandedCard(null);
|
||||||
}, [search, sourceFilter, categoryFilter]);
|
}, [debouncedSearch, sourceFilter, categoryFilter]);
|
||||||
|
|
||||||
const visible = filtered.slice(0, visibleCount);
|
const visible = filtered.slice(0, visibleCount);
|
||||||
const hasMore = visibleCount < filtered.length;
|
const hasMore = visibleCount < filtered.length;
|
||||||
@@ -512,15 +584,22 @@ export default function SkillsDashboard() {
|
|||||||
<h1 className={styles.heroTitle}>Skills Hub</h1>
|
<h1 className={styles.heroTitle}>Skills Hub</h1>
|
||||||
<p className={styles.heroSub}>
|
<p className={styles.heroSub}>
|
||||||
Discover, search, and install from{" "}
|
Discover, search, and install from{" "}
|
||||||
<strong className={styles.heroAccent}>{allSkills.length}</strong> skills
|
<strong className={styles.heroAccent}>
|
||||||
across {sources.length - 1} registries
|
{data ? allSkillsLocal.length.toLocaleString() : "…"}
|
||||||
|
</strong>{" "}
|
||||||
|
skills across {sources.length - 1} registries
|
||||||
|
{loadError && (
|
||||||
|
<span style={{ color: "#f87171", marginLeft: 8 }}>
|
||||||
|
· failed to load catalog ({loadError})
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</p>
|
</p>
|
||||||
{(indexMeta?.indexGeneratedAt || indexMeta?.extractedAt) && (
|
{(indexMetaLocal?.indexGeneratedAt || indexMetaLocal?.extractedAt) && (
|
||||||
<p className={styles.heroSub} style={{ fontSize: "0.85rem", opacity: 0.75 }}>
|
<p className={styles.heroSub} style={{ fontSize: "0.85rem", opacity: 0.75 }}>
|
||||||
Catalog refreshed{" "}
|
Catalog refreshed{" "}
|
||||||
<span title={indexMeta.indexGeneratedAt || indexMeta.extractedAt}>
|
<span title={indexMetaLocal.indexGeneratedAt || indexMetaLocal.extractedAt}>
|
||||||
{formatRelativeTime(
|
{formatRelativeTime(
|
||||||
indexMeta.indexGeneratedAt || indexMeta.extractedAt,
|
indexMetaLocal.indexGeneratedAt || indexMetaLocal.extractedAt,
|
||||||
) || "recently"}
|
) || "recently"}
|
||||||
</span>
|
</span>
|
||||||
{" "}· auto-rebuilt twice daily
|
{" "}· auto-rebuilt twice daily
|
||||||
@@ -529,18 +608,18 @@ export default function SkillsDashboard() {
|
|||||||
|
|
||||||
<div className={styles.statsRow}>
|
<div className={styles.statsRow}>
|
||||||
<StatCard
|
<StatCard
|
||||||
value={allSkills.filter((s) => s.source === "built-in").length}
|
value={allSkillsLocal.filter((s) => s.source === "built-in").length}
|
||||||
label="Built-in"
|
label="Built-in"
|
||||||
color="#4ade80"
|
color="#4ade80"
|
||||||
/>
|
/>
|
||||||
<StatCard
|
<StatCard
|
||||||
value={allSkills.filter((s) => s.source === "optional").length}
|
value={allSkillsLocal.filter((s) => s.source === "optional").length}
|
||||||
label="Optional"
|
label="Optional"
|
||||||
color="#fbbf24"
|
color="#fbbf24"
|
||||||
/>
|
/>
|
||||||
<StatCard
|
<StatCard
|
||||||
value={
|
value={
|
||||||
allSkills.filter(
|
allSkillsLocal.filter(
|
||||||
(s) => s.source !== "built-in" && s.source !== "optional"
|
(s) => s.source !== "built-in" && s.source !== "optional"
|
||||||
).length
|
).length
|
||||||
}
|
}
|
||||||
@@ -548,7 +627,7 @@ export default function SkillsDashboard() {
|
|||||||
color="#60a5fa"
|
color="#60a5fa"
|
||||||
/>
|
/>
|
||||||
<StatCard
|
<StatCard
|
||||||
value={new Set(allSkills.map((s) => s.category)).size}
|
value={new Set(allSkillsLocal.map((s) => s.category)).size}
|
||||||
label="Categories"
|
label="Categories"
|
||||||
color="#a78bfa"
|
color="#a78bfa"
|
||||||
/>
|
/>
|
||||||
@@ -592,8 +671,8 @@ export default function SkillsDashboard() {
|
|||||||
const conf = SOURCE_CONFIG[src];
|
const conf = SOURCE_CONFIG[src];
|
||||||
const count =
|
const count =
|
||||||
src === "all"
|
src === "all"
|
||||||
? allSkills.length
|
? allSkillsLocal.length
|
||||||
: allSkills.filter((s) => s.source === src).length;
|
: allSkillsLocal.filter((s) => s.source === src).length;
|
||||||
return (
|
return (
|
||||||
<button
|
<button
|
||||||
key={src}
|
key={src}
|
||||||
|
|||||||
Reference in New Issue
Block a user