feat: add llm_debug_log table for persistent LLM failure diagnostics

Captures full prompt content, response text, HTTP status, and finish reason on every LLM failure path. Replaces silent null returns in openRouterClient with structured CompletionFailure objects so error details propagate up through the queue and service layers. Also removes recipeList from backstoryAndDesires prompt (not useful for backstory generation) and adds schema migration v3. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 18:35:44 +00:00
parent 18f649d766
commit 087426e894
12 changed files with 153 additions and 26 deletions
--- a/server/src/llm/tests/backstoryGenerator.test.ts
+++ b/server/src/llm/tests/backstoryGenerator.test.ts
@@ -236,7 +236,6 @@ describe('generateBackstoryAndDesires', () => {
    expect(mockLlm.generate).toHaveBeenCalledWith('backstoryAndDesires', expect.objectContaining({
      npcName: 'Ava',
      resourceTypes: expect.stringContaining('Log'),
-      recipeList: expect.stringContaining('craft_wooden_axe'),
      structureList: 'none',
    }));
  });
--- a/server/src/llm/tests/openRouterClient.test.ts
+++ b/server/src/llm/tests/openRouterClient.test.ts
@@ -1,5 +1,5 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import { createOpenRouterClient } from '../openRouterClient.js';
+import { createOpenRouterClient, isFailed } from '../openRouterClient.js';
 import type { LlmConfig } from '../../config/llmConfig.js';

 const mockConfig: LlmConfig = {
@@ -114,7 +114,7 @@ describe('openRouterClient', () => {
    });
  });

-  it('returns null on non-429 HTTP errors', async () => {
+  it('returns failure on non-429 HTTP errors', async () => {
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: false,
      status: 500,
@@ -127,10 +127,13 @@ describe('openRouterClient', () => {
      user: 'usr',
    });

-    expect(result).toBeNull();
+    expect(isFailed(result)).toBe(true);
+    if (isFailed(result)) {
+      expect(result.httpStatus).toBe(500);
+    }
  });

-  it('returns null on network error', async () => {
+  it('returns failure on network error', async () => {
    globalThis.fetch = vi.fn().mockRejectedValue(new Error('Network error'));

    const client = createOpenRouterClient(mockConfig);
@@ -139,10 +142,13 @@ describe('openRouterClient', () => {
      user: 'usr',
    });

-    expect(result).toBeNull();
+    expect(isFailed(result)).toBe(true);
+    if (isFailed(result)) {
+      expect(result.errorMessage).toBe('Network error');
+    }
  });

-  it('returns null when response has no choices', async () => {
+  it('returns failure when response has no choices', async () => {
    globalThis.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve({ choices: [] }),
@@ -154,7 +160,10 @@ describe('openRouterClient', () => {
      user: 'usr',
    });

-    expect(result).toBeNull();
+    expect(isFailed(result)).toBe(true);
+    if (isFailed(result)) {
+      expect(result.httpStatus).toBe(200);
+    }
  });

  it('includes retries: 0 on first success', async () => {
--- a/server/src/llm/backstoryGenerator.ts
+++ b/server/src/llm/backstoryGenerator.ts
@@ -5,6 +5,7 @@ import type { ItemRegistry } from '../industry/itemRegistry.js';
 import type { RecipeRegistry } from '../industry/recipeRegistry.js';
 import type { StructureData } from '../systems/buildingSystem.js';
 import type { LogService } from '../services/logService.js';
+import { logLlmDebug } from './llmDebugLog.js';

 const STAT_KEYS: (keyof Stats)[] = [
  'strength', 'dexterity', 'constitution', 'intelligence', 'perception',
@@ -96,25 +97,32 @@ export async function generateBackstoryAndDesires(

  const context = getWorldContext(world);

-  const result = await llmService.generate('backstoryAndDesires', {
+  const promptVars = {
    npcName: name,
    stats: formatStatsForPrompt(stats),
    resourceTypes: context.resourceTypes,
-    recipeList: context.recipeList,
    structureList: context.structureList,
-  });
+  };
+
+  const promptSize = Object.values(promptVars).reduce((sum, v) => sum + v.length, 0);
+  console.log(`[Backstory] requesting for ${name} (${entityId}) promptVarSize=${promptSize} resources=${context.resourceTypes.length}ch structures=${context.structureList.length}ch`);
+
+  const result = await llmService.generate('backstoryAndDesires', promptVars);

  if (!result) {
-    logService?.log('warning', 'LLM', `Backstory generation returned null for ${name} (entity ${entityId})`);
+    logService?.log('warning', 'LLM', `Backstory generation returned null for ${name} (entity ${entityId}), promptSize=${promptSize}`);
    return;
  }

+  console.log(`[Backstory] got response for ${name} (${entityId}), length=${result.length}`);
+
  let parsed: { backstory?: string; desires?: unknown[] } | null = null;
  try {
    parsed = JSON.parse(result);
  } catch {
    // JSON parse failed — use raw string as backstory fallback
    logService?.log('warning', 'LLM', `JSON parse failed for backstory (entity ${entityId}), using raw string`);
+    logLlmDebug({ templateName: 'backstoryAndDesires', stage: 'parse_failed', response: result });
    world.addComponent<string>(entityId, 'backstory', result);
    return;
  }
--- a/server/src/llm/generationQueue.ts
+++ b/server/src/llm/generationQueue.ts
@@ -1,4 +1,4 @@
-import type { OpenRouterClient, CompletionRequest, CompletionResult } from './openRouterClient.js';
+import type { OpenRouterClient, CompletionRequest, CompletionResult, CompletionFailure } from './openRouterClient.js';

 interface QueueItem {
  request: CompletionRequest;
@@ -33,7 +33,7 @@ export function createGenerationQueue(
      item.resolve(result);
    }).catch((err) => {
      console.warn('[LLM Queue] Request failed:', err?.message ?? err);
-      item.resolve(null);
+      item.resolve({ failed: true, errorMessage: err?.message ?? String(err) });
    }).finally(() => {
      processing = false;
      if (queue.length > 0) {
--- a/server/src/llm/llmDebugLog.ts
+++ b/server/src/llm/llmDebugLog.ts
@@ -0,0 +1,36 @@
+import { getDatabase } from '../persistence/database.js';
+
+export interface LlmDebugEntry {
+  templateName: string;
+  stage: string;
+  systemPrompt?: string;
+  userPrompt?: string;
+  response?: string;
+  error?: string;
+  model?: string;
+  httpStatus?: number;
+  finishReason?: string;
+}
+
+export function logLlmDebug(entry: LlmDebugEntry): void {
+  try {
+    const db = getDatabase();
+    db.prepare(`
+      INSERT INTO llm_debug_log (timestamp, template_name, stage, system_prompt, user_prompt, response, error, model, http_status, finish_reason)
+      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    `).run(
+      new Date().toISOString(),
+      entry.templateName,
+      entry.stage,
+      entry.systemPrompt ?? null,
+      entry.userPrompt ?? null,
+      entry.response ?? null,
+      entry.error ?? null,
+      entry.model ?? null,
+      entry.httpStatus ?? null,
+      entry.finishReason ?? null,
+    );
+  } catch {
+    // Don't break gameplay if debug logging fails
+  }
+}
--- a/server/src/llm/llmService.ts
+++ b/server/src/llm/llmService.ts
@@ -1,5 +1,5 @@
 import { getLlmConfig } from '../config/llmConfig.js';
-import { createOpenRouterClient, isRateLimited, isSuccess } from './openRouterClient.js';
+import { createOpenRouterClient, isRateLimited, isSuccess, isFailed } from './openRouterClient.js';
 import { createGenerationQueue } from './generationQueue.js';
 import { createUsageCounters } from './usageCounters.js';
 import { renderTemplate } from './promptTemplate.js';
@@ -7,6 +7,7 @@ import { templates } from './templates.js';
 import { createTokenTracker, type TokenTracker } from './tokenTracker.js';
 import type { LogService } from '../services/logService.js';
 import type { LlmStatsService } from './llmStatsService.js';
+import { logLlmDebug } from './llmDebugLog.js';

 export interface LlmService {
  generate(templateName: string, variables: Record<string, string>): Promise<string | null>;
@@ -93,6 +94,10 @@ export function createLlmService(logService?: LogService, statsService?: LlmStat

      if (isRateLimited(result)) {
        logService?.log('warning', 'LLM', 'Rate limited, switching to fallback');
+        logLlmDebug({
+          templateName, stage: 'rate_limited', model: currentModel,
+          systemPrompt: rendered.system, userPrompt: rendered.user,
+        });
        switchToFallback(result.resetAt);
        statsService?.record(templateName, 0, 0, currentModel, 0, true, 0);
        return null;
@@ -115,9 +120,25 @@ export function createLlmService(logService?: LogService, statsService?: LlmStat
        return result.content;
      }

-      // null result = failure
+      // failure result
+      if (isFailed(result)) {
+        console.log(`[LLM] generate('${templateName}') failed: http=${result.httpStatus} finish=${result.finishReason} err=${result.errorMessage}`);
+        logLlmDebug({
+          templateName, stage: 'api_failure', model: currentModel,
+          systemPrompt: rendered.system, userPrompt: rendered.user,
+          error: result.errorMessage,
+          httpStatus: result.httpStatus,
+          finishReason: result.finishReason,
+        });
+      } else {
+        console.log(`[LLM] generate('${templateName}') returned null from queue`);
+        logLlmDebug({
+          templateName, stage: 'queue_null', model: currentModel,
+          systemPrompt: rendered.system, userPrompt: rendered.user,
+        });
+      }
      statsService?.record(templateName, 0, 0, currentModel, 0, true, 0);
-      return result;
+      return null;
    },

    queueDepth(): number {
--- a/server/src/llm/openRouterClient.ts
+++ b/server/src/llm/openRouterClient.ts
@@ -38,7 +38,14 @@ export interface CompletionSuccess {
  retries: number;
 }

-export type CompletionResult = CompletionSuccess | null | RateLimitInfo;
+export interface CompletionFailure {
+  failed: true;
+  httpStatus?: number;
+  finishReason?: string;
+  errorMessage?: string;
+}
+
+export type CompletionResult = CompletionSuccess | null | RateLimitInfo | CompletionFailure;

 export function isRateLimited(result: CompletionResult): result is RateLimitInfo {
  return result != null && typeof result === 'object' && 'rateLimited' in result;
@@ -48,6 +55,10 @@ export function isSuccess(result: CompletionResult): result is CompletionSuccess
  return result != null && typeof result === 'object' && 'content' in result;
 }

+export function isFailed(result: CompletionResult): result is CompletionFailure {
+  return result != null && typeof result === 'object' && 'failed' in result;
+}
+
 export interface OpenRouterClient {
  complete(request: CompletionRequest): Promise<CompletionResult>;
 }
@@ -97,12 +108,17 @@ export function createOpenRouterClient(config: LlmConfig, logService?: LogServic
            }
            console.warn(`OpenRouter API error: ${response.status} ${response.statusText}`);
            logService?.log('error', 'LLM', `OpenRouter API error: ${response.status} ${response.statusText}`);
-            return null;
+            return { failed: true, httpStatus: response.status, errorMessage: response.statusText };
          }

          const data = await response.json();
          const content = data?.choices?.[0]?.message?.content;
-          if (!content) return null;
+          if (!content) {
+            const finishReason = data?.choices?.[0]?.finish_reason;
+            const errMsg = data?.error?.message;
+            console.log(`[LLM] API 200 but no content: finish_reason=${finishReason} error=${errMsg ?? 'none'} model=${request.model ?? 'default'}`);
+            return { failed: true, httpStatus: 200, finishReason, errorMessage: errMsg };
+          }
          const usage = data?.usage;
          return {
            content,
@@ -126,7 +142,7 @@ export function createOpenRouterClient(config: LlmConfig, logService?: LogServic

      console.warn('OpenRouter request failed:', lastError?.message);
      logService?.log('error', 'LLM', `OpenRouter request failed: ${lastError?.message}`);
-      return null;
+      return { failed: true, errorMessage: lastError?.message };
    },
  };
 }
--- a/server/src/llm/templates.ts
+++ b/server/src/llm/templates.ts
@@ -80,7 +80,6 @@ export const templates: Record<string, PromptTemplate> = {
      'Stats (each ranges 3-18, 10 is average): {{stats}}\n\n' +
      'Current world state:\n' +
      '- Available resources: {{resourceTypes}}\n' +
-      '- Known recipes: {{recipeList}}\n' +
      '- Existing structures: {{structureList}}\n\n' +
      'Respond with JSON:\n' +
      '{"backstory": "1-2 sentence backstory", "desires": [' +
--- a/server/src/persistence/tests/database.test.ts
+++ b/server/src/persistence/tests/database.test.ts
@@ -53,7 +53,7 @@ describe('database', () => {
    openDatabase(dbPath);
    const version = getSchemaVersion();
    expect(version).toBe(CURRENT_SCHEMA_VERSION);
-    expect(version).toBe(2);
+    expect(version).toBe(3);
  });

  it('returns same db on multiple getDatabase calls', () => {
@@ -102,8 +102,8 @@ describe('database', () => {
    expect(result.journal_mode).toBe('wal');
  });

-  it('CURRENT_SCHEMA_VERSION is 2', () => {
-    expect(CURRENT_SCHEMA_VERSION).toBe(2);
+  it('CURRENT_SCHEMA_VERSION is 3', () => {
+    expect(CURRENT_SCHEMA_VERSION).toBe(3);
  });

  it('creates llm_call_log table in new databases', () => {
--- a/server/src/persistence/database.ts
+++ b/server/src/persistence/database.ts
@@ -1,6 +1,6 @@
 import Database from 'better-sqlite3';

-export const CURRENT_SCHEMA_VERSION = 2;
+export const CURRENT_SCHEMA_VERSION = 3;

 let db: Database.Database | null = null;

@@ -117,6 +117,23 @@ CREATE TABLE IF NOT EXISTS llm_call_log (
 );

 CREATE INDEX IF NOT EXISTS idx_llm_call_log_template ON llm_call_log(template_name);
+
+CREATE TABLE IF NOT EXISTS llm_debug_log (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  timestamp TEXT NOT NULL,
+  template_name TEXT NOT NULL,
+  stage TEXT NOT NULL,
+  system_prompt TEXT,
+  user_prompt TEXT,
+  response TEXT,
+  error TEXT,
+  model TEXT,
+  http_status INTEGER,
+  finish_reason TEXT
+);
+
+CREATE INDEX IF NOT EXISTS idx_llm_debug_log_template ON llm_debug_log(template_name);
+CREATE INDEX IF NOT EXISTS idx_llm_debug_log_stage ON llm_debug_log(stage);
 `;

 export function openDatabase(filepath: string): void {
--- a/server/src/persistence/saveManager.ts
+++ b/server/src/persistence/saveManager.ts
@@ -47,6 +47,25 @@ export class SaveManager {
        `);
        db.prepare("INSERT OR IGNORE INTO metadata (key, value) VALUES ('llm_tracking_enabled', '1')").run();
      }
+      if (version < 3) {
+        db.exec(`
+          CREATE TABLE IF NOT EXISTS llm_debug_log (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            timestamp TEXT NOT NULL,
+            template_name TEXT NOT NULL,
+            stage TEXT NOT NULL,
+            system_prompt TEXT,
+            user_prompt TEXT,
+            response TEXT,
+            error TEXT,
+            model TEXT,
+            http_status INTEGER,
+            finish_reason TEXT
+          );
+          CREATE INDEX IF NOT EXISTS idx_llm_debug_log_template ON llm_debug_log(template_name);
+          CREATE INDEX IF NOT EXISTS idx_llm_debug_log_stage ON llm_debug_log(stage);
+        `);
+      }
      db.prepare(
        "UPDATE metadata SET value = ? WHERE key = 'schema_version'",
      ).run(String(CURRENT_SCHEMA_VERSION));
--- a/server/src/systems/inventionSystem.ts
+++ b/server/src/systems/inventionSystem.ts
@@ -12,6 +12,7 @@ import { validateInvention } from '../industry/inventionValidator.js';
 import { registerInvention } from '../industry/inventionRegistrar.js';
 import { industryConfig } from '../config/industryConfig.js';
 import { ENERGY_DECAY_PER_TICK, DAY_NIGHT_RATIO } from '@dflike/shared';
+import { logLlmDebug } from '../llm/llmDebugLog.js';

 export interface InventionSystem {
  update(world: World, tick: number): void;
@@ -126,12 +127,14 @@ export function createInventionSystem(
          const parsed = parseInventionResponse(response);
          if (!parsed) {
            console.log(`[Invention] parse failed for npc=${name} (${entityId}), response=${response.substring(0, 200)}`);
+            logLlmDebug({ templateName: 'invention', stage: 'parse_failed', response });
            return;
          }

          const validation = validateInvention(parsed, itemRegistry);
          if (!validation.valid) {
            console.log(`[Invention] validation failed for npc=${name} (${entityId}): ${validation.error} (suggested: "${parsed.name}" cat=${parsed.category} inputs=${JSON.stringify(parsed.inputs)})`);
+            logLlmDebug({ templateName: 'invention', stage: 'validation_failed', response, error: validation.error });
            return;
          }