diff --git a/server/.env.example b/server/.env.example index f528ace..fd18c1d 100644 --- a/server/.env.example +++ b/server/.env.example @@ -2,5 +2,9 @@ # LLM features are disabled when this is not set. OPENROUTER_API_KEY= -# Optional: override the default model -# LLM_MODEL=arcee-ai/trinity-large-preview:free +# Primary model (free tier) +LLM_MODEL=arcee-ai/trinity-large-preview:free + +# Fallback model — used automatically when primary hits rate limit (429) +# Costs per-token. Remove or leave empty to disable fallback. +# LLM_FALLBACK_MODEL=openai/gpt-oss-120b diff --git a/server/src/config/__tests__/llmConfig.test.ts b/server/src/config/__tests__/llmConfig.test.ts index d9b09a3..d41e249 100644 --- a/server/src/config/__tests__/llmConfig.test.ts +++ b/server/src/config/__tests__/llmConfig.test.ts @@ -13,12 +13,14 @@ describe('llmConfig', () => { }); it('returns default config values', () => { + delete process.env.LLM_MODEL; + delete process.env.LLM_FALLBACK_MODEL; const config = getLlmConfig(); - expect(config.model).toBe('arcee-ai/trinity-large-preview:free'); + expect(config.model).toBe(''); + expect(config.fallbackModel).toBeNull(); expect(config.maxTokens).toBe(200); expect(config.temperature).toBe(0.8); expect(config.requestsPerMinute).toBe(20); - expect(config.requestsPerDay).toBe(1000); expect(config.timeoutMs).toBe(15000); expect(config.enabled).toBe(false); }); @@ -43,4 +45,24 @@ describe('llmConfig', () => { const config = getLlmConfig(); expect(config.model).toBe('custom/model:free'); }); + + it('requires LLM_MODEL from environment (no hardcoded default)', () => { + delete process.env.LLM_MODEL; + const config = getLlmConfig(); + expect(config.model).toBe(''); + }); + + it('reads fallback model from environment', () => { + process.env.OPENROUTER_API_KEY = 'key'; + process.env.LLM_MODEL = 'free/model:free'; + process.env.LLM_FALLBACK_MODEL = 'openai/gpt-oss-120b'; + const config = getLlmConfig(); + expect(config.fallbackModel).toBe('openai/gpt-oss-120b'); + }); + + it('fallbackModel is null when not set', () => { + delete process.env.LLM_FALLBACK_MODEL; + const config = getLlmConfig(); + expect(config.fallbackModel).toBeNull(); + }); }); diff --git a/server/src/config/llmConfig.ts b/server/src/config/llmConfig.ts index d1a2d87..e60f271 100644 --- a/server/src/config/llmConfig.ts +++ b/server/src/config/llmConfig.ts @@ -1,10 +1,10 @@ export interface LlmConfig { apiKey: string; model: string; + fallbackModel: string | null; maxTokens: number; temperature: number; requestsPerMinute: number; - requestsPerDay: number; timeoutMs: number; enabled: boolean; } @@ -13,11 +13,11 @@ export function getLlmConfig(): LlmConfig { const apiKey = process.env.OPENROUTER_API_KEY ?? ''; return { apiKey, - model: process.env.LLM_MODEL ?? 'arcee-ai/trinity-large-preview:free', + model: process.env.LLM_MODEL ?? '', + fallbackModel: process.env.LLM_FALLBACK_MODEL ?? null, maxTokens: 200, temperature: 0.8, requestsPerMinute: 20, - requestsPerDay: 1000, timeoutMs: 15000, enabled: apiKey.length > 0, }; diff --git a/server/src/llm/__tests__/openRouterClient.test.ts b/server/src/llm/__tests__/openRouterClient.test.ts index 860864d..3518c9e 100644 --- a/server/src/llm/__tests__/openRouterClient.test.ts +++ b/server/src/llm/__tests__/openRouterClient.test.ts @@ -5,6 +5,7 @@ import type { LlmConfig } from '../../config/llmConfig.js'; const mockConfig: LlmConfig = { apiKey: 'test-key', model: 'arcee-ai/trinity-large-preview:free', + fallbackModel: null, maxTokens: 200, temperature: 0.8, requestsPerMinute: 60, diff --git a/server/src/llm/llmService.ts b/server/src/llm/llmService.ts index bd5eb5d..bb8dd92 100644 --- a/server/src/llm/llmService.ts +++ b/server/src/llm/llmService.ts @@ -26,7 +26,6 @@ export function createLlmService(): LlmService { const client = createOpenRouterClient(config); const queue = createGenerationQueue(client, { requestsPerMinute: config.requestsPerMinute, - requestsPerDay: config.requestsPerDay, }); return {