// ═══════════════════════════════════════════════════════════════════════════ // vector/llm/llm-service.js - 修复 prefill 传递方式 // ═══════════════════════════════════════════════════════════════════════════ import { xbLog } from '../../../../core/debug-core.js'; import { getVectorConfig } from '../../data/config.js'; const MODULE_ID = 'vector-llm-service'; const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B'; const DEFAULT_L0_API_URL = 'https://api.siliconflow.cn/v1'; let callCounter = 0; const activeL0SessionIds = new Set(); let l0KeyIndex = 0; function getStreamingModule() { const mod = window.xiaobaixStreamingGeneration; return mod?.xbgenrawCommand ? mod : null; } function generateUniqueId(prefix = 'llm') { callCounter = (callCounter + 1) % 100000; return `${prefix}-${callCounter}-${Date.now().toString(36)}`; } function b64UrlEncode(str) { const utf8 = new TextEncoder().encode(String(str)); let bin = ''; utf8.forEach(b => bin += String.fromCharCode(b)); return btoa(bin).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, ''); } function getL0ApiConfig() { const cfg = getVectorConfig() || {}; return cfg.l0Api || { provider: 'siliconflow', url: DEFAULT_L0_API_URL, key: '', model: DEFAULT_L0_MODEL, }; } function normalizeL0ApiConfig(apiConfig = null) { const fallback = getL0ApiConfig(); const next = apiConfig || {}; return { provider: String(next.provider || fallback.provider || 'siliconflow').trim(), url: String(next.url || fallback.url || DEFAULT_L0_API_URL).trim(), key: String(next.key || fallback.key || '').trim(), model: String(next.model || fallback.model || DEFAULT_L0_MODEL).trim(), }; } function getNextKey(rawKey) { const keys = String(rawKey || '') .split(/[,;|\n]+/) .map(k => k.trim()) .filter(Boolean); if (!keys.length) return ''; if (keys.length === 1) return keys[0]; const idx = l0KeyIndex % keys.length; l0KeyIndex = (l0KeyIndex + 1) % keys.length; return keys[idx]; } /** * 统一LLM调用 - 走酒馆后端(非流式) * 临时改为标准 messages 调用,避免 bottomassistant prefill 兼容性问题。 */ export async function callLLM(messages, options = {}) { const { temperature = 0.2, max_tokens = 500, timeout = 40000, apiConfig = null, } = options; const mod = getStreamingModule(); if (!mod) throw new Error('Streaming module not ready'); const apiCfg = normalizeL0ApiConfig(apiConfig); const apiKey = getNextKey(apiCfg.key); if (!apiKey) { throw new Error('L0 requires siliconflow API key'); } const topMessages = [...messages].filter(msg => msg?.role !== 'assistant'); const top64 = b64UrlEncode(JSON.stringify(topMessages)); const uniqueId = generateUniqueId('l0'); const args = { as: 'user', nonstream: 'true', top64, id: uniqueId, temperature: String(temperature), max_tokens: String(max_tokens), api: 'openai', apiurl: String(apiCfg.url || DEFAULT_L0_API_URL).trim(), apipassword: apiKey, model: String(apiCfg.model || DEFAULT_L0_MODEL).trim(), }; const isQwen3 = String(args.model || '').includes('Qwen3'); if (isQwen3) { args.enable_thinking = 'false'; } try { activeL0SessionIds.add(uniqueId); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`L0 request timeout after ${timeout}ms`)), timeout); }); const result = await Promise.race([ mod.xbgenrawCommand(args, ''), timeoutPromise, ]); return String(result ?? ''); } catch (e) { xbLog.error(MODULE_ID, 'LLM调用失败', e); throw e; } finally { activeL0SessionIds.delete(uniqueId); } } export async function testL0Service(apiConfig = {}) { if (!apiConfig?.key) { throw new Error('请配置 L0 API Key'); } const result = await callLLM([ { role: 'system', content: '你是一个测试助手。请只输出 OK。' }, { role: 'user', content: '只输出 OK' }, ], { apiConfig, temperature: 0, max_tokens: 16, timeout: 15000, }); const text = String(result || '').trim(); if (!text) throw new Error('返回为空'); return { success: true, message: `连接成功:${text.slice(0, 60)}` }; } export function cancelAllL0Requests() { const mod = getStreamingModule(); if (!mod?.cancel) return; for (const sessionId of activeL0SessionIds) { try { mod.cancel(sessionId); } catch {} } activeL0SessionIds.clear(); } export function parseJson(text) { if (!text) return null; let s = text.trim().replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/i, '').trim(); try { return JSON.parse(s); } catch { } const i = s.indexOf('{'), j = s.lastIndexOf('}'); if (i !== -1 && j > i) try { return JSON.parse(s.slice(i, j + 1)); } catch { } return null; }