2.0变量 , 向量总结正式推送
This commit is contained in:
376
modules/story-summary/vector/llm/atom-extraction.js
Normal file
376
modules/story-summary/vector/llm/atom-extraction.js
Normal file
@@ -0,0 +1,376 @@
|
||||
// ============================================================================
|
||||
// atom-extraction.js - L0 场景锚点提取(v2 - 场景摘要 + 图结构)
|
||||
//
|
||||
// 设计依据:
|
||||
// - BGE-M3 (BAAI, 2024): 自然语言段落检索精度最高 → semantic = 纯自然语言
|
||||
// - TransE (Bordes, 2013): s/t/r 三元组方向性 → edges 格式
|
||||
//
|
||||
// 每楼层 1-2 个场景锚点(非碎片原子),60-100 字场景摘要
|
||||
// ============================================================================
|
||||
|
||||
import { callLLM, parseJson } from './llm-service.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
|
||||
// Tag attached to every xbLog entry written by this module.
const MODULE_ID = 'atom-extraction';

// Batch-extraction tuning knobs (all durations in milliseconds).
const CONCURRENCY = 10;          // rounds processed in parallel per batch
const RETRY_COUNT = 2;           // retries after the first attempt (3 attempts in total)
const RETRY_DELAY = 500;         // base pause between attempts
const DEFAULT_TIMEOUT = 20000;   // timeout value handed to callLLM
const STAGGER_DELAY = 80;        // spacing between request starts inside the first batch
|
||||
|
||||
// Module-wide cancellation flag. Raised by cancelBatchExtraction() and reset to
// false by batchExtractAtoms() when a new batch starts.
let batchCancelled = false;

/**
 * Ask the extraction that is currently running to stop.
 * This only raises the flag; running work observes it at its next check.
 */
export function cancelBatchExtraction() {
    batchCancelled = true;
}

/**
 * @returns {boolean} whether cancellation has been requested
 */
export function isBatchCancelled() {
    return batchCancelled;
}
|
||||
|
||||
// ============================================================================
|
||||
// L0 提取 Prompt
|
||||
// ============================================================================
|
||||
|
||||
// System prompt for L0 extraction. It instructs the model to return strict JSON of
// the form {"anchors":[{scene, edges:[{s,t,r}], where}]} with at most two anchors
// per round. The text is sent to the model verbatim, so it is runtime data.
const SYSTEM_PROMPT = `你是场景摘要器。从一轮对话中提取1-2个场景锚点,用于语义检索和关系追踪。

输入格式:
<round>
<user name="用户名">...</user>
<assistant>...</assistant>
</round>

只输出严格JSON:
{"anchors":[
{
"scene": "60-100字完整场景描述",
"edges": [{"s":"施事方","t":"受事方","r":"互动行为"}],
"where": "地点"
}
]}

## scene 写法
- 纯自然语言,像旁白或日记,不要任何标签/标记/枚举值
- 必须包含:角色名、动作、情感氛围、关键细节
- 读者只看 scene 就能复原这一幕
- 60-100字,信息密集但流畅

## edges(关系三元组)
- s=施事方 t=受事方 r=互动行为(建议 6-12 字,最多 20 字)
- s/t 必须是参与互动的角色正式名称,不用代词或别称
- 只从正文内容中识别角色名,不要把标签名(如 user、assistant)当作角色
- r 使用动作模板短语:“动作+对象/结果”(例:“提出交易条件”、“拒绝对方请求”、“当众揭露秘密”、“安抚对方情绪”)
- r 不要写人名,不要复述整句,不要写心理描写或评价词
- r 正例(合格):提出交易条件、拒绝对方请求、当众揭露秘密、安抚对方情绪、强行打断发言、转移谈话焦点
- r 反例(不合格):我觉得她现在很害怕、他突然非常生气地大喊起来、user开始说话、assistant解释了很多细节
- 每个锚点 1-3 条

## where
- 场景地点,无明确地点时空字符串

## 数量规则
- 最多2个。1个够时不凑2个
- 明显场景切换(地点/时间/对象变化)时才2个
- 同一场景不拆分
- 无角色互动时返回 {"anchors":[]}

## 示例
输入:艾拉在火山口举起圣剑刺穿古龙心脏,龙血溅满她的铠甲,她跪倒在地痛哭
输出:
{"anchors":[{"scene":"火山口上艾拉举起圣剑刺穿古龙的心脏,龙血溅满铠甲,古龙轰然倒地,艾拉跪倒在滚烫的岩石上痛哭,完成了她不得不做的弑杀","edges":[{"s":"艾拉","t":"古龙","r":"以圣剑刺穿心脏"}],"where":"火山口"}]}`;

// Assistant prefill: sent as the start of the reply and prepended again to the
// returned text before JSON parsing.
const JSON_PREFILL = '{"anchors":[';
|
||||
|
||||
// ============================================================================
|
||||
// 睡眠工具
|
||||
// ============================================================================
|
||||
|
||||
/** Resolve (with no value) after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
|
||||
|
||||
// Filler adverbs / connectives removed from relation phrases before they are stored.
const ACTION_STRIP_WORDS = [
    '突然', '非常', '有些', '有点', '轻轻', '悄悄', '缓缓', '立刻',
    '马上', '然后', '并且', '而且', '开始', '继续', '再次', '正在',
];

/** Restrict `v` to the range [min, max]. */
function clamp(v, min, max) {
    return Math.max(min, Math.min(max, v));
}

/**
 * Normalize the `r` (relation) field of an edge into a short action phrase.
 *
 * Steps: NFKC-normalize, drop zero-width characters, strip punctuation and all
 * whitespace, remove the filler words in ACTION_STRIP_WORDS, strip trailing
 * particles (地/得/了/着/过), then cap the result at 12 characters.
 *
 * @param {*} raw - value produced by the LLM; falsy values are treated as empty
 * @returns {string} cleaned phrase, or '' when fewer than 2 UTF-16 units remain
 */
function sanitizeActionPhrase(raw) {
    let text = String(raw || '')
        .normalize('NFKC')
        .replace(/[\u200B-\u200D\uFEFF]/g, '')
        .trim();
    if (!text) return '';

    text = text
        .replace(/[,。!?、;:,.!?;:"'“”‘’()()[\]{}<>《》]/g, '')
        .replace(/\s+/g, '');

    for (const word of ACTION_STRIP_WORDS) {
        text = text.replaceAll(word, '');
    }

    text = text.replace(/(地|得|了|着|过)+$/g, '');

    if (text.length < 2) return '';

    // Truncate by code point, not by UTF-16 unit: slicing the raw string could cut
    // a surrogate pair in half and leave a lone surrogate in the stored phrase.
    const chars = Array.from(text);
    return chars.length > 12 ? chars.slice(0, 12).join('') : text;
}
|
||||
|
||||
/**
 * Heuristic quality score for an atom.
 *
 * Weighted sum of three components, each in [0, 1]:
 *   - scene length / 80, saturating at 1  (weight 0.55)
 *   - edge count / 3, saturating at 1     (weight 0.35)
 *   - 1 when `where` is truthy, else 0    (weight 0.10)
 *
 * @param {string} scene
 * @param {object[]} edges
 * @param {string} where
 * @returns {number} score rounded to three decimals
 */
function calcAtomQuality(scene, edges, where) {
    const sceneScore = clamp(String(scene || '').length / 80, 0, 1);
    const edgeCount = edges?.length || 0;
    const edgeScore = clamp(edgeCount / 3, 0, 1);
    const whereScore = where ? 1 : 0;

    const weighted = 0.55 * sceneScore + 0.35 * edgeScore + 0.10 * whereScore;
    return Number(weighted.toFixed(3));
}
|
||||
|
||||
// ============================================================================
|
||||
// 清洗与构建
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Clean the edge triples returned by the LLM.
 *
 * Non-object entries are ignored; `s` and `t` are stringified and trimmed, `r` is
 * passed through sanitizeActionPhrase. Edges with any empty field are dropped and
 * at most the first three surviving edges are returned.
 *
 * @param {object[]} raw
 * @returns {object[]} array of `{ s, t, r }` (empty when `raw` is not an array)
 */
function sanitizeEdges(raw) {
    if (!Array.isArray(raw)) return [];

    const cleaned = [];
    for (const edge of raw) {
        if (!edge || typeof edge !== 'object') continue;

        const s = String(edge.s || '').trim();
        const t = String(edge.t || '').trim();
        const r = sanitizeActionPhrase(edge.r);

        if (s && t && r) cleaned.push({ s, t, r });
    }

    return cleaned.slice(0, 3);
}
|
||||
|
||||
/**
 * Convert one parsed anchor into the atom object that gets stored.
 *
 * `semantic` is the scene text itself (plain natural language, used as the
 * embedding input); `edges` / `where` carry the graph-structure data.
 *
 * @param {object} anchor - anchor object from the LLM output
 * @param {number} aiFloor - floor (chat index) of the AI message
 * @param {number} idx - ordinal of the anchor within the floor (0 or 1)
 * @returns {object|null} atom object, or null when the anchor is unusable
 */
function anchorToAtom(anchor, aiFloor, idx) {
    // LLM output is untrusted: a null/undefined entry would throw on property access.
    if (!anchor || typeof anchor !== 'object') return null;

    const scene = String(anchor.scene || '').trim();

    // A scene shorter than 15 characters (including an empty one) is treated as noise.
    if (scene.length < 15) return null;

    const edges = sanitizeEdges(anchor.edges);
    const where = String(anchor.where || '').trim();
    const quality = calcAtomQuality(scene, edges, where);

    return {
        atomId: `atom-${aiFloor}-${idx}`,
        floor: aiFloor,
        source: 'ai',

        // Retrieval layer: the only text that is embedded.
        semantic: scene,

        // Graph layer: keys used for diffusion.
        edges,
        where,
        quality,
    };
}
|
||||
|
||||
// ============================================================================
|
||||
// 单轮提取(带重试)
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Extract scene atoms for one round (optional user message + AI reply), retrying
 * up to RETRY_COUNT additional times.
 *
 * Return contract:
 *   - `[]`   : the AI message is empty, cancellation was observed before an attempt,
 *              or the model returned no usable anchors
 *   - `null` : every attempt failed (empty response, unparsable JSON, missing
 *              `anchors` array, or callLLM threw), or callLLM threw after cancellation
 *
 * @param {object|null} userMessage - user message of the round; `mes` and `name` are read
 * @param {object} aiMessage - AI message of the round; `mes` is read
 * @param {number} aiFloor - floor of the AI message, used in atom ids and logs
 * @param {object} [options]
 * @param {number} [options.timeout=DEFAULT_TIMEOUT] - timeout forwarded to callLLM
 * @returns {Promise<object[]|null>}
 */
async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options = {}) {
    const { timeout = DEFAULT_TIMEOUT } = options;

    if (!aiMessage?.mes?.trim()) return [];

    const parts = [];
    const userName = userMessage?.name || '用户';

    if (userMessage?.mes?.trim()) {
        const userText = filterText(userMessage.mes);
        parts.push(`<user name="${userName}">\n${userText}\n</user>`);
    }

    const aiText = filterText(aiMessage.mes);
    parts.push(`<assistant>\n${aiText}\n</assistant>`);

    const input = `<round>\n${parts.join('\n')}\n</round>`;

    for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
        if (batchCancelled) return [];

        const hasRetriesLeft = attempt < RETRY_COUNT;

        try {
            const response = await callLLM([
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: input },
                { role: 'assistant', content: JSON_PREFILL },
            ], {
                temperature: 0.3,
                max_tokens: 600,
                timeout,
            });

            const rawText = String(response || '');
            if (!rawText.trim()) {
                if (!hasRetriesLeft) return null;
                await sleep(RETRY_DELAY);
                continue;
            }

            // The reply continues the prefill, so prepend it again before parsing.
            // parseJson signals failure by returning null; a throw is handled the
            // same way so the warning below is emitted in both cases.
            let parsed = null;
            try {
                parsed = parseJson(JSON_PREFILL + rawText);
            } catch {
                parsed = null;
            }

            if (parsed == null) {
                xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败 (attempt ${attempt})`);
                if (!hasRetriesLeft) return null;
                await sleep(RETRY_DELAY);
                continue;
            }

            // Only the `anchors` key is accepted; anything else counts as a failed attempt.
            const rawAnchors = parsed?.anchors;
            if (!Array.isArray(rawAnchors)) {
                if (!hasRetriesLeft) return null;
                await sleep(RETRY_DELAY);
                continue;
            }

            // Convert to the stored atom format (at most 2 per floor).
            return rawAnchors
                .slice(0, 2)
                .map((anchor, idx) => anchorToAtom(anchor, aiFloor, idx))
                .filter(Boolean);
        } catch (e) {
            if (batchCancelled) return null;

            if (hasRetriesLeft) {
                // Thrown errors back off linearly: 1x, 2x, ... RETRY_DELAY.
                await sleep(RETRY_DELAY * (attempt + 1));
                continue;
            }

            xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
            return null;
        }
    }

    return null;
}
|
||||
|
||||
/**
 * Public single-round entry point; forwards all arguments unchanged to
 * extractAtomsForRoundWithRetry and resolves to whatever it resolves to.
 *
 * @param {object|null} userMessage
 * @param {object} aiMessage
 * @param {number} aiFloor
 * @param {object} [options]
 * @returns {Promise<object[]|null>}
 */
export async function extractAtomsForRound(userMessage, aiMessage, aiFloor, options = {}) {
    const atoms = await extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options);
    return atoms;
}
|
||||
|
||||
// ============================================================================
|
||||
// 批量提取
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Extract atoms for every AI message in a chat, CONCURRENCY rounds at a time.
 *
 * Every non-user message is treated as an AI floor and paired with the message
 * directly before it when that one is a user message. Starting a batch clears the
 * cancellation flag; once cancelBatchExtraction() is called, no further batch is
 * started and results that arrive afterwards are neither collected nor counted.
 *
 * @param {object[]} chat - chat messages; `is_user` distinguishes user messages
 * @param {(completed: number, total: number, failed: number) => void} [onProgress]
 *        called after each round that finishes before cancellation
 * @returns {Promise<object[]>} all extracted atoms, in completion order
 */
export async function batchExtractAtoms(chat, onProgress) {
    if (!chat?.length) return [];

    batchCancelled = false;

    const pairs = [];
    for (let i = 0; i < chat.length; i++) {
        const msg = chat[i];
        // Skip holes / null entries instead of throwing on them.
        if (!msg || msg.is_user) continue;

        const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
        pairs.push({ userMsg, aiMsg: msg, aiFloor: i });
    }

    if (!pairs.length) return [];

    const allAtoms = [];
    let completed = 0;
    let failed = 0;

    // A faulty progress callback must neither abort the batch nor distort the counters.
    const reportProgress = () => {
        try {
            onProgress?.(completed, pairs.length, failed);
        } catch (e) {
            xbLog.error(MODULE_ID, 'onProgress 回调异常', e);
        }
    };

    // Runs one round. Shared by every batch so cancellation and failure accounting
    // are identical everywhere; only the start delay differs.
    const runPair = async (pair, delayMs) => {
        if (delayMs > 0) await sleep(delayMs);
        if (batchCancelled) return;

        let atoms;
        try {
            atoms = await extractAtomsForRoundWithRetry(
                pair.userMsg,
                pair.aiMsg,
                pair.aiFloor,
                { timeout: DEFAULT_TIMEOUT }
            );
        } catch {
            atoms = null; // counted as a failure below
        }

        // A round interrupted by cancellation is not a failure and is not reported.
        if (batchCancelled) return;

        if (atoms?.length) {
            allAtoms.push(...atoms);
        } else if (atoms === null) {
            failed++;
        }
        completed++;
        reportProgress();
    };

    for (let i = 0; i < pairs.length; i += CONCURRENCY) {
        if (batchCancelled) break;

        const batch = pairs.slice(i, i + CONCURRENCY);
        // Only the first batch staggers its request starts.
        const stagger = i === 0 ? STAGGER_DELAY : 0;

        await Promise.all(batch.map((pair, idx) => runPair(pair, idx * stagger)));

        if (i + CONCURRENCY < pairs.length && !batchCancelled) {
            await sleep(30);
        }
    }

    xbLog.info(MODULE_ID, `批量提取完成: ${allAtoms.length} atoms, ${failed} 失败`);

    return allAtoms;
}
|
||||
|
||||
99
modules/story-summary/vector/llm/llm-service.js
Normal file
99
modules/story-summary/vector/llm/llm-service.js
Normal file
@@ -0,0 +1,99 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// vector/llm/llm-service.js - 修复 prefill 传递方式
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
// Tag attached to every xbLog entry written by this module.
const MODULE_ID = 'vector-llm-service';
// Base URL and model sent with every request made by callLLM.
const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';
const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';

// Rolling counter used by generateUniqueId (wraps at 100000).
let callCounter = 0;
|
||||
|
||||
/**
 * Look up the streaming-generation module published on `window`.
 * @returns {object|null} the module when it exposes `xbgenrawCommand`, otherwise null
 */
function getStreamingModule() {
    const candidate = window.xiaobaixStreamingGeneration;
    if (candidate?.xbgenrawCommand) {
        return candidate;
    }
    return null;
}
|
||||
|
||||
/**
 * Build a request id of the form `<prefix>-<counter>-<timestamp in base 36>`.
 * Advances the module-level counter, which wraps at 100000.
 * @param {string} [prefix='llm']
 * @returns {string}
 */
function generateUniqueId(prefix = 'llm') {
    callCounter = (callCounter + 1) % 100000;
    const stamp = Date.now().toString(36);
    return [prefix, callCounter, stamp].join('-');
}
|
||||
|
||||
/**
 * Encode a value as unpadded base64url of its UTF-8 bytes.
 * @param {*} str - converted with String() before encoding
 * @returns {string} base64 with `+` → `-`, `/` → `_`, and trailing `=` removed
 */
function b64UrlEncode(str) {
    const bytes = new TextEncoder().encode(String(str));
    // btoa works on "binary strings", so map every byte to one character first.
    const binary = Array.from(bytes, (byte) => String.fromCharCode(byte)).join('');
    return btoa(binary)
        .replace(/\+/g, '-')
        .replace(/\//g, '_')
        .replace(/=+$/, '');
}
|
||||
|
||||
/**
 * Unified non-streaming LLM call, issued through the streaming module's
 * `xbgenrawCommand` (original note: routed via the tavern backend).
 *
 * If the last message has role `assistant`, it is removed from the message list
 * and passed as the `bottomassistant` argument (assistant prefill) instead.
 *
 * @param {Array<{role: string, content: string}>} messages
 * @param {object} [options]
 * @param {number} [options.temperature=0.2]
 * @param {number} [options.max_tokens=500]
 * @param {number} [options.timeout] - when a positive finite number, the call rejects
 *        after this many milliseconds; the underlying request is not aborted, only abandoned
 * @returns {Promise<string>} the command result converted to a string ('' for null/undefined)
 * @throws {Error} when the streaming module or API key is missing, the call fails, or it times out
 */
export async function callLLM(messages, options = {}) {
    const {
        temperature = 0.2,
        max_tokens = 500,
        timeout,
    } = options;

    const mod = getStreamingModule();
    if (!mod) throw new Error('Streaming module not ready');

    const cfg = getVectorConfig();
    const apiKey = cfg?.online?.key || '';
    if (!apiKey) {
        throw new Error('L0 requires siliconflow API key');
    }

    // Split off the assistant prefill, if present.
    const topMessages = [...messages];
    let assistantPrefill = '';

    if (topMessages.length > 0 && topMessages[topMessages.length - 1]?.role === 'assistant') {
        const lastMsg = topMessages.pop();
        assistantPrefill = lastMsg.content || '';
    }

    const top64 = b64UrlEncode(JSON.stringify(topMessages));
    const uniqueId = generateUniqueId('l0');

    const args = {
        as: 'user',
        nonstream: 'true',
        top64,
        id: uniqueId,
        temperature: String(temperature),
        max_tokens: String(max_tokens),
        api: 'openai',
        apiurl: SILICONFLOW_API_URL,
        apipassword: apiKey,
        model: DEFAULT_L0_MODEL,
    };

    const isQwen3 = String(DEFAULT_L0_MODEL || '').includes('Qwen3');
    if (isQwen3) {
        args.enable_thinking = 'false';
    }

    // The prefill travels in the bottomassistant argument.
    if (assistantPrefill) {
        args.bottomassistant = assistantPrefill;
    }

    const useTimeout = Number.isFinite(timeout) && timeout > 0;
    let timer;

    try {
        const request = mod.xbgenrawCommand(args, '');

        // Callers pass `timeout`; without this race a hung request would never settle.
        const result = useTimeout
            ? await Promise.race([
                request,
                new Promise((_, reject) => {
                    timer = setTimeout(
                        () => reject(new Error(`LLM call timed out after ${timeout}ms`)),
                        timeout
                    );
                }),
            ])
            : await request;

        return String(result ?? '');
    } catch (e) {
        xbLog.error(MODULE_ID, 'LLM调用失败', e);
        throw e;
    } finally {
        clearTimeout(timer);
    }
}
|
||||
|
||||
/**
 * Leniently parse JSON from model output.
 *
 * Strips a leading/trailing Markdown code fence, tries to parse the whole text, and
 * otherwise retries on the span from the first `{` to the last `}`.
 *
 * @param {string} text
 * @returns {*} the parsed value, or null when `text` is falsy or nothing parses
 */
export function parseJson(text) {
    if (!text) return null;

    const body = text
        .trim()
        .replace(/^```(?:json)?\s*/i, '')
        .replace(/\s*```$/i, '')
        .trim();

    try {
        return JSON.parse(body);
    } catch {
        // Not valid as a whole; fall through to the brace-delimited attempt.
    }

    const start = body.indexOf('{');
    const end = body.lastIndexOf('}');
    if (start === -1 || end <= start) return null;

    try {
        return JSON.parse(body.slice(start, end + 1));
    } catch {
        return null;
    }
}
|
||||
266
modules/story-summary/vector/llm/reranker.js
Normal file
266
modules/story-summary/vector/llm/reranker.js
Normal file
@@ -0,0 +1,266 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Reranker - 硅基 bge-reranker-v2-m3
|
||||
// 对候选文档进行精排,过滤与 query 不相关的内容
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getApiKey } from './siliconflow.js';
|
||||
|
||||
// Tag attached to every xbLog entry written by this module.
const MODULE_ID = 'reranker';
const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank';
const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3';
const DEFAULT_TIMEOUT = 15000;       // per-request timeout, in ms
const MAX_DOCUMENTS = 100;           // API limit; longer lists are truncated
const RERANK_BATCH_SIZE = 20;        // documents per request in rerankChunks
const RERANK_MAX_CONCURRENCY = 5;    // parallel requests in rerankChunks
|
||||
|
||||
/**
 * Rerank a list of documents against a query.
 *
 * This function never rejects: every failure is reported through `failed: true`
 * together with a fallback result in original order where every score is 0.
 *
 * @param {string} query - query text (sent untruncated)
 * @param {Array<string>} documents - document texts; only the first MAX_DOCUMENTS are used
 * @param {object} [options]
 * @param {number} [options.topN=40] - maximum number of results requested
 * @param {number} [options.timeout=15000] - timeout in ms, applied even when `signal` is given
 * @param {AbortSignal} [options.signal] - external cancellation signal
 * @returns {Promise<{results: Array<{index: number, relevance_score: number}>, failed: boolean}>}
 *          `index` refers to the position in `documents`
 */
export async function rerank(query, documents, options = {}) {
    const { topN = 40, timeout = DEFAULT_TIMEOUT, signal } = options;
    const docs = documents ?? [];
    const zeroScored = (list) => list.map((_, i) => ({ index: i, relevance_score: 0 }));

    if (!query?.trim()) {
        xbLog.warn(MODULE_ID, 'query 为空,跳过 rerank');
        return { results: zeroScored(docs), failed: true };
    }

    if (!docs.length) {
        return { results: [], failed: false };
    }

    const key = getApiKey();
    if (!key) {
        xbLog.warn(MODULE_ID, '未配置 API Key,跳过 rerank');
        return { results: zeroScored(docs), failed: true };
    }

    // Truncate over-long document lists.
    const truncatedDocs = docs.slice(0, MAX_DOCUMENTS);
    if (docs.length > MAX_DOCUMENTS) {
        xbLog.warn(MODULE_ID, `文档数 ${docs.length} 超过限制 ${MAX_DOCUMENTS},已截断`);
    }

    // Drop empty documents while remembering each survivor's original index.
    const validDocs = [];
    const indexMap = []; // validDocs index → original index

    for (let i = 0; i < truncatedDocs.length; i++) {
        const text = String(truncatedDocs[i] || '').trim();
        if (text) {
            validDocs.push(text);
            indexMap.push(i);
        }
    }

    if (!validDocs.length) {
        xbLog.warn(MODULE_ID, '无有效文档,跳过 rerank');
        return { results: [], failed: false };
    }

    // One controller drives the request. Both the timeout and the caller's signal
    // abort it, so passing `signal` no longer disables the timeout.
    const controller = new AbortController();
    const abortFromCaller = () => controller.abort();
    if (signal) {
        if (signal.aborted) {
            controller.abort();
        } else {
            signal.addEventListener('abort', abortFromCaller, { once: true });
        }
    }
    const timeoutId = setTimeout(() => controller.abort(), timeout);

    try {
        const T0 = performance.now();

        const response = await fetch(RERANK_URL, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${key}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: RERANK_MODEL,
                // Zero-darkbox: do not silently truncate query.
                query,
                documents: validDocs,
                top_n: Math.min(topN, validDocs.length),
                return_documents: false,
            }),
            signal: controller.signal,
        });

        if (!response.ok) {
            const errorText = await response.text().catch(() => '');
            throw new Error(`Rerank API ${response.status}: ${errorText.slice(0, 200)}`);
        }

        const data = await response.json();
        const results = data.results || [];

        // Map back to original indices; ignore entries whose index is not one we sent.
        const mapped = results
            .filter(r => Number.isInteger(indexMap[r?.index]))
            .map(r => ({
                index: indexMap[r.index],
                relevance_score: r.relevance_score ?? 0,
            }));

        const elapsed = Math.round(performance.now() - T0);
        xbLog.info(MODULE_ID, `Rerank 完成: ${validDocs.length} docs → ${results.length} selected (${elapsed}ms)`);

        return { results: mapped, failed: false };
    } catch (e) {
        if (e?.name === 'AbortError') {
            xbLog.warn(MODULE_ID, 'Rerank 超时或取消');
        } else {
            xbLog.error(MODULE_ID, 'Rerank 失败', e);
        }

        // Degrade: keep the original order, every score set to 0.
        return {
            results: zeroScored(docs.slice(0, topN)),
            failed: true,
        };
    } finally {
        clearTimeout(timeoutId);
        signal?.removeEventListener('abort', abortFromCaller);
    }
}
|
||||
|
||||
/**
 * Rerank a list of chunk objects.
 *
 * Each chunk's text is `chunk.text`, falling back to `chunk.semantic`. Lists of at
 * most RERANK_BATCH_SIZE chunks use a single request; longer lists are split into
 * batches run by a pool of at most RERANK_MAX_CONCURRENCY workers, then merged.
 *
 * Failure handling:
 *   - everything failed: the first `topN` chunks are returned in original order with
 *     `_rerankScore: 0` and `_rerankFailed: true`
 *   - some batches failed: their chunks are kept with score 0 (exempt from `minScore`)
 *     and flagged `_rerankFailed: true`
 *
 * @param {string} query - query text
 * @param {Array<object>} chunks - chunk objects with a `text` or `semantic` field
 * @param {object} [options]
 * @param {number} [options.topN=40] - maximum number of chunks returned
 * @param {number} [options.minScore=0.1] - minimum relevance score to keep a chunk
 * @param {number} [options.timeout] - forwarded to rerank
 * @param {AbortSignal} [options.signal] - forwarded to rerank
 * @returns {Promise<Array<object>>} chunk copies sorted by descending `_rerankScore`
 */
export async function rerankChunks(query, chunks, options = {}) {
    const { topN = 40, minScore = 0.1 } = options;

    if (!chunks?.length) return [];

    const texts = chunks.map(c => c.text || c.semantic || '');

    // Shared whole-call fallback so both failure paths honour topN.
    const degradeAll = () => chunks.slice(0, topN).map(c => ({
        ...c,
        _rerankScore: 0,
        _rerankFailed: true,
    }));
    // Guards against result indices that do not point at a chunk.
    const hasChunk = (idx) => Number.isInteger(idx) && idx >= 0 && idx < chunks.length;

    // ─── Single batch: call directly ───
    if (texts.length <= RERANK_BATCH_SIZE) {
        const { results, failed } = await rerank(query, texts, {
            topN: Math.min(topN, texts.length),
            timeout: options.timeout,
            signal: options.signal,
        });

        if (failed) {
            return degradeAll();
        }

        return results
            .filter(r => hasChunk(r.index) && r.relevance_score >= minScore)
            .sort((a, b) => b.relevance_score - a.relevance_score)
            .slice(0, topN)
            .map(r => ({
                ...chunks[r.index],
                _rerankScore: r.relevance_score,
            }));
    }

    // ─── Multiple batches: split → run concurrently → merge ───
    const batches = [];
    for (let i = 0; i < texts.length; i += RERANK_BATCH_SIZE) {
        batches.push({
            texts: texts.slice(i, i + RERANK_BATCH_SIZE),
            offset: i,
        });
    }

    const concurrency = Math.min(batches.length, RERANK_MAX_CONCURRENCY);
    xbLog.info(MODULE_ID, `并发 Rerank: ${batches.length} 批 × ≤${RERANK_BATCH_SIZE} docs, concurrency=${concurrency}`);

    const batchResults = new Array(batches.length);
    let failedBatches = 0;

    const runBatch = async (batchIdx) => {
        const batch = batches[batchIdx];
        const { results, failed } = await rerank(query, batch.texts, {
            topN: batch.texts.length,
            timeout: options.timeout,
            signal: options.signal,
        });

        if (failed) {
            failedBatches++;
            // Per-batch degrade: keep original order, score 0.
            batchResults[batchIdx] = batch.texts.map((_, i) => ({
                globalIndex: batch.offset + i,
                relevance_score: 0,
                _batchFailed: true,
            }));
        } else {
            batchResults[batchIdx] = results.map(r => ({
                globalIndex: batch.offset + r.index,
                relevance_score: r.relevance_score,
            }));
        }
    };

    // Worker pool: each worker claims the next unprocessed batch index.
    let nextIdx = 0;
    const worker = async () => {
        while (nextIdx < batches.length) {
            const idx = nextIdx++;
            await runBatch(idx);
        }
    };
    await Promise.all(Array.from({ length: concurrency }, () => worker()));

    // Every batch failed → degrade the whole call.
    if (failedBatches === batches.length) {
        xbLog.warn(MODULE_ID, `全部 ${batches.length} 批 rerank 失败,整体降级`);
        return degradeAll();
    }

    // Merge the results of all batches.
    const merged = batchResults.flat();

    const selected = merged
        .filter(r => hasChunk(r.globalIndex) && (r._batchFailed || r.relevance_score >= minScore))
        .sort((a, b) => b.relevance_score - a.relevance_score)
        .slice(0, topN)
        .map(r => ({
            ...chunks[r.globalIndex],
            _rerankScore: r.relevance_score,
            ...(r._batchFailed ? { _rerankFailed: true } : {}),
        }));

    xbLog.info(MODULE_ID,
        `Rerank 合并: ${merged.length} candidates, ${failedBatches}/${batches.length} 批失败, 选中 ${selected.length}`
    );

    return selected;
}
|
||||
/**
 * Test connectivity to the rerank service with a tiny two-document request.
 *
 * @returns {Promise<{success: boolean, message: string}>} resolves only on success
 * @throws {Error} when no API key is configured or the rerank request fails
 */
export async function testRerankService() {
    const key = getApiKey();
    if (!key) {
        throw new Error('请配置硅基 API Key');
    }

    try {
        const { results, failed } = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });

        // rerank() swallows request errors and reports them through `failed`;
        // without this check a broken connection would be reported as success.
        if (failed) {
            throw new Error('Rerank 请求失败,详见日志');
        }

        return {
            success: true,
            message: `连接成功,返回 ${results.length} 个结果`,
        };
    } catch (e) {
        throw new Error(`连接失败: ${e.message}`);
    }
}
|
||||
59
modules/story-summary/vector/llm/siliconflow.js
Normal file
59
modules/story-summary/vector/llm/siliconflow.js
Normal file
@@ -0,0 +1,59 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// siliconflow.js - 仅保留 Embedding
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
// Endpoint root and model used for embedding requests.
const BASE_URL = 'https://api.siliconflow.cn';
const EMBEDDING_MODEL = 'BAAI/bge-m3';
|
||||
|
||||
/**
 * Read the API key from the `summary_panel_config` entry in localStorage
 * (path `vector.online.key` of the stored JSON).
 * @returns {string|null} the key, or null when it is missing, empty, or unreadable
 */
export function getApiKey() {
    try {
        const stored = localStorage.getItem('summary_panel_config');
        if (!stored) return null;

        const config = JSON.parse(stored);
        return config.vector?.online?.key || null;
    } catch {
        // Best effort: storage or parse errors are treated as "no key".
        return null;
    }
}
|
||||
|
||||
/**
 * Request embeddings for a list of texts.
 *
 * @param {string[]} texts - texts to embed; an empty or missing list resolves to []
 * @param {object} [options]
 * @param {number} [options.timeout=30000] - timeout in ms, applied even when `signal` is given
 * @param {AbortSignal} [options.signal] - external cancellation signal
 * @returns {Promise<number[][]>} one vector per returned item, ordered by the item's `index`
 * @throws {Error} when no API key is configured, the request is aborted or times out,
 *         or the API answers with a non-OK status
 */
export async function embed(texts, options = {}) {
    if (!texts?.length) return [];

    const key = getApiKey();
    if (!key) throw new Error('未配置硅基 API Key');

    const { timeout = 30000, signal } = options;

    // One controller drives the request. Both the timeout and the caller's signal
    // abort it, so passing `signal` no longer disables the timeout.
    const controller = new AbortController();
    const abortFromCaller = () => controller.abort();
    if (signal) {
        if (signal.aborted) {
            controller.abort();
        } else {
            signal.addEventListener('abort', abortFromCaller, { once: true });
        }
    }
    const timeoutId = setTimeout(() => controller.abort(), timeout);

    try {
        const response = await fetch(`${BASE_URL}/v1/embeddings`, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${key}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: EMBEDDING_MODEL,
                input: texts,
            }),
            signal: controller.signal,
        });

        if (!response.ok) {
            const errorText = await response.text().catch(() => '');
            throw new Error(`Embedding ${response.status}: ${errorText.slice(0, 200)}`);
        }

        const data = await response.json();
        return (data.data || [])
            .sort((a, b) => a.index - b.index)
            .map(item => Array.isArray(item.embedding) ? item.embedding : Array.from(item.embedding));
    } finally {
        clearTimeout(timeoutId);
        signal?.removeEventListener('abort', abortFromCaller);
    }
}
|
||||
|
||||
export { EMBEDDING_MODEL as MODELS };
|
||||
Reference in New Issue
Block a user