// ============================================================================
// atom-extraction.js - L0 narrative anchor extraction (three-layer themes version)
// ============================================================================

import { callLLM, parseJson } from './llm-service.js';
|
|
|
|
|
|
import { xbLog } from '../../../../core/debug-core.js';
|
|
|
|
|
|
import { filterText } from '../utils/text-filter.js';
|
|
|
|
|
|
|
|
|
|
|
|
/** Module identifier passed as the first argument of every xbLog call in this file. */
const MODULE_ID = 'atom-extraction';

/** Number of rounds dispatched together in one batch by batchExtractAtoms. */
const CONCURRENCY = 10;
/** Extra attempts after the first one (total attempts per round = RETRY_COUNT + 1). */
const RETRY_COUNT = 2;
/** Base pause between attempts, handed to sleep() (milliseconds). */
const RETRY_DELAY = 500;
/** Default `timeout` option forwarded to callLLM (presumably milliseconds - confirm in llm-service). */
const DEFAULT_TIMEOUT = 20000;
/** Start offset between consecutive requests of the first batch, handed to sleep() (milliseconds). */
const STAGGER_DELAY = 80;

/** Cancellation flag: raised by cancelBatchExtraction(), reset when batchExtractAtoms() starts. */
let batchCancelled = false;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Requests cancellation of the running extraction.
 *
 * Only raises the module-level `batchCancelled` flag; the extraction loops
 * poll that flag and stop at their next check.
 */
export function cancelBatchExtraction() {
    batchCancelled = true;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether cancellation has been requested.
 *
 * @returns {boolean} Current value of the module-level `batchCancelled` flag.
 */
export function isBatchCancelled() {
    return batchCancelled;
}

// ============================================================================
// L0 extraction prompt (three-layer themes)
// ============================================================================

/**
 * System message for the extraction request.
 *
 * Describes the <round> input format, the atom JSON schema (t/s/o/v/l/f/th),
 * the three theme layers (fn / pt / kw) and gives a worked example. The text
 * is sent to the model verbatim, so it must contain nothing but the prompt.
 */
const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话中提取4-8个关键锚点,用于后续语义检索。

输入格式:
<round>
<user name="用户名">...</user>
<assistant name="角色名">...</assistant>
</round>

只输出严格JSON:
{"atoms":[{"t":"类型","s":"主体","o":"客体","v":"谓词","l":"地点","f":"来源","th":{"fn":[],"pt":[],"kw":[]}}]}

## 类型(t)
- emo: 情绪状态变化
- act: 关键动作/行为
- rev: 揭示/发现/真相
- dec: 决定/承诺/宣言
- ten: 冲突/张力/对立
- loc: 场景/地点变化

## 字段说明
- s: 主体(必填)
- o: 客体(可空)
- v: 谓词,15字内(必填)
- l: 地点(可空)
- f: "u"=用户 / "a"=角色(必填)
- th: 主题标签(必填,结构化对象)

## th 三层结构
fn(叙事功能)1-2个,枚举:
establish=建立设定 | escalate=升级加剧 | reveal=揭示发现 | challenge=挑战试探
commit=承诺锁定 | conflict=冲突对抗 | resolve=解决收束 | transform=转变逆转
bond=连接羁绊 | break=断裂破坏

pt(互动模式)1-3个,枚举:
power_down=上对下 | power_up=下对上 | power_equal=对等 | power_contest=争夺
asymmetric=信息不对称 | witnessed=有观众 | secluded=隔绝私密
ritual=仪式正式 | routine=日常惯例 | triangular=三方介入

kw(具体关键词)1-3个,自由格式

## 示例输出
{"atoms":[
{"t":"act","s":"艾拉","o":"古龙","v":"用圣剑刺穿心脏","l":"火山口","f":"a",
"th":{"fn":["commit"],"pt":["power_down","ritual"],"kw":["战斗","牺牲"]}},
{"t":"emo","s":"林夏","o":"陆远","v":"意识到自己喜欢他","l":"","f":"a",
"th":{"fn":["reveal","escalate"],"pt":["asymmetric","secluded"],"kw":["心动","暗恋"]}},
{"t":"dec","s":"凯尔","o":"王国","v":"放弃王位继承权","l":"王座厅","f":"a",
"th":{"fn":["commit","break"],"pt":["ritual","witnessed"],"kw":["抉择","自由"]}},
{"t":"rev","s":"","o":"","v":"管家其实是间谍","l":"","f":"a",
"th":{"fn":["reveal"],"pt":["asymmetric"],"kw":["背叛","真相"]}},
{"t":"ten","s":"兄弟二人","o":"","v":"为遗产反目","l":"","f":"a",
"th":{"fn":["conflict","break"],"pt":["power_contest"],"kw":["冲突","亲情破裂"]}}
]}

规则:
- 只提取对未来检索有价值的锚点
- fn 回答"这在故事里推动了什么"
- pt 回答"这是什么结构的互动"
- kw 用于细粒度检索
- 无明显锚点时返回 {"atoms":[]}`;

/**
 * Assistant-turn prefill: sent as the start of the model's answer and
 * prepended to the returned text before JSON parsing.
 */
const JSON_PREFILL = '{"atoms":[';

// ============================================================================
// Semantic string construction
// ============================================================================

/**
 * Renders one raw atom (as returned by the LLM) into a single-line semantic
 * string: a `<type>` marker, a type-specific sentence, and an optional
 * ` [tag/tag/...]` suffix built from the three theme layers.
 *
 * @param {object} atom - Raw atom with the short keys t/s/o/v/l/f/th.
 * @param {string} userName - Subject fallback when `atom.s` is empty and `atom.f === 'u'`.
 * @param {string} aiName - Subject fallback when `atom.s` is empty otherwise.
 * @returns {string} The semantic string.
 */
function buildSemantic(atom, userName, aiName) {
    const type = atom.t || 'act';
    const subject = atom.s || (atom.f === 'u' ? userName : aiName);
    const object = atom.o || '';
    const verb = atom.v || '';
    const location = atom.l || '';

    // Merge the three theme layers in fn -> pt -> kw order; a layer that is
    // missing or not an array contributes nothing, and falsy tags are dropped.
    const th = atom.th || {};
    const tags = [th.fn, th.pt, th.kw]
        .flatMap((layer) => (Array.isArray(layer) ? layer : []))
        .filter(Boolean);

    const typePart = `<${type}>`;
    const themePart = tags.length > 0 ? ` [${tags.join('/')}]` : '';
    const locPart = location ? ` 在${location}` : '';
    const objPart = object ? ` -> ${object}` : '';

    let semantic;
    switch (type) {
        // Emotion and decision share one layout: the object is a "(对…)" note.
        case 'emo':
        case 'dec':
            semantic = object
                ? `${typePart} ${subject} -> ${verb} (对${object})${locPart}`
                : `${typePart} ${subject} -> ${verb}${locPart}`;
            break;

        // Revelation: the subject is not part of the sentence.
        case 'rev':
            semantic = object
                ? `${typePart} 揭示: ${verb} (关于${object})${locPart}`
                : `${typePart} 揭示: ${verb}${locPart}`;
            break;

        // Tension: subject and object are rendered as two opposed sides.
        case 'ten':
            semantic = object
                ? `${typePart} ${subject} <-> ${object}: ${verb}${locPart}`
                : `${typePart} ${subject}: ${verb}${locPart}`;
            break;

        // Scene change: the location leads, subject and object are omitted.
        case 'loc':
            semantic = location
                ? `${typePart} 场景: ${location} - ${verb}`
                : `${typePart} 场景: ${verb}`;
            break;

        // 'act' and any unrecognised type use the generic action layout.
        case 'act':
        default:
            semantic = `${typePart} ${subject} -> ${verb}${objPart}${locPart}`;
    }

    return semantic + themePart;
}

// ============================================================================
// Sleep utility
// ============================================================================

/** Returns a promise that resolves (with no value) once a `ms`-millisecond timer fires. */
const sleep = (ms) =>
    new Promise((resolve) => {
        setTimeout(resolve, ms);
    });

// ============================================================================
// Single-round extraction (with retry)
// ============================================================================

/**
 * Extracts narrative atoms for one dialogue round, retrying on empty,
 * unparsable or malformed LLM output and on thrown errors.
 *
 * @param {?{name?: string, mes?: string}} userMessage - User turn of the round; may be null.
 * @param {?{name?: string, mes?: string}} aiMessage - AI turn of the round.
 * @param {number} aiFloor - Index of the AI message; used in atom ids and log lines.
 * @param {{timeout?: number}} [options] - `timeout` is forwarded to callLLM (default DEFAULT_TIMEOUT).
 * @returns {Promise<?Array<object>>} Normalised atoms; `[]` when the AI message is
 *   blank or cancellation was seen before an attempt; `null` when every attempt
 *   failed (or an attempt threw while cancellation was pending).
 */
async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options = {}) {
    const { timeout = DEFAULT_TIMEOUT } = options;

    // A round without AI text has nothing to extract.
    if (!aiMessage?.mes?.trim()) return [];

    const userName = userMessage?.name || '用户';
    const aiName = aiMessage.name || '角色';

    // Build the <round> payload; the user part is optional.
    const parts = [];
    if (userMessage?.mes?.trim()) {
        const userText = filterText(userMessage.mes);
        parts.push(`<user name="${userName}">\n${userText}\n</user>`);
    }
    const aiText = filterText(aiMessage.mes);
    parts.push(`<assistant name="${aiName}">\n${aiText}\n</assistant>`);
    const input = `<round>\n${parts.join('\n')}\n</round>`;

    for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
        if (batchCancelled) return [];
        const canRetry = attempt < RETRY_COUNT;

        try {
            const response = await callLLM([
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: input },
                { role: 'assistant', content: JSON_PREFILL },
            ], {
                temperature: 0.2,
                max_tokens: 1000,
                timeout,
            });

            const rawText = String(response || '');

            let parsed = null;
            if (rawText.trim()) {
                // Normally the response continues the prefill, so the prefill is
                // put back in front. If the response already starts with the whole
                // object, prepending again would produce malformed JSON.
                // NOTE(review): whether that can happen depends on how callLLM
                // handles the assistant prefill - confirm.
                const alreadyComplete = /^\s*\{\s*"atoms"\s*:/.test(rawText);
                const fullJson = alreadyComplete ? rawText : JSON_PREFILL + rawText;
                try {
                    parsed = parseJson(fullJson);
                } catch {
                    xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败`);
                }
            }

            // Empty text, a parse failure and a missing `atoms` array are all
            // handled the same way: pause and retry, or give up on the last attempt.
            if (!Array.isArray(parsed?.atoms)) {
                if (canRetry) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }

            return parsed.atoms
                .filter((a) => a?.t && a?.v)
                .map((a, idx) => ({
                    atomId: `atom-${aiFloor}-${idx}`,
                    floor: aiFloor,
                    type: a.t,
                    subject: a.s || null,
                    object: a.o || null,
                    value: String(a.v).slice(0, 50),
                    location: a.l || null,
                    source: a.f === 'u' ? 'user' : 'ai',
                    themes: a.th || { fn: [], pt: [], kw: [] },
                    semantic: buildSemantic(a, userName, aiName),
                }));
        } catch (e) {
            if (batchCancelled) return null;

            if (canRetry) {
                // Thrown errors back off linearly: 1x, 2x, ... RETRY_DELAY.
                await sleep(RETRY_DELAY * (attempt + 1));
                continue;
            }
            xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
            return null;
        }
    }

    return null;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Public entry point for extracting the atoms of a single round.
 * Forwards all arguments unchanged to extractAtomsForRoundWithRetry.
 *
 * @param {?object} userMessage - User turn of the round; may be null.
 * @param {?object} aiMessage - AI turn of the round.
 * @param {number} aiFloor - Index of the AI message.
 * @param {{timeout?: number}} [options] - Forwarded as-is.
 * @returns {Promise<?Array<object>>} Whatever extractAtomsForRoundWithRetry resolves to.
 */
export async function extractAtomsForRound(userMessage, aiMessage, aiFloor, options = {}) {
    return extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options);
}

// ============================================================================
// Batch extraction
// ============================================================================

/**
 * Extracts atoms for every AI message of a chat, CONCURRENCY rounds at a time.
 *
 * Each non-user message forms a round together with the directly preceding
 * message when that one is a user message. Requests of the first batch are
 * started STAGGER_DELAY ms apart; later batches start all at once. Once
 * cancellation is requested, results that arrive afterwards are discarded and
 * no further batch is started.
 *
 * @param {Array<?object>} chat - Chat messages; entries expose `is_user`, `name`, `mes`.
 * @param {(completed: number, total: number, failed: number) => void} [onProgress]
 *   Called after each finished round. Errors thrown by it are logged and do not
 *   affect the counters or the extraction.
 * @returns {Promise<Array<object>>} All extracted atoms, ordered by floor.
 */
export async function batchExtractAtoms(chat, onProgress) {
    if (!chat?.length) return [];

    batchCancelled = false;

    // Pause between two consecutive batches (milliseconds).
    const BATCH_GAP_MS = 30;

    const pairs = [];
    for (let i = 0; i < chat.length; i++) {
        const message = chat[i];
        // Skip holes/null entries instead of crashing on them.
        if (!message || message.is_user) continue;
        const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
        pairs.push({ userMsg, aiMsg: message, aiFloor: i });
    }

    if (!pairs.length) return [];

    // One slot per pair keeps the output in floor order no matter which
    // request finishes first.
    const collected = pairs.map(() => []);
    let completed = 0;
    let failed = 0;

    const reportProgress = () => {
        try {
            onProgress?.(completed, pairs.length, failed);
        } catch (e) {
            // A broken progress callback must not be counted as an extraction failure.
            xbLog.warn(MODULE_ID, 'onProgress 回调出错', e);
        }
    };

    // Processes one pair; never rejects, so Promise.all below cannot fail fast.
    const runPair = async (pair, slot, startDelay) => {
        if (startDelay > 0) await sleep(startDelay);
        if (batchCancelled) return;

        let atoms;
        try {
            atoms = await extractAtomsForRoundWithRetry(
                pair.userMsg,
                pair.aiMsg,
                pair.aiFloor,
                { timeout: DEFAULT_TIMEOUT }
            );
        } catch (e) {
            xbLog.error(MODULE_ID, `floor ${pair.aiFloor} 失败`, e);
            atoms = null;
        }

        // A result that arrives after cancellation is dropped and not counted.
        if (batchCancelled) return;

        if (atoms === null) {
            failed++;
        } else if (atoms?.length) {
            collected[slot] = atoms;
        }
        completed++;
        reportProgress();
    };

    for (let i = 0; i < pairs.length; i += CONCURRENCY) {
        if (batchCancelled) break;

        const batch = pairs.slice(i, i + CONCURRENCY);
        // Only the very first batch is staggered.
        const stagger = i === 0 ? STAGGER_DELAY : 0;

        await Promise.all(batch.map((pair, idx) => runPair(pair, i + idx, idx * stagger)));

        if (i + CONCURRENCY < pairs.length && !batchCancelled) {
            await sleep(BATCH_GAP_MS);
        }
    }

    const allAtoms = collected.flat();

    xbLog.info(MODULE_ID, `批量提取完成: ${allAtoms.length} atoms, ${failed} 失败`);

    return allAtoms;
}
|