feat(summary): update prompt display, metrics lexical gate, and edge sanitization
This commit is contained in:
@@ -1,5 +1,13 @@
|
||||
// ============================================================================
|
||||
// atom-extraction.js - L0 叙事锚点提取(三层 themes 版)
|
||||
// atom-extraction.js - L0 场景锚点提取(v2 - 场景摘要 + 图结构)
|
||||
//
|
||||
// 设计依据:
|
||||
// - BGE-M3 (BAAI, 2024): 自然语言段落检索精度最高 → semantic = 纯自然语言
|
||||
// - Interpersonal Circumplex (Kiesler, 1983): 权力轴+情感轴 → dynamics 枚举
|
||||
// - Labov Narrative Structure (1972): 叙事功能轴 → dynamics 枚举补充
|
||||
// - TransE (Bordes, 2013): s/t/r 三元组方向性 → edges 格式
|
||||
//
|
||||
// 每楼层 1-2 个场景锚点(非碎片原子),60-100 字场景摘要
|
||||
// ============================================================================
|
||||
|
||||
import { callLLM, parseJson } from './llm-service.js';
|
||||
@@ -25,10 +33,30 @@ export function isBatchCancelled() {
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// L0 提取 Prompt(三层 themes)
|
||||
// dynamics 封闭枚举(8 个标签,两轴四象限 + 叙事轴)
|
||||
// ============================================================================
|
||||
|
||||
const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话中提取4-8个关键锚点,用于后续语义检索。
|
||||
// Closed vocabulary for the `dynamics` field: eight labels in three groups.
// Membership in this set is the only test applied when sanitizing labels.
const VALID_DYNAMICS = new Set([
    // Power axis (Interpersonal Circumplex: Dominance-Submission)
    //   支配 = dominance: control, command, scrutiny, intimidation, leading
    //   让渡 = yielding: compliance, obedience, surrender, passivity, cooperation
    '支配', '让渡',

    // Affect axis (Interpersonal Circumplex: Hostility-Friendliness)
    //   亲密 = intimacy: tenderness, care, dependence, trust, connection
    //   敌意 = hostility: confrontation, rejection, attack, mockery, exclusion
    '亲密', '敌意',

    // Narrative axis (Labov Narrative Structure)
    //   揭示 = revelation: truth, discovery, confession, exposure, secret
    //   决意 = resolve: choice, commitment, renunciation, declaration, turning point
    //   张力 = tension: suspense, standoff, undercurrent, unease, waiting
    //   丧失 = loss: separation, death, shattering, regret, collapse
    '揭示', '决意', '张力', '丧失',
]);
|
||||
|
||||
// ============================================================================
|
||||
// L0 提取 Prompt
|
||||
// ============================================================================
|
||||
|
||||
const SYSTEM_PROMPT = `你是场景摘要器。从一轮对话中提取1-2个场景锚点,用于语义检索和关系追踪。
|
||||
|
||||
输入格式:
|
||||
<round>
|
||||
@@ -37,126 +65,50 @@ const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话中提取4-8
|
||||
</round>
|
||||
|
||||
只输出严格JSON:
|
||||
{"atoms":[{"t":"类型","s":"主体","o":"客体","v":"谓词","l":"地点","f":"来源","th":{"fn":[],"pt":[],"kw":[]}}]}
|
||||
|
||||
## 类型(t)
|
||||
- emo: 情绪状态变化
|
||||
- act: 关键动作/行为
|
||||
- rev: 揭示/发现/真相
|
||||
- dec: 决定/承诺/宣言
|
||||
- ten: 冲突/张力/对立
|
||||
- loc: 场景/地点变化
|
||||
|
||||
## 字段说明
|
||||
- s: 主体(必填)
|
||||
- o: 客体(可空)
|
||||
- v: 谓词,15字内(必填)
|
||||
- l: 地点(可空)
|
||||
- f: "u"=用户 / "a"=角色(必填)
|
||||
- th: 主题标签(必填,结构化对象)
|
||||
|
||||
## th 三层结构
|
||||
fn(叙事功能)1-2个,枚举:
|
||||
establish=建立设定 | escalate=升级加剧 | reveal=揭示发现 | challenge=挑战试探
|
||||
commit=承诺锁定 | conflict=冲突对抗 | resolve=解决收束 | transform=转变逆转
|
||||
bond=连接羁绊 | break=断裂破坏
|
||||
|
||||
pt(互动模式)1-3个,枚举:
|
||||
power_down=上对下 | power_up=下对上 | power_equal=对等 | power_contest=争夺
|
||||
asymmetric=信息不对称 | witnessed=有观众 | secluded=隔绝私密
|
||||
ritual=仪式正式 | routine=日常惯例 | triangular=三方介入
|
||||
|
||||
kw(具体关键词)1-3个,自由格式
|
||||
|
||||
## 示例输出
|
||||
{"atoms":[
|
||||
{"t":"act","s":"艾拉","o":"古龙","v":"用圣剑刺穿心脏","l":"火山口","f":"a",
|
||||
"th":{"fn":["commit"],"pt":["power_down","ritual"],"kw":["战斗","牺牲"]}},
|
||||
{"t":"emo","s":"林夏","o":"陆远","v":"意识到自己喜欢他","l":"","f":"a",
|
||||
"th":{"fn":["reveal","escalate"],"pt":["asymmetric","secluded"],"kw":["心动","暗恋"]}},
|
||||
{"t":"dec","s":"凯尔","o":"王国","v":"放弃王位继承权","l":"王座厅","f":"a",
|
||||
"th":{"fn":["commit","break"],"pt":["ritual","witnessed"],"kw":["抉择","自由"]}},
|
||||
{"t":"rev","s":"","o":"","v":"管家其实是间谍","l":"","f":"a",
|
||||
"th":{"fn":["reveal"],"pt":["asymmetric"],"kw":["背叛","真相"]}},
|
||||
{"t":"ten","s":"兄弟二人","o":"","v":"为遗产反目","l":"","f":"a",
|
||||
"th":{"fn":["conflict","break"],"pt":["power_contest"],"kw":["冲突","亲情破裂"]}}
|
||||
{"anchors":[
|
||||
{
|
||||
"scene": "60-100字完整场景描述",
|
||||
"who": ["角色名1","角色名2"],
|
||||
"edges": [{"s":"施事方","t":"受事方","r":"互动行为"}],
|
||||
"dynamics": ["标签"],
|
||||
"where": "地点"
|
||||
}
|
||||
]}
|
||||
|
||||
规则:
|
||||
- 只提取对未来检索有价值的锚点
|
||||
- fn 回答"这在故事里推动了什么"
|
||||
- pt 回答"这是什么结构的互动"
|
||||
- kw 用于细粒度检索
|
||||
- 无明显锚点时返回 {"atoms":[]}`;
|
||||
## scene 写法
|
||||
- 纯自然语言,像旁白或日记,不要任何标签/标记/枚举值
|
||||
- 必须包含:角色名、动作、情感氛围、关键细节
|
||||
- 读者只看 scene 就能复原这一幕
|
||||
- 60-100字,信息密集但流畅
|
||||
|
||||
const JSON_PREFILL = '{"atoms":[';
|
||||
## who
|
||||
- 参与互动的角色正式名称,不用代词或别称
|
||||
|
||||
// ============================================================================
|
||||
// Semantic 构建
|
||||
// ============================================================================
|
||||
## edges(关系三元组)
|
||||
- s=施事方 t=受事方 r=互动行为(10-15字)
|
||||
- 每个锚点 1-3 条
|
||||
|
||||
function buildSemantic(atom, userName, aiName) {
|
||||
const type = atom.t || 'act';
|
||||
const subject = atom.s || (atom.f === 'u' ? userName : aiName);
|
||||
const object = atom.o || '';
|
||||
const verb = atom.v || '';
|
||||
const location = atom.l || '';
|
||||
|
||||
// 三层 themes 合并
|
||||
const th = atom.th || {};
|
||||
const tags = [
|
||||
...(Array.isArray(th.fn) ? th.fn : []),
|
||||
...(Array.isArray(th.pt) ? th.pt : []),
|
||||
...(Array.isArray(th.kw) ? th.kw : []),
|
||||
].filter(Boolean);
|
||||
## dynamics(封闭枚举,选0-2个)
|
||||
权力轴:支配(控制/命令/审视) | 让渡(顺从/服从/屈服)
|
||||
情感轴:亲密(温柔/信任/连接) | 敌意(对抗/拒绝/攻击)
|
||||
叙事轴:揭示(真相/秘密) | 决意(选择/承诺) | 张力(对峙/不安) | 丧失(分离/破碎)
|
||||
纯日常无明显模式时 dynamics 为 []
|
||||
|
||||
const typePart = `<${type}>`;
|
||||
const themePart = tags.length > 0 ? ` [${tags.join('/')}]` : '';
|
||||
const locPart = location ? ` 在${location}` : '';
|
||||
const objPart = object ? ` -> ${object}` : '';
|
||||
## where
|
||||
- 场景地点,无明确地点时空字符串
|
||||
|
||||
let semantic = '';
|
||||
switch (type) {
|
||||
case 'emo':
|
||||
semantic = object
|
||||
? `${typePart} ${subject} -> ${verb} (对${object})${locPart}`
|
||||
: `${typePart} ${subject} -> ${verb}${locPart}`;
|
||||
break;
|
||||
## 数量规则
|
||||
- 最多2个。1个够时不凑2个
|
||||
- 明显场景切换(地点/时间/对象变化)时才2个
|
||||
- 同一场景不拆分
|
||||
- 无角色互动时返回 {"anchors":[]}
|
||||
|
||||
case 'act':
|
||||
semantic = `${typePart} ${subject} -> ${verb}${objPart}${locPart}`;
|
||||
break;
|
||||
## 示例
|
||||
输入:艾拉在火山口举起圣剑刺穿古龙心脏,龙血溅满她的铠甲,她跪倒在地痛哭
|
||||
输出:
|
||||
{"anchors":[{"scene":"火山口上艾拉举起圣剑刺穿古龙的心脏,龙血溅满铠甲,古龙轰然倒地,艾拉跪倒在滚烫的岩石上痛哭,完成了她不得不做的弑杀","who":["艾拉","古龙"],"edges":[{"s":"艾拉","t":"古龙","r":"以圣剑刺穿心脏"}],"dynamics":["决意","丧失"],"where":"火山口"}]}`;
|
||||
|
||||
case 'rev':
|
||||
semantic = object
|
||||
? `${typePart} 揭示: ${verb} (关于${object})${locPart}`
|
||||
: `${typePart} 揭示: ${verb}${locPart}`;
|
||||
break;
|
||||
|
||||
case 'dec':
|
||||
semantic = object
|
||||
? `${typePart} ${subject} -> ${verb} (对${object})${locPart}`
|
||||
: `${typePart} ${subject} -> ${verb}${locPart}`;
|
||||
break;
|
||||
|
||||
case 'ten':
|
||||
semantic = object
|
||||
? `${typePart} ${subject} <-> ${object}: ${verb}${locPart}`
|
||||
: `${typePart} ${subject}: ${verb}${locPart}`;
|
||||
break;
|
||||
|
||||
case 'loc':
|
||||
semantic = location
|
||||
? `${typePart} 场景: ${location} - ${verb}`
|
||||
: `${typePart} 场景: ${verb}`;
|
||||
break;
|
||||
|
||||
default:
|
||||
semantic = `${typePart} ${subject} -> ${verb}${objPart}${locPart}`;
|
||||
}
|
||||
|
||||
return semantic + themePart;
|
||||
}
|
||||
// Prefill for the assistant turn: passed as the content of the trailing
// `assistant` message in the callLLM request, so the reply starts inside the
// top-level "anchors" array.
// NOTE(review): presumably re-joined with the model's continuation before
// parseJson is called — confirm in extractAtomsForRoundWithRetry.
const JSON_PREFILL = '{"anchors":[';
|
||||
|
||||
// ============================================================================
|
||||
// 睡眠工具
|
||||
@@ -164,6 +116,100 @@ function buildSemantic(atom, userName, aiName) {
|
||||
|
||||
// Resolve (with no value) after `ms` milliseconds; used to pause between retries.
const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
|
||||
|
||||
// ============================================================================
|
||||
// 清洗与构建
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Sanitize the `dynamics` labels of an anchor.
 *
 * Each entry is stringified and trimmed; only members of the closed
 * VALID_DYNAMICS enum are kept. Repeated labels are collapsed so a duplicate
 * cannot use up one of the two available slots.
 *
 * @param {string[]} raw - the `dynamics` value as parsed from the LLM output
 * @returns {string[]} at most two distinct valid labels, in first-seen order;
 *   an empty array when `raw` is not an array
 */
function sanitizeDynamics(raw) {
    if (!Array.isArray(raw)) return [];

    // A Set keeps insertion order and drops repeats in one pass.
    const labels = new Set();
    for (const item of raw) {
        const label = String(item || '').trim();
        if (VALID_DYNAMICS.has(label)) labels.add(label);
    }
    return [...labels].slice(0, 2);
}
|
||||
|
||||
/**
 * Sanitize the `edges` relation triples of an anchor.
 *
 * Each kept edge has three non-empty, trimmed strings:
 *   s = acting party, t = receiving party, r = the interaction.
 * `r` is capped at 30 characters. The cap is applied per Unicode code point so
 * an emoji or other astral character is never cut in half, and whitespace
 * exposed by the cut is removed. Only string/number field values are accepted;
 * objects, arrays and booleans would otherwise stringify into junk such as
 * "[object Object]", so edges carrying them are dropped.
 *
 * @param {object[]} raw - the `edges` value as parsed from the LLM output
 * @returns {{s: string, t: string, r: string}[]} the first three valid edges,
 *   in input order; an empty array when `raw` is not an array
 */
function sanitizeEdges(raw) {
    if (!Array.isArray(raw)) return [];

    const MAX_EDGES = 3;
    const MAX_RELATION_CHARS = 30;

    // Primitive text only; anything else counts as "missing".
    const toText = (value) => (
        typeof value === 'string' || typeof value === 'number'
            ? String(value || '').trim()
            : ''
    );

    const edges = [];
    for (const item of raw) {
        if (!item || typeof item !== 'object') continue;

        const s = toText(item.s);
        const t = toText(item.t);
        // Spread iterates by code point, so slicing cannot split a surrogate pair.
        const r = [...toText(item.r)].slice(0, MAX_RELATION_CHARS).join('').trimEnd();

        if (s && t && r) edges.push({ s, t, r });
        if (edges.length === MAX_EDGES) break;
    }
    return edges;
}
|
||||
|
||||
/**
 * Sanitize the `who` participant list of an anchor.
 *
 * Entries are trimmed, empty ones are discarded, and duplicates are collapsed
 * while keeping first-seen order. Only string/number entries are accepted:
 * objects, arrays and booleans would otherwise be stringified into bogus names
 * such as "[object Object]" or "true".
 *
 * @param {string[]} raw - the `who` value as parsed from the LLM output
 * @returns {string[]} up to six distinct names; an empty array when `raw` is
 *   not an array
 */
function sanitizeWho(raw) {
    if (!Array.isArray(raw)) return [];

    // A Set gives order-preserving de-duplication.
    const names = new Set();
    for (const item of raw) {
        if (typeof item !== 'string' && typeof item !== 'number') continue;
        const name = String(item || '').trim();
        if (name) names.add(name);
    }
    return [...names].slice(0, 6);
}
|
||||
|
||||
/**
 * Convert one parsed anchor from the LLM output into an atom storage object.
 *
 * `semantic` is set to the scene text itself (plain natural language), so the
 * scene is what gets embedded for retrieval. `who`, `edges`, `dynamics` and
 * `where` carry the sanitized graph-structure fields.
 *
 * Returns null instead of throwing for unusable input, so that one malformed
 * array element (e.g. `null`) does not abort conversion of its siblings.
 *
 * @param {object} anchor - anchor object as emitted by the LLM
 * @param {number} aiFloor - floor number of the AI message
 * @param {number} idx - position of the anchor within the floor (0 or 1)
 * @returns {object|null} the atom, or null when `anchor` is not an object or
 *   its trimmed scene is shorter than 15 characters
 */
function anchorToAtom(anchor, aiFloor, idx) {
    if (!anchor || typeof anchor !== 'object') return null;

    const scene = String(anchor.scene || '').trim();

    // Scenes under 15 characters (including empty ones) are treated as noise.
    if (scene.length < 15) return null;

    return {
        atomId: `atom-${aiFloor}-${idx}`,
        floor: aiFloor,
        source: 'ai',

        // Retrieval layer: the text that is embedded.
        semantic: scene,

        // Scene data.
        scene,

        // Graph-structure layer.
        who: sanitizeWho(anchor.who),
        edges: sanitizeEdges(anchor.edges),
        dynamics: sanitizeDynamics(anchor.dynamics),
        where: String(anchor.where || '').trim(),
    };
}
|
||||
|
||||
// ============================================================================
|
||||
// 单轮提取(带重试)
|
||||
// ============================================================================
|
||||
@@ -196,8 +242,8 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
||||
{ role: 'user', content: input },
|
||||
{ role: 'assistant', content: JSON_PREFILL },
|
||||
], {
|
||||
temperature: 0.2,
|
||||
max_tokens: 1000,
|
||||
temperature: 0.3,
|
||||
max_tokens: 600,
|
||||
timeout,
|
||||
});
|
||||
|
||||
@@ -216,7 +262,7 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
||||
try {
|
||||
parsed = parseJson(fullJson);
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败`);
|
||||
xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败 (attempt ${attempt})`);
|
||||
if (attempt < RETRY_COUNT) {
|
||||
await sleep(RETRY_DELAY);
|
||||
continue;
|
||||
@@ -224,7 +270,9 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!parsed?.atoms || !Array.isArray(parsed.atoms)) {
|
||||
// 兼容:优先 anchors,回退 atoms
|
||||
const rawAnchors = parsed?.anchors || parsed?.atoms;
|
||||
if (!rawAnchors || !Array.isArray(rawAnchors)) {
|
||||
if (attempt < RETRY_COUNT) {
|
||||
await sleep(RETRY_DELAY);
|
||||
continue;
|
||||
@@ -232,22 +280,13 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
||||
return null;
|
||||
}
|
||||
|
||||
const filtered = parsed.atoms
|
||||
.filter(a => a?.t && a?.v)
|
||||
.map((a, idx) => ({
|
||||
atomId: `atom-${aiFloor}-${idx}`,
|
||||
floor: aiFloor,
|
||||
type: a.t,
|
||||
subject: a.s || null,
|
||||
object: a.o || null,
|
||||
value: String(a.v).slice(0, 50),
|
||||
location: a.l || null,
|
||||
source: a.f === 'u' ? 'user' : 'ai',
|
||||
themes: a.th || { fn: [], pt: [], kw: [] },
|
||||
semantic: buildSemantic(a, userName, aiName),
|
||||
}));
|
||||
// 转换为 atom 存储格式(最多 2 个)
|
||||
const atoms = rawAnchors
|
||||
.slice(0, 2)
|
||||
.map((a, idx) => anchorToAtom(a, aiFloor, idx))
|
||||
.filter(Boolean);
|
||||
|
||||
return filtered;
|
||||
return atoms;
|
||||
|
||||
} catch (e) {
|
||||
if (batchCancelled) return null;
|
||||
|
||||
Reference in New Issue
Block a user