// Files
// LittleWhiteBox/modules/story-summary/vector/llm/query-expansion.js
//
// 334 lines
// 10 KiB
// JavaScript
// Raw Blame History
//
// This file contains ambiguous Unicode characters
// This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// ============================================================================
// query-expansion.js - 检索查询生成器(三层 themes 版)
// ============================================================================
import { callLLM, parseJson } from './llm-service.js';
import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from '../utils/text-filter.js';
import { getContext } from '../../../../../../../extensions.js';
import { getSummaryStore } from '../../data/store.js';
// Tag passed as the first argument to every xbLog call made from this module.
const MODULE_ID = 'query-expansion';
// Forwarded to callLLM as the `sessionId` option.
// NOTE(review): how the LLM service interprets this value is not visible here — confirm.
const SESSION_ID = 'xb6';
// ============================================================================
// 系统提示词
// ============================================================================
// System prompt sent as the `system` message of the query-expansion LLM call.
// The text (in Chinese) asks the model for a strict-JSON reply with the keys
// focus / fn / pt / kw / queries, enumerates the allowed `fn` (narrative function)
// and `pt` (interaction pattern) tags, describes the query DSL, states how many
// items each field should hold, and ends with one worked example.
// This literal is runtime data: editing any character changes what the model is told.
const SYSTEM_PROMPT = `你是检索查询生成器。根据当前对话上下文,生成用于检索历史剧情的查询语句。
## 输出格式严格JSON
{
"focus": ["焦点人物"],
"fn": ["叙事功能"],
"pt": ["互动模式"],
"kw": ["关键词"],
"queries": ["DSL查询语句"]
}
## fn叙事功能枚举
establish=建立设定 | escalate=升级加剧 | reveal=揭示发现 | challenge=挑战试探
commit=承诺锁定 | conflict=冲突对抗 | resolve=解决收束 | transform=转变逆转
bond=连接羁绊 | break=断裂破坏
## pt互动模式枚举
power_down=上对下 | power_up=下对上 | power_equal=对等 | power_contest=争夺
asymmetric=信息不对称 | witnessed=有观众 | secluded=隔绝私密
ritual=仪式正式 | routine=日常惯例 | triangular=三方介入
## DSL 查询格式
- <act> 主体 -> 动作 (-> 客体)? (在地点)?
- <emo> 主体 -> 情绪 (对客体)?
- <dec> 主体 -> 决定/承诺 (对客体)?
- <rev> 揭示: 内容 (关于客体)?
- <ten> 主体A <-> 主体B: 冲突内容
- <loc> 场景: 地点/状态
## 规则
- focus: 核心人物1-4个
- fn: 当前对话涉及的叙事功能1-3个
- pt: 当前对话涉及的互动模式1-3个
- kw: 具体关键词1-4个
- queries: 2-4条 DSL 查询
## 示例
输入:艾拉说"那把剑...我记得它的重量,在火山口的时候"
输出:
{
"focus": ["艾拉", "古龙"],
"fn": ["commit", "bond"],
"pt": ["power_down", "ritual"],
"kw": ["圣剑", "战斗", "火山口"],
"queries": [
"<act> 艾拉 -> 战斗/使用圣剑 -> 古龙 [commit/power_down]",
"<loc> 场景: 火山口 [ritual]",
"<emo> 艾拉 -> 牺牲/决绝 [commit]"
]
}`;
// ============================================================================
// 上下文构建
// ============================================================================
/**
 * Read the active character card from the object returned by getContext().
 *
 * The card is looked up as `characters[characterId]`. Missing fields become
 * empty strings; the description is clipped to 500 characters and the
 * personality to 300.
 *
 * @returns {{name: string, description: string, personality: string}}
 *   All three fields are empty strings when no card is found.
 */
function getCharacterContext() {
  const ctx = getContext();
  const card = ctx.characters?.[ctx.characterId];
  // Falsy fields collapse to '' before clipping.
  const clip = (text, max) => (text || '').slice(0, max);

  return card
    ? {
        name: card.name || '',
        description: clip(card.description, 500),
        personality: clip(card.personality, 300),
      }
    : { name: '', description: '', personality: '' };
}
/**
 * Resolve the user's persona description.
 *
 * `window.power_user.persona_description` is preferred when a `window` global
 * exists and the value is truthy; otherwise `persona_description` on the object
 * returned by getContext() is used. The chosen value is stringified and clipped
 * to 500 characters.
 *
 * @returns {string} The persona text, or '' when neither source provides one.
 */
function getPersonaContext() {
  const MAX_PERSONA_CHARS = 500;
  const ctx = getContext();
  const fromPowerUser =
    typeof window === 'undefined' ? undefined : window.power_user?.persona_description;
  const persona = fromPowerUser || ctx.persona_description;
  return persona ? String(persona).slice(0, MAX_PERSONA_CHARS) : '';
}
/**
 * Format the most recent summary events as one-line strings.
 *
 * Events are read from `getSummaryStore().json.events`. Each line is
 * `[timeLabel] label: summary` (the bracketed prefix is omitted when there is
 * no time label), where `label` is the event title or, failing that, its
 * participants joined with '/'. A trailing floor marker such as ` (#12)` or
 * ` (#12-15)` is stripped from the summary, which is then clipped to 80
 * characters.
 *
 * @param {number} [count=8] Maximum number of events to return, taken from the
 *   end of the list. Zero, negative or non-numeric values yield an empty array.
 * @returns {string[]} Formatted lines in the store's order.
 */
function getRecentEvents(count = 8) {
  const limit = Math.floor(Number(count));
  // slice(-0) is slice(0): without this guard a zero count would return
  // *every* event instead of none.
  if (!(limit > 0)) return [];

  const events = getSummaryStore()?.json?.events;
  if (!Array.isArray(events)) return [];

  return events
    .slice(-limit)
    // Tolerate holes / null records rather than throwing on property access.
    .filter(Boolean)
    .map((event) => {
      const time = event.timeLabel || '';
      const title = event.title || '';
      const participants = Array.isArray(event.participants)
        ? event.participants.join('/')
        : '';
      // String() keeps a numeric or otherwise non-string summary from breaking .replace().
      const summary = String(event.summary || '')
        .replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '')
        .slice(0, 80);
      const line = `${title || participants}: ${summary}`;
      return time ? `[${time}] ${line}` : line;
    });
}
/**
 * Describe up to four character arcs, listing hinted characters first.
 *
 * Arcs come from `getSummaryStore().json.arcs`. An arc counts as hinted when
 * its name matches one of `focusHint` case-insensitively; hinted arcs are moved
 * ahead of the rest while the relative order inside each group is kept.
 *
 * @param {Array<*>} [focusHint=[]] Names to prioritise (each is stringified).
 * @returns {string[]} Lines of the form `name: trajectory (NN%)`, where a
 *   missing trajectory is rendered as '未知状态' and NN is `progress * 100`
 *   rounded to an integer (missing progress counts as 0).
 */
function getRelevantArcs(focusHint = []) {
  const allArcs = getSummaryStore()?.json?.arcs || [];
  if (!allArcs.length) {
    return [];
  }

  const wanted = new Set(focusHint.map((hint) => String(hint).toLowerCase()));
  const rank = (arc) => (wanted.has(String(arc.name || '').toLowerCase()) ? 1 : 0);

  // Sort a copy so the store's own array is never reordered.
  const ordered = [...allArcs];
  ordered.sort((left, right) => rank(right) - rank(left));

  const lines = [];
  for (const arc of ordered.slice(0, 4)) {
    const percent = Math.round((arc.progress || 0) * 100);
    lines.push(`${arc.name}: ${arc.trajectory || '未知状态'} (${percent}%)`);
  }
  return lines;
}
/**
 * Collect candidate character names from chat messages.
 *
 * Two sources are merged, in this order:
 *   1. every truthy `name` field of the messages;
 *   2. every run of 2-4 characters in the U+4E00..U+9FFF range that occurs at
 *      least twice across the concatenated `mes` texts.
 * Duplicates are dropped and at most six candidates are returned.
 *
 * @param {Array<{name?: string, mes?: string}>} messages Chat messages.
 * @returns {string[]} Up to six unique candidates in first-seen order.
 */
function extractNamesFromMessages(messages) {
  const candidates = new Set(messages.map((msg) => msg.name).filter(Boolean));

  const corpus = messages.map((msg) => msg.mes || '').join(' ');
  const occurrences = new Map();
  for (const [token] of corpus.matchAll(/[\u4e00-\u9fff]{2,4}/g)) {
    occurrences.set(token, (occurrences.get(token) || 0) + 1);
  }

  // A Map iterates in insertion order, so repeated tokens are added in the
  // order they first appeared in the text.
  for (const [token, seen] of occurrences) {
    if (seen >= 2) {
      candidates.add(token);
    }
  }

  return [...candidates].slice(0, 6);
}
// ============================================================================
// 主函数
// ============================================================================
/**
 * Create a fresh, empty expansion result.
 * A new object is returned on every call so callers can never share (and
 * accidentally mutate) the same arrays.
 */
function createEmptyExpansion() {
  return { focus: [], fn: [], pt: [], kw: [], queries: [] };
}

/**
 * Normalise one list field of the model's JSON reply.
 * Keeps only strings and numbers (stringified), trims them, drops blanks and
 * caps the list at `limit` entries. Anything that is not an array yields [].
 */
function sanitizeStringList(value, limit) {
  if (!Array.isArray(value)) return [];
  return value
    .filter((item) => typeof item === 'string' || typeof item === 'number')
    .map((item) => String(item).trim())
    .filter(Boolean)
    .slice(0, limit);
}

/**
 * Ask the LLM to turn the current conversation into retrieval queries.
 *
 * The user prompt is assembled from the active character, the user persona,
 * recent summary events, character arcs and the given dialogue (each message
 * is passed through filterText and clipped to 400 characters). The reply is
 * parsed with parseJson and normalised into five string lists.
 *
 * This function never rejects: a failed call or an unparsable reply is logged
 * through xbLog and an empty result is returned instead.
 *
 * @param {Array<{is_user?: boolean, name?: string, mes?: string}>} messages
 *   Recent chat messages, oldest first.
 * @param {object} [options]
 * @param {?string} [options.pendingUserMessage=null] Text the user has typed
 *   but that is not part of `messages` yet; appended as the last dialogue part.
 * @param {number} [options.timeout=6000] Forwarded to callLLM as `timeout`.
 * @returns {Promise<{focus: string[], fn: string[], pt: string[], kw: string[], queries: string[]}>}
 *   At most 5 focus, 4 fn, 4 pt, 5 kw and 5 queries entries; every entry is a
 *   non-empty trimmed string.
 */
export async function expandQuery(messages, options = {}) {
  const { pendingUserMessage = null, timeout = 6000 } = options;
  if (!messages?.length && !pendingUserMessage) {
    return createEmptyExpansion();
  }

  const T0 = performance.now();
  const history = messages || [];

  const character = getCharacterContext();
  const persona = getPersonaContext();
  const nameHints = extractNamesFromMessages(history);
  const recentEvents = getRecentEvents(8);
  const arcs = getRelevantArcs(nameHints);

  const dialogueParts = [];
  for (const m of history) {
    const speaker = m.is_user ? '用户' : (m.name || '角色');
    const text = filterText(m.mes || '').trim();
    if (text) {
      dialogueParts.push(`${speaker}\n${text.slice(0, 400)}`);
    }
  }
  if (pendingUserMessage) {
    dialogueParts.push(`【用户(刚输入)】\n${filterText(pendingUserMessage).slice(0, 400)}`);
  }

  // Optional sections are only emitted when they have content; the dialogue
  // section is always present and always last.
  const inputParts = [];
  if (character.name) {
    inputParts.push(`## 当前角色\n${character.name}: ${character.description || character.personality || '无描述'}`);
  }
  if (persona) {
    inputParts.push(`## 用户人设\n${persona}`);
  }
  if (recentEvents.length) {
    inputParts.push(`## 近期剧情\n${recentEvents.map((e, i) => `${i + 1}. ${e}`).join('\n')}`);
  }
  if (arcs.length) {
    inputParts.push(`## 角色状态\n${arcs.join('\n')}`);
  }
  inputParts.push(`## 最近对话\n${dialogueParts.join('\n\n')}`);
  const input = inputParts.join('\n\n');

  try {
    const response = await callLLM([
      { role: 'system', content: SYSTEM_PROMPT },
      { role: 'user', content: input },
    ], {
      temperature: 0.15,
      max_tokens: 500,
      timeout,
      sessionId: SESSION_ID,
    });

    const parsed = parseJson(response);
    if (!parsed) {
      // Only strings can be clipped; logging a non-string reply as-is avoids a
      // TypeError that would be misreported as a call failure below.
      const preview = typeof response === 'string' ? response.slice(0, 200) : response;
      xbLog.warn(MODULE_ID, 'JSON解析失败', preview);
      return createEmptyExpansion();
    }

    // The reply is model output: besides the expected strings it may contain
    // nulls, nested objects or blanks, which would otherwise leak into search
    // text as "[object Object]" or empty tokens.
    const result = {
      focus: sanitizeStringList(parsed.focus, 5),
      fn: sanitizeStringList(parsed.fn, 4),
      pt: sanitizeStringList(parsed.pt, 4),
      kw: sanitizeStringList(parsed.kw, 5),
      queries: sanitizeStringList(parsed.queries, 5),
    };
    xbLog.info(MODULE_ID, `完成 (${Math.round(performance.now() - T0)}ms) focus=[${result.focus.join(',')}] fn=[${result.fn.join(',')}]`);
    return result;
  } catch (e) {
    xbLog.error(MODULE_ID, '调用失败', e);
    return createEmptyExpansion();
  }
}
// ============================================================================
// 缓存
// ============================================================================
// In-memory memo of expansion results. Keys are the strings produced by
// hashMessages(); values are the { result, time } records written by
// expandQueryCached(), where `time` is a Date.now() timestamp.
const cache = new Map();
// Maximum age of a cache entry in milliseconds (300000 ms = 5 minutes);
// compared against a Date.now() difference in expandQueryCached().
const CACHE_TTL = 300000;
/**
 * Derive a short cache key from the tail of a conversation.
 *
 * Only the last three messages are considered, each reduced to the first 100
 * characters of its `mes` text; the first 100 characters of `pending` are
 * appended. The pieces are joined with '|' and folded into a signed 32-bit
 * value (hash = hash * 31 + UTF-16 code unit), which is returned in base 36.
 *
 * @param {?Array<{mes?: string}>} messages Chat messages; null/undefined is
 *   treated as an empty list.
 * @param {?string} [pending=''] Not-yet-sent user text.
 * @returns {string} Base-36 rendering of the 32-bit hash (may start with '-').
 */
function hashMessages(messages, pending = '') {
  const tail = (messages || []).slice(-3).map((msg) => (msg.mes || '').slice(0, 100));
  const fingerprint = `${tail.join('|')}|${(pending || '').slice(0, 100)}`;

  let hash = 0;
  for (let idx = 0; idx < fingerprint.length; idx += 1) {
    // Math.imul(hash, 31) is the 32-bit wrapped form of (hash << 5) - hash.
    hash = (Math.imul(hash, 31) + fingerprint.charCodeAt(idx)) | 0;
  }
  return hash.toString(36);
}
/**
 * Cached front end for expandQuery().
 *
 * The cache key is hashMessages(messages, options.pendingUserMessage). A hit
 * younger than CACHE_TTL is returned as-is (the same object, not a copy). On a
 * miss the query is expanded and the result stored, unless both its `focus`
 * and `queries` lists are empty — that is what expandQuery returns on failure,
 * and caching it would suppress retries for the whole TTL.
 *
 * The cache holds at most 50 entries; the oldest-written entry is evicted
 * first.
 *
 * @param {Array<object>} messages Recent chat messages.
 * @param {object} [options] Passed through to expandQuery();
 *   `pendingUserMessage` also takes part in the cache key.
 * @returns {Promise<{focus: string[], fn: string[], pt: string[], kw: string[], queries: string[]}>}
 */
export async function expandQueryCached(messages, options = {}) {
  const MAX_ENTRIES = 50;
  // The default parameter only covers `undefined`; treat null the same way.
  const opts = options ?? {};
  const key = hashMessages(messages, opts.pendingUserMessage);

  const cached = cache.get(key);
  if (cached) {
    if (Date.now() - cached.time < CACHE_TTL) {
      return cached.result;
    }
    // Expired: drop it now so it neither lingers nor keeps its old position.
    cache.delete(key);
  }

  const result = await expandQuery(messages, opts);
  if (result.focus.length || result.queries.length) {
    // Map.set() on an existing key keeps the key's original insertion slot.
    // Deleting first moves the refreshed entry to the end, so eviction order
    // (first key = oldest write) stays truthful even if a concurrent call
    // stored the same key while we were awaiting.
    cache.delete(key);
    while (cache.size >= MAX_ENTRIES) {
      cache.delete(cache.keys().next().value);
    }
    cache.set(key, { result, time: Date.now() });
  }
  return result;
}
// ============================================================================
// 辅助函数:构建检索文本
// ============================================================================
/**
 * Flatten an expansion result into a single retrieval string.
 *
 * Layout, space-separated:
 *   1. the focus names joined with spaces;
 *   2. the fn, pt and kw entries merged into one `[a/b/c]` tag group
 *      (falsy entries dropped);
 *   3. every query, in order (empty ones dropped).
 * The final string is clipped to 1500 characters.
 *
 * A null/undefined expansion, or a field that is not an array, contributes
 * nothing instead of throwing or being spread character by character.
 *
 * @param {?{focus?: string[], fn?: string[], pt?: string[], kw?: string[], queries?: string[]}} expansion
 *   Result of expandQuery().
 * @returns {string} The search text, '' when there is nothing to search for.
 */
export function buildSearchText(expansion) {
  // Anything that is not a real array is treated as "no entries".
  const asList = (value) => (Array.isArray(value) ? value : []);
  const parts = [];

  // Focus characters.
  const focus = asList(expansion?.focus);
  if (focus.length) {
    parts.push(focus.join(' '));
  }

  // fn + pt + kw merged into one tag group.
  const tags = [
    ...asList(expansion?.fn),
    ...asList(expansion?.pt),
    ...asList(expansion?.kw),
  ].filter(Boolean);
  if (tags.length) {
    parts.push(`[${tags.join('/')}]`);
  }

  // DSL queries.
  parts.push(...asList(expansion?.queries));

  return parts.filter(Boolean).join(' ').slice(0, 1500);
}
/**
 * Return the entity list of an expansion result (kept for callers of the
 * older interface).
 *
 * @param {?{focus?: string[]}} expansion Result of expandQuery(); may be
 *   null or undefined.
 * @returns {string[]} `expansion.focus` itself when it is truthy, otherwise a
 *   new empty array.
 */
export function getEntitiesFromExpansion(expansion) {
  const focus = expansion?.focus;
  if (focus) {
    return focus;
  }
  return [];
}