refactor focus concepts: add focusTerms/focusCharacters and switch character filtering
This commit is contained in:
@@ -19,6 +19,7 @@ import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/con
|
||||
import { recallMemory } from "../vector/retrieval/recall.js";
|
||||
import { getMeta } from "../vector/storage/chunk-store.js";
|
||||
import { getEngineFingerprint } from "../vector/utils/embedder.js";
|
||||
import { buildTrustedCharacters } from "../vector/retrieval/entity-lexicon.js";
|
||||
|
||||
// Metrics
|
||||
import { formatMetricsLog, detectIssues } from "../vector/retrieval/metrics.js";
|
||||
@@ -239,23 +240,11 @@ function buildPostscript() {
|
||||
* @returns {Set<string>} 角色名称集合(标准化后)
|
||||
*/
|
||||
function getKnownCharacters(store) {
    // name1 is the user's name and name2 the current character, both taken
    // from the host context.
    const { name1, name2 } = getContext();

    // The trusted pool is the single source for known characters. The previous
    // inline scan of arcs / main characters duplicated that work and left a
    // second `const names` declaration behind, which cannot even be parsed.
    const names = buildTrustedCharacters(store, { name1, name2 }) || new Set();

    // Keep name1 in known-character filtering domain to avoid behavior regression
    // for L3 subject filtering (lexicon exclusion and filtering semantics are
    // different concerns): the trusted pool removes name1, so it is re-added here.
    if (name1) names.add(normalize(name1));
    if (name2) names.add(normalize(name2));

    return names;
}
|
||||
|
||||
@@ -272,14 +261,14 @@ function parseRelationTarget(predicate) {
|
||||
/**
|
||||
* 按相关性过滤 facts
|
||||
* @param {object[]} facts - 所有 facts
|
||||
* @param {string[]} focusEntities - 焦点实体
|
||||
* @param {string[]} focusCharacters - 焦点人物
|
||||
* @param {Set<string>} knownCharacters - 已知角色
|
||||
* @returns {object[]} 过滤后的 facts
|
||||
*/
|
||||
function filterConstraintsByRelevance(facts, focusEntities, knownCharacters) {
|
||||
function filterConstraintsByRelevance(facts, focusCharacters, knownCharacters) {
|
||||
if (!facts?.length) return [];
|
||||
|
||||
const focusSet = new Set((focusEntities || []).map(normalize));
|
||||
const focusSet = new Set((focusCharacters || []).map(normalize));
|
||||
|
||||
return facts.filter(f => {
|
||||
if (f._isState === true) return true;
|
||||
@@ -304,13 +293,13 @@ function filterConstraintsByRelevance(facts, focusEntities, knownCharacters) {
|
||||
|
||||
/**
|
||||
* Build people dictionary for constraints display.
|
||||
* Primary source: selected event participants; fallback: focus entities.
|
||||
* Primary source: selected event participants; fallback: focus characters.
|
||||
*
|
||||
* @param {object|null} recallResult
|
||||
* @param {string[]} focusEntities
|
||||
* @param {string[]} focusCharacters
|
||||
* @returns {Map<string, string>} normalize(name) -> display name
|
||||
*/
|
||||
function buildConstraintPeopleDict(recallResult, focusEntities = []) {
|
||||
function buildConstraintPeopleDict(recallResult, focusCharacters = []) {
|
||||
const dict = new Map();
|
||||
const add = (raw) => {
|
||||
const display = String(raw || '').trim();
|
||||
@@ -326,7 +315,7 @@ function buildConstraintPeopleDict(recallResult, focusEntities = []) {
|
||||
}
|
||||
|
||||
if (dict.size === 0) {
|
||||
for (const f of (focusEntities || [])) add(f);
|
||||
for (const f of (focusCharacters || [])) add(f);
|
||||
}
|
||||
|
||||
return dict;
|
||||
@@ -375,16 +364,19 @@ function formatConstraintLine(f, includeSubject = false) {
|
||||
* @param {{ people: Map<string, object[]>, world: object[] }} grouped
|
||||
* @returns {string[]}
|
||||
*/
|
||||
function formatConstraintsStructured(grouped) {
|
||||
function formatConstraintsStructured(grouped, order = 'desc') {
|
||||
const lines = [];
|
||||
const people = grouped?.people || new Map();
|
||||
const world = grouped?.world || [];
|
||||
const sorter = order === 'asc'
|
||||
? ((a, b) => (a.since || 0) - (b.since || 0))
|
||||
: ((a, b) => (b.since || 0) - (a.since || 0));
|
||||
|
||||
if (people.size > 0) {
|
||||
lines.push('people:');
|
||||
for (const [name, facts] of people.entries()) {
|
||||
lines.push(` ${name}:`);
|
||||
const sorted = [...facts].sort((a, b) => (b.since || 0) - (a.since || 0));
|
||||
const sorted = [...facts].sort(sorter);
|
||||
for (const f of sorted) {
|
||||
lines.push(` ${formatConstraintLine(f, false)}`);
|
||||
}
|
||||
@@ -393,7 +385,7 @@ function formatConstraintsStructured(grouped) {
|
||||
|
||||
if (world.length > 0) {
|
||||
lines.push('world:');
|
||||
const sortedWorld = [...world].sort((a, b) => (b.since || 0) - (a.since || 0));
|
||||
const sortedWorld = [...world].sort(sorter);
|
||||
for (const f of sortedWorld) {
|
||||
lines.push(` ${formatConstraintLine(f, true)}`);
|
||||
}
|
||||
@@ -402,6 +394,58 @@ function formatConstraintsStructured(grouped) {
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
 * Try to charge one rendered line against a running budget.
 *
 * The cost of the line is whatever `estimateTokens` reports for it. The charge
 * is applied only when it does not push `used` past `max`.
 *
 * @param {string} line - Rendered line to account for
 * @param {{ used: number, max: number }} budgetState - Mutable budget tracker
 * @returns {boolean} true when the line was charged, false when it did not fit
 */
function tryConsumeConstraintLineBudget(line, budgetState) {
    const projectedUse = budgetState.used + estimateTokens(line);
    const overBudget = projectedUse > budgetState.max;

    if (!overBudget) {
        budgetState.used = projectedUse;
    }
    return !overBudget;
}
|
||||
|
||||
/**
 * Pick the constraints that fit into the remaining budget, newest first.
 *
 * People are processed before world facts. Within each person (and within the
 * world list) facts are tried in descending `since` order, so when the budget
 * runs out it is the oldest facts that get dropped. Selection stops at the
 * first line that does not fit; later groups are not attempted.
 *
 * Accounting rules:
 * - Facts already picked for a person are kept when the budget runs out
 *   part-way through that person (they were already paid for).
 * - A header whose group ends up with nothing selected is refunded, so
 *   `budgetState.used` only covers lines that can actually be rendered.
 *
 * @param {{ people: Map<string, object[]>, world: object[] }} grouped
 * @param {{ used: number, max: number }} budgetState - Mutated in place
 * @returns {{ people: Map<string, object[]>, world: object[] }}
 */
function selectConstraintsByBudgetDesc(grouped, budgetState) {
    const selectedPeople = new Map();
    const selectedWorld = [];
    const selection = { people: selectedPeople, world: selectedWorld };

    const people = grouped?.people || new Map();
    const world = grouped?.world || [];
    const newestFirst = (a, b) => (b.since || 0) - (a.since || 0);

    if (people.size > 0) {
        const usedBeforeSection = budgetState.used;
        if (!tryConsumeConstraintLineBudget('people:', budgetState)) {
            return selection;
        }

        for (const [name, facts] of people.entries()) {
            const usedBeforePerson = budgetState.used;
            const picked = [];
            let exhausted = !tryConsumeConstraintLineBudget(` ${name}:`, budgetState);

            if (!exhausted) {
                for (const f of [...facts].sort(newestFirst)) {
                    const line = ` ${formatConstraintLine(f, false)}`;
                    if (!tryConsumeConstraintLineBudget(line, budgetState)) {
                        exhausted = true;
                        break;
                    }
                    picked.push(f);
                }
            }

            if (!exhausted) {
                selectedPeople.set(name, picked);
                continue;
            }

            if (picked.length > 0) {
                // Keep what was already paid for instead of discarding it.
                selectedPeople.set(name, picked);
            } else {
                // Nothing fit under this person: give the name header back.
                budgetState.used = usedBeforePerson;
            }
            if (selectedPeople.size === 0) {
                // No person survived, so the section header is not rendered either.
                budgetState.used = usedBeforeSection;
            }
            return selection;
        }
    }

    if (world.length > 0) {
        const usedBeforeSection = budgetState.used;
        if (!tryConsumeConstraintLineBudget('world:', budgetState)) {
            return selection;
        }

        for (const f of [...world].sort(newestFirst)) {
            const line = ` ${formatConstraintLine(f, true)}`;
            if (!tryConsumeConstraintLineBudget(line, budgetState)) {
                if (selectedWorld.length === 0) {
                    // Section header without any fact is not rendered: refund it.
                    budgetState.used = usedBeforeSection;
                }
                return selection;
            }
            selectedWorld.push(f);
        }
    }

    return selection;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 格式化函数
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -703,7 +747,7 @@ function buildNonVectorPrompt(store) {
|
||||
nonVectorKnownCharacters
|
||||
);
|
||||
const groupedConstraints = groupConstraintsForDisplay(filteredConstraints, nonVectorPeopleDict);
|
||||
const constraintLines = formatConstraintsStructured(groupedConstraints);
|
||||
const constraintLines = formatConstraintsStructured(groupedConstraints, 'asc');
|
||||
|
||||
if (constraintLines.length) {
|
||||
sections.push(`[定了的事] 已确立的事实\n${constraintLines.join("\n")}`);
|
||||
@@ -772,12 +816,12 @@ export function buildNonVectorPromptText() {
|
||||
* @param {object} store - 存储对象
|
||||
* @param {object} recallResult - 召回结果
|
||||
* @param {Map<string, object>} causalById - 因果事件索引
|
||||
* @param {string[]} focusEntities - 焦点实体
|
||||
* @param {string[]} focusCharacters - 焦点人物
|
||||
* @param {object} meta - 元数据
|
||||
* @param {object} metrics - 指标对象
|
||||
* @returns {Promise<{promptText: string, injectionStats: object, metrics: object}>}
|
||||
*/
|
||||
async function buildVectorPrompt(store, recallResult, causalById, focusEntities, meta, metrics) {
|
||||
async function buildVectorPrompt(store, recallResult, causalById, focusCharacters, meta, metrics) {
|
||||
const T_Start = performance.now();
|
||||
|
||||
const data = store.json || {};
|
||||
@@ -825,21 +869,21 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities,
|
||||
|
||||
const allFacts = getFacts();
|
||||
const knownCharacters = getKnownCharacters(store);
|
||||
const filteredConstraints = filterConstraintsByRelevance(allFacts, focusEntities, knownCharacters);
|
||||
const constraintPeopleDict = buildConstraintPeopleDict(recallResult, focusEntities);
|
||||
const filteredConstraints = filterConstraintsByRelevance(allFacts, focusCharacters, knownCharacters);
|
||||
const constraintPeopleDict = buildConstraintPeopleDict(recallResult, focusCharacters);
|
||||
const groupedConstraints = groupConstraintsForDisplay(filteredConstraints, constraintPeopleDict);
|
||||
const constraintLines = formatConstraintsStructured(groupedConstraints);
|
||||
|
||||
if (metrics) {
|
||||
metrics.constraint.total = allFacts.length;
|
||||
metrics.constraint.filtered = allFacts.length - filteredConstraints.length;
|
||||
}
|
||||
|
||||
if (constraintLines.length) {
|
||||
const constraintBudget = { used: 0, max: Math.min(CONSTRAINT_MAX, total.max - total.used) };
|
||||
for (const line of constraintLines) {
|
||||
if (!pushWithBudget(assembled.constraints.lines, line, constraintBudget)) break;
|
||||
}
|
||||
const groupedSelectedConstraints = selectConstraintsByBudgetDesc(groupedConstraints, constraintBudget);
|
||||
const constraintLines = formatConstraintsStructured(groupedSelectedConstraints, 'asc');
|
||||
|
||||
if (constraintLines.length) {
|
||||
assembled.constraints.lines.push(...constraintLines);
|
||||
assembled.constraints.tokens = constraintBudget.used;
|
||||
total.used += constraintBudget.used;
|
||||
injectionStats.constraint.count = assembled.constraints.lines.length;
|
||||
@@ -867,7 +911,7 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities,
|
||||
const userName = String(name1 || "").trim();
|
||||
|
||||
const relevant = new Set(
|
||||
[userName, ...(focusEntities || [])]
|
||||
[userName, ...(focusCharacters || [])]
|
||||
.map(s => String(s || "").trim())
|
||||
.filter(Boolean)
|
||||
);
|
||||
@@ -1048,7 +1092,7 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities,
|
||||
const keepVisible = store.keepVisibleCount ?? 3;
|
||||
|
||||
// 收集未被事件消费的 L0,按 rerankScore 降序
|
||||
const focusSetForEvidence = new Set((focusEntities || []).map(normalize).filter(Boolean));
|
||||
const focusSetForEvidence = new Set((focusCharacters || []).map(normalize).filter(Boolean));
|
||||
|
||||
const remainingL0 = l0Selected
|
||||
.filter(l0 => !usedL0Ids.has(l0.id))
|
||||
@@ -1279,7 +1323,9 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
|
||||
l0Selected: recallResult?.l0Selected || [],
|
||||
l1ByFloor: recallResult?.l1ByFloor || new Map(),
|
||||
causalChain: recallResult?.causalChain || [],
|
||||
focusEntities: recallResult?.focusEntities || [],
|
||||
focusTerms: recallResult?.focusTerms || recallResult?.focusEntities || [],
|
||||
focusEntities: recallResult?.focusTerms || recallResult?.focusEntities || [], // compat alias
|
||||
focusCharacters: recallResult?.focusCharacters || [],
|
||||
metrics: recallResult?.metrics || null,
|
||||
};
|
||||
|
||||
@@ -1340,7 +1386,7 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
|
||||
store,
|
||||
recallResult,
|
||||
causalById,
|
||||
recallResult?.focusEntities || [],
|
||||
recallResult?.focusCharacters || [],
|
||||
meta,
|
||||
recallResult?.metrics || null
|
||||
);
|
||||
|
||||
@@ -36,6 +36,83 @@ function isBlacklistedPersonTerm(raw) {
|
||||
return PERSON_LEXICON_BLACKLIST.has(normalize(raw));
|
||||
}
|
||||
|
||||
/**
 * Normalize a raw name and add it to the given set.
 *
 * The name is ignored when its normalized form is empty, shorter than two
 * characters, or rejected by `isBlacklistedPersonTerm`.
 *
 * @param {Set<string>} set - Target set, mutated in place
 * @param {*} raw - Raw name as found in the store
 */
function addPersonTerm(set, raw) {
    const term = normalize(raw);
    const rejected = !term || term.length < 2 || isBlacklistedPersonTerm(term);
    if (!rejected) {
        set.add(term);
    }
}
|
||||
|
||||
/**
 * Collect the trusted character pool from the summary store.
 *
 * Sources, in insertion order: main characters, arc names, the current
 * character (`context.name2`), and event participants. Every name goes through
 * `addPersonTerm`. The user's own name (`context.name1`) is removed at the end.
 *
 * Null or undefined entries in the main-character and arc lists are skipped
 * rather than throwing, matching the guarded access used for events.
 *
 * @param {object} store - Summary store; data is read from `store.json`
 * @param {{ name1?: string, name2?: string }} context
 * @returns {Set<string>} Normalized trusted character names
 */
function collectTrustedCharacters(store, context) {
    const trusted = new Set();
    const data = store?.json;

    // Main characters are stored either as plain strings or as { name } objects.
    for (const m of data?.characters?.main || []) {
        addPersonTerm(trusted, typeof m === 'string' ? m : m?.name);
    }

    for (const a of data?.arcs || []) {
        addPersonTerm(trusted, a?.name);
    }

    if (context?.name2) {
        addPersonTerm(trusted, context.name2);
    }

    for (const ev of data?.events || []) {
        for (const p of ev?.participants || []) {
            addPersonTerm(trusted, p);
        }
    }

    // name1 is excluded from the pool even if it appears in one of the sources.
    if (context?.name1) {
        trusted.delete(normalize(context.name1));
    }

    return trusted;
}
|
||||
|
||||
/**
 * Return the trusted character pool on its own, without scanning L0
 * candidate atoms.
 *
 * The pool covers main characters, arcs, name2 and L2 event participants,
 * with name1 excluded.
 *
 * @param {object} store
 * @param {object} context
 * @returns {Set<string>}
 */
export function buildTrustedCharacters(store, context) {
    const trustedPool = collectTrustedCharacters(store, context);
    return trustedPool;
}
|
||||
|
||||
/**
 * Collect candidate character names from the edges of the L0 state atoms.
 *
 * Both endpoints (`s` and `t`) of every edge are offered to `addPersonTerm`;
 * the user's own name (`context.name1`) is removed from the result.
 *
 * @param {{ name1?: string }} context
 * @returns {Set<string>} Normalized candidate names
 */
function collectCandidateCharactersFromL0(context) {
    const pool = new Set();

    for (const atom of getStateAtoms()) {
        const edges = atom.edges || [];
        for (const edge of edges) {
            for (const endpoint of [edge?.s, edge?.t]) {
                addPersonTerm(pool, endpoint);
            }
        }
    }

    const userName = context?.name1;
    if (userName) {
        pool.delete(normalize(userName));
    }

    return pool;
}
|
||||
|
||||
/**
 * Build the character pools, separated by how much each source is trusted.
 *
 * - trustedCharacters: main/arcs/name2/L2 participants (clean source)
 * - candidateCharacters: L0 edges.s/t (blacklist-cleaned)
 * - allCharacters: union of the two, trusted names first
 *
 * @param {object} store
 * @param {object} context
 * @returns {{ trustedCharacters: Set<string>, candidateCharacters: Set<string>, allCharacters: Set<string> }}
 */
export function buildCharacterPools(store, context) {
    const trustedCharacters = collectTrustedCharacters(store, context);
    const candidateCharacters = collectCandidateCharactersFromL0(context);

    // Seed the union with the trusted names, then append the candidates.
    const allCharacters = new Set(trustedCharacters);
    for (const name of candidateCharacters) {
        allCharacters.add(name);
    }

    return { trustedCharacters, candidateCharacters, allCharacters };
}
|
||||
|
||||
/**
|
||||
* 构建实体词典
|
||||
*
|
||||
@@ -53,56 +130,7 @@ function isBlacklistedPersonTerm(raw) {
|
||||
* @returns {Set<string>} 标准化后的实体集合
|
||||
*/
|
||||
export function buildEntityLexicon(store, context) {
    // The lexicon is the union of the trusted pool (main characters, arcs,
    // name2, L2 event participants) and the candidate pool (L0 edges.s/t),
    // with name1 removed. buildCharacterPools assembles exactly that, so the
    // former inline copy of the same collection logic is dropped; it ended in
    // its own `return`, which left the delegating call unreachable.
    return buildCharacterPools(store, context).allCharacters;
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -36,6 +36,8 @@ export function createMetrics() {
|
||||
// Anchor (L0 StateAtoms) - 语义锚点
|
||||
anchor: {
|
||||
needRecall: false,
|
||||
focusTerms: [],
|
||||
focusCharacters: [],
|
||||
focusEntities: [],
|
||||
matched: 0,
|
||||
floorsHit: 0,
|
||||
@@ -85,6 +87,7 @@ export function createMetrics() {
|
||||
causalChainDepth: 0,
|
||||
causalCount: 0,
|
||||
entitiesUsed: 0,
|
||||
focusTermsCount: 0,
|
||||
entityNames: [],
|
||||
},
|
||||
|
||||
@@ -254,7 +257,8 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push('[Anchor] L0 StateAtoms - 语义锚点');
|
||||
lines.push(`├─ need_recall: ${m.anchor.needRecall}`);
|
||||
if (m.anchor.needRecall) {
|
||||
lines.push(`├─ focus_entities: [${(m.anchor.focusEntities || []).join(', ')}]`);
|
||||
lines.push(`├─ focus_terms: [${(m.anchor.focusTerms || m.anchor.focusEntities || []).join(', ')}]`);
|
||||
lines.push(`├─ focus_characters: [${(m.anchor.focusCharacters || []).join(', ')}]`);
|
||||
lines.push(`├─ matched: ${m.anchor.matched || 0}`);
|
||||
lines.push(`└─ floors_hit: ${m.anchor.floorsHit || 0}`);
|
||||
}
|
||||
@@ -310,7 +314,7 @@ export function formatMetricsLog(metrics) {
|
||||
if (m.event.entityFilter) {
|
||||
const ef = m.event.entityFilter;
|
||||
lines.push(`├─ entity_filter:`);
|
||||
lines.push(`│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`);
|
||||
lines.push(`│ ├─ focus_characters: [${(ef.focusCharacters || ef.focusEntities || []).join(', ')}]`);
|
||||
lines.push(`│ ├─ before: ${ef.before}`);
|
||||
lines.push(`│ ├─ after: ${ef.after}`);
|
||||
lines.push(`│ └─ filtered: ${ef.filtered}`);
|
||||
@@ -338,7 +342,7 @@ export function formatMetricsLog(metrics) {
|
||||
}
|
||||
|
||||
lines.push(`├─ causal_chain: depth=${m.event.causalChainDepth}, count=${m.event.causalCount}`);
|
||||
lines.push(`└─ entities_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}]`);
|
||||
lines.push(`└─ focus_characters_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}], focus_terms_count=${m.event.focusTermsCount || 0}`);
|
||||
lines.push('');
|
||||
|
||||
// Evidence (Two-Stage: Floor Rerank → L1 Pull)
|
||||
@@ -485,7 +489,7 @@ export function detectIssues(metrics) {
|
||||
// 查询构建问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.anchor.focusEntities || []).length === 0) {
|
||||
if ((m.anchor.focusTerms || m.anchor.focusEntities || []).length === 0) {
|
||||
issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText } from './entity-lexicon.js';
|
||||
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
|
||||
import { getSummaryStore } from '../../data/store.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
|
||||
@@ -137,7 +137,13 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
|
||||
* @property {QuerySegment|null} hintsSegment - R2 hints 段(refinement 后填充)
|
||||
* @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前)
|
||||
* @property {string[]} lexicalTerms - MiniSearch 查询词
|
||||
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1)
|
||||
* @property {string[]} focusTerms - 焦点词(原 focusEntities)
|
||||
* @property {string[]} focusCharacters - 焦点人物(focusTerms ∩ trustedCharacters)
|
||||
* @property {string[]} focusEntities - Deprecated alias of focusTerms
|
||||
* @property {Set<string>} allEntities - Full entity lexicon (includes non-character entities)
|
||||
* @property {Set<string>} allCharacters - Union of trusted and candidate character pools
|
||||
* @property {Set<string>} trustedCharacters - Clean character pool (main/arcs/name2/L2 participants)
|
||||
* @property {Set<string>} candidateCharacters - Extended character pool from L0 edges.s/t after cleanup
|
||||
* @property {Set<string>} _lexicon - 实体词典(内部使用)
|
||||
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
|
||||
*/
|
||||
@@ -203,9 +209,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
context = { name1: ctx.name1, name2: ctx.name2 };
|
||||
}
|
||||
|
||||
// 1. 实体词典
|
||||
// 1. 实体/人物词典
|
||||
const lexicon = buildEntityLexicon(store, context);
|
||||
const displayMap = buildDisplayNameMap(store, context);
|
||||
const { trustedCharacters, candidateCharacters, allCharacters } = buildCharacterPools(store, context);
|
||||
|
||||
// 2. 分离焦点与上下文
|
||||
const contextEntries = [];
|
||||
@@ -253,9 +260,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 提取焦点实体
|
||||
// 3. 提取焦点词与焦点人物
|
||||
const combinedText = allCleanTexts.join(' ');
|
||||
const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);
|
||||
const focusTerms = extractEntitiesFromText(combinedText, lexicon, displayMap);
|
||||
const focusCharacters = focusTerms.filter(term => trustedCharacters.has(term.toLowerCase()));
|
||||
|
||||
// 4. 构建 querySegments
|
||||
// 上下文在前(oldest → newest),焦点在末尾
|
||||
@@ -286,7 +294,7 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
: contextLines.join('\n');
|
||||
|
||||
// 6. lexicalTerms(实体优先 + 高频实词补充)
|
||||
const entityTerms = focusEntities.map(e => e.toLowerCase());
|
||||
const entityTerms = focusTerms.map(e => e.toLowerCase());
|
||||
const textTerms = extractKeyTerms(combinedText);
|
||||
const termSet = new Set(entityTerms);
|
||||
for (const t of textTerms) {
|
||||
@@ -299,7 +307,13 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
hintsSegment: null,
|
||||
rerankQuery,
|
||||
lexicalTerms: Array.from(termSet),
|
||||
focusEntities,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
focusEntities: focusTerms, // deprecated alias (compat)
|
||||
allEntities: lexicon,
|
||||
allCharacters,
|
||||
trustedCharacters,
|
||||
candidateCharacters,
|
||||
_lexicon: lexicon,
|
||||
_displayMap: displayMap,
|
||||
};
|
||||
|
||||
@@ -319,7 +319,7 @@ async function recallAnchors(queryVector, vectorConfig, metrics) {
|
||||
// 返回 { events, vectorMap }
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, metrics) {
|
||||
async function recallEvents(queryVector, allEvents, vectorConfig, focusCharacters, metrics) {
|
||||
const { chatId } = getContext();
|
||||
if (!chatId || !queryVector?.length || !allEvents?.length) {
|
||||
return { events: [], vectorMap: new Map() };
|
||||
@@ -339,7 +339,7 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities,
|
||||
return { events: [], vectorMap };
|
||||
}
|
||||
|
||||
const focusSet = new Set((focusEntities || []).map(normalize));
|
||||
const focusSet = new Set((focusCharacters || []).map(normalize));
|
||||
|
||||
const scored = allEvents.map(event => {
|
||||
const v = vectorMap.get(event.id);
|
||||
@@ -381,7 +381,8 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities,
|
||||
|
||||
if (metrics) {
|
||||
metrics.event.entityFilter = {
|
||||
focusEntities: focusEntities || [],
|
||||
focusCharacters: focusCharacters || [],
|
||||
focusEntities: focusCharacters || [],
|
||||
before: beforeFilter,
|
||||
after: candidates.length,
|
||||
filtered: beforeFilter - candidates.length,
|
||||
@@ -962,6 +963,8 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
l1ByFloor: new Map(),
|
||||
causalChain: [],
|
||||
focusEntities: [],
|
||||
focusTerms: [],
|
||||
focusCharacters: [],
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'No events.',
|
||||
metrics,
|
||||
@@ -982,9 +985,13 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);
|
||||
|
||||
const bundle = buildQueryBundle(lastMessages, pendingUserMessage);
|
||||
const focusTerms = bundle.focusTerms || bundle.focusEntities || [];
|
||||
const focusCharacters = bundle.focusCharacters || [];
|
||||
|
||||
metrics.query.buildTime = Math.round(performance.now() - T_Build_Start);
|
||||
metrics.anchor.focusEntities = bundle.focusEntities;
|
||||
metrics.anchor.focusTerms = focusTerms;
|
||||
metrics.anchor.focusEntities = focusTerms; // compat
|
||||
metrics.anchor.focusCharacters = focusCharacters;
|
||||
|
||||
if (metrics.query?.lengths) {
|
||||
metrics.query.lengths.v0Chars = bundle.querySegments.reduce((sum, s) => sum + s.text.length, 0);
|
||||
@@ -993,7 +1000,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Query Build: focus=[${bundle.focusEntities.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
|
||||
`Query Build: focus_terms=[${focusTerms.join(',')}] focus_characters=[${focusCharacters.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
|
||||
);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
@@ -1005,7 +1012,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.timing.total = Math.round(performance.now() - T0);
|
||||
return {
|
||||
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
|
||||
focusEntities: bundle.focusEntities,
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'No query segments.',
|
||||
metrics,
|
||||
@@ -1025,7 +1034,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.timing.total = Math.round(performance.now() - T0);
|
||||
return {
|
||||
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
|
||||
focusEntities: bundle.focusEntities,
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'Embedding failed (round 1, after retry).',
|
||||
metrics,
|
||||
@@ -1037,7 +1048,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.timing.total = Math.round(performance.now() - T0);
|
||||
return {
|
||||
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
|
||||
focusEntities: bundle.focusEntities,
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'Empty query vectors (round 1).',
|
||||
metrics,
|
||||
@@ -1055,7 +1068,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.timing.total = Math.round(performance.now() - T0);
|
||||
return {
|
||||
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
|
||||
focusEntities: bundle.focusEntities,
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'Weighted average produced empty vector.',
|
||||
metrics,
|
||||
@@ -1067,7 +1082,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
const r1AnchorTime = Math.round(performance.now() - T_R1_Anchor_Start);
|
||||
|
||||
const T_R1_Event_Start = performance.now();
|
||||
const { events: eventHits_v0 } = await recallEvents(queryVector_v0, allEvents, vectorConfig, bundle.focusEntities, null);
|
||||
const { events: eventHits_v0 } = await recallEvents(queryVector_v0, allEvents, vectorConfig, focusCharacters, null);
|
||||
const r1EventTime = Math.round(performance.now() - T_R1_Event_Start);
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
@@ -1089,7 +1104,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Refinement: focus=[${bundle.focusEntities.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)`
|
||||
`Refinement: focus_terms=[${focusTerms.join(',')}] focus_characters=[${focusCharacters.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)`
|
||||
);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
@@ -1129,7 +1144,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.timing.anchorSearch = Math.round(performance.now() - T_R2_Anchor_Start);
|
||||
|
||||
const T_R2_Event_Start = performance.now();
|
||||
let { events: eventHits, vectorMap: eventVectorMap } = await recallEvents(queryVector_v1, allEvents, vectorConfig, bundle.focusEntities, metrics);
|
||||
let { events: eventHits, vectorMap: eventVectorMap } = await recallEvents(queryVector_v1, allEvents, vectorConfig, focusCharacters, metrics);
|
||||
metrics.timing.eventRetrieval = Math.round(performance.now() - T_R2_Event_Start);
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
@@ -1178,7 +1193,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
let lexicalEventCount = 0;
|
||||
let lexicalEventFilteredByDense = 0;
|
||||
let l0LinkedCount = 0;
|
||||
const focusSetForLexical = new Set((bundle.focusEntities || []).map(normalize));
|
||||
const focusSetForLexical = new Set((focusCharacters || []).map(normalize));
|
||||
|
||||
for (const eid of lexicalResult.eventIds) {
|
||||
if (existingEventIds.has(eid)) continue;
|
||||
@@ -1351,14 +1366,16 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
metrics.timing.total = Math.round(performance.now() - T0);
|
||||
metrics.event.entityNames = bundle.focusEntities;
|
||||
metrics.event.entitiesUsed = bundle.focusEntities.length;
|
||||
metrics.event.entityNames = focusCharacters;
|
||||
metrics.event.entitiesUsed = focusCharacters.length;
|
||||
metrics.event.focusTermsCount = focusTerms.length;
|
||||
|
||||
console.group('%c[Recall v9]', 'color: #7c3aed; font-weight: bold');
|
||||
console.log(`Total: ${metrics.timing.total}ms`);
|
||||
console.log(`Query Build: ${metrics.query.buildTime}ms | Refine: ${metrics.query.refineTime}ms`);
|
||||
console.log(`R1 weights: [${r1Weights.map(w => w.toFixed(2)).join(', ')}]`);
|
||||
console.log(`Focus: [${bundle.focusEntities.join(', ')}]`);
|
||||
console.log(`Focus terms: [${focusTerms.join(', ')}]`);
|
||||
console.log(`Focus characters: [${focusCharacters.join(', ')}]`);
|
||||
console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`);
|
||||
console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} evtMerged=+${lexicalEventCount} evtFiltered=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (idx=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`);
|
||||
console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
|
||||
@@ -1373,7 +1390,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
causalChain,
|
||||
l0Selected,
|
||||
l1ByFloor,
|
||||
focusEntities: bundle.focusEntities,
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
elapsed: metrics.timing.total,
|
||||
metrics,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user