refactor focus concepts: add focusTerms/focusCharacters and switch character filtering

This commit is contained in:
2026-02-15 18:58:51 +08:00
parent d7beead43a
commit ab8f2c9f40
5 changed files with 228 additions and 117 deletions

View File

@@ -36,6 +36,83 @@ function isBlacklistedPersonTerm(raw) {
return PERSON_LEXICON_BLACKLIST.has(normalize(raw));
}
/**
 * Add a normalized person term to the given set, skipping terms that are
 * empty, shorter than 2 characters, or on the person-term blacklist.
 *
 * @param {Set<string>} set - Target set (mutated in place)
 * @param {string} raw - Raw candidate name
 */
function addPersonTerm(set, raw) {
  const normalized = normalize(raw);
  if (!normalized || normalized.length < 2 || isBlacklistedPersonTerm(normalized)) {
    return;
  }
  set.add(normalized);
}
/**
 * Gather the trusted character pool: main characters, arc characters,
 * the current character (name2), and L2 event participants.
 * The user (name1) is always removed as a hard constraint.
 *
 * @param {object} store - Summary store (reads store.json)
 * @param {object} context - Chat context ({ name1, name2 })
 * @returns {Set<string>} Normalized trusted character names
 */
function collectTrustedCharacters(store, context) {
  const trusted = new Set();

  // Main characters: entries may be plain strings or { name } objects.
  for (const entry of store?.json?.characters?.main || []) {
    addPersonTerm(trusted, typeof entry === 'string' ? entry : entry.name);
  }

  // Arc characters.
  for (const arc of store?.json?.arcs || []) {
    addPersonTerm(trusted, arc.name);
  }

  // Current character (name2).
  if (context?.name2) {
    addPersonTerm(trusted, context.name2);
  }

  // L2 event participants.
  for (const event of store?.json?.events || []) {
    for (const participant of event?.participants || []) {
      addPersonTerm(trusted, participant);
    }
  }

  // Hard constraint: the user (name1) is never a trusted character.
  if (context?.name1) {
    trusted.delete(normalize(context.name1));
  }

  return trusted;
}
/**
 * Build trusted character pool only (without scanning L0 candidate atoms).
 * trustedCharacters: main/arcs/name2/L2 participants, excludes name1.
 *
 * @param {object} store - Summary store
 * @param {object} context - Chat context ({ name1, name2 })
 * @returns {Set<string>} Normalized trusted character names
 */
export function buildTrustedCharacters(store, context) {
  const pool = collectTrustedCharacters(store, context);
  return pool;
}
/**
 * Collect candidate characters from L0 state atoms: every edge source (s)
 * and target (t) is treated as a potential character name, cleaned through
 * the shared blacklist/normalization path. The user (name1) is excluded.
 *
 * @param {object} context - Chat context ({ name1 })
 * @returns {Set<string>} Normalized candidate character names
 */
function collectCandidateCharactersFromL0(context) {
  const candidates = new Set();
  for (const atom of getStateAtoms()) {
    for (const edge of atom.edges || []) {
      addPersonTerm(candidates, edge?.s);
      addPersonTerm(candidates, edge?.t);
    }
  }
  if (context?.name1) {
    candidates.delete(normalize(context.name1));
  }
  return candidates;
}
/**
 * Build character pools with trust tiers.
 * trustedCharacters: main/arcs/name2/L2 participants (clean source)
 * candidateCharacters: L0 edges.s/t (blacklist-cleaned)
 * allCharacters: union of both pools.
 *
 * @param {object} store - Summary store
 * @param {object} context - Chat context ({ name1, name2 })
 * @returns {{trustedCharacters: Set<string>, candidateCharacters: Set<string>, allCharacters: Set<string>}}
 */
export function buildCharacterPools(store, context) {
  const trustedCharacters = collectTrustedCharacters(store, context);
  const candidateCharacters = collectCandidateCharactersFromL0(context);
  const allCharacters = new Set();
  for (const name of trustedCharacters) {
    allCharacters.add(name);
  }
  for (const name of candidateCharacters) {
    allCharacters.add(name);
  }
  return { trustedCharacters, candidateCharacters, allCharacters };
}
/**
* 构建实体词典
*
@@ -53,56 +130,7 @@ function isBlacklistedPersonTerm(raw) {
* @returns {Set<string>} 标准化后的实体集合
*/
export function buildEntityLexicon(store, context) {
  // The lexicon is now the union of the trusted and candidate character
  // pools (main/arcs/name2/L2 participants plus L0 edges.s/t), with name1
  // already excluded inside buildCharacterPools. The previous inline
  // collection logic was removed to keep a single source of truth; this
  // also eliminates the unreachable leftover `return` that the old body
  // left behind after the delegating return.
  return buildCharacterPools(store, context).allCharacters;
}
/**

View File

@@ -36,6 +36,8 @@ export function createMetrics() {
// Anchor (L0 StateAtoms) - 语义锚点
anchor: {
needRecall: false,
focusTerms: [],
focusCharacters: [],
focusEntities: [],
matched: 0,
floorsHit: 0,
@@ -85,6 +87,7 @@ export function createMetrics() {
causalChainDepth: 0,
causalCount: 0,
entitiesUsed: 0,
focusTermsCount: 0,
entityNames: [],
},
@@ -254,7 +257,8 @@ export function formatMetricsLog(metrics) {
lines.push('[Anchor] L0 StateAtoms - 语义锚点');
lines.push(`├─ need_recall: ${m.anchor.needRecall}`);
if (m.anchor.needRecall) {
lines.push(`├─ focus_entities: [${(m.anchor.focusEntities || []).join(', ')}]`);
lines.push(`├─ focus_terms: [${(m.anchor.focusTerms || m.anchor.focusEntities || []).join(', ')}]`);
lines.push(`├─ focus_characters: [${(m.anchor.focusCharacters || []).join(', ')}]`);
lines.push(`├─ matched: ${m.anchor.matched || 0}`);
lines.push(`└─ floors_hit: ${m.anchor.floorsHit || 0}`);
}
@@ -310,7 +314,7 @@ export function formatMetricsLog(metrics) {
if (m.event.entityFilter) {
const ef = m.event.entityFilter;
lines.push(`├─ entity_filter:`);
lines.push(`│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`);
lines.push(`│ ├─ focus_characters: [${(ef.focusCharacters || ef.focusEntities || []).join(', ')}]`);
lines.push(`│ ├─ before: ${ef.before}`);
lines.push(`│ ├─ after: ${ef.after}`);
lines.push(`│ └─ filtered: ${ef.filtered}`);
@@ -338,7 +342,7 @@ export function formatMetricsLog(metrics) {
}
lines.push(`├─ causal_chain: depth=${m.event.causalChainDepth}, count=${m.event.causalCount}`);
lines.push(`└─ entities_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}]`);
lines.push(`└─ focus_characters_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}], focus_terms_count=${m.event.focusTermsCount || 0}`);
lines.push('');
// Evidence (Two-Stage: Floor Rerank → L1 Pull)
@@ -485,7 +489,7 @@ export function detectIssues(metrics) {
// 查询构建问题
// ─────────────────────────────────────────────────────────────────
if ((m.anchor.focusEntities || []).length === 0) {
if ((m.anchor.focusTerms || m.anchor.focusEntities || []).length === 0) {
issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
}

View File

@@ -19,7 +19,7 @@
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../../extensions.js';
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText } from './entity-lexicon.js';
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
import { getSummaryStore } from '../../data/store.js';
import { filterText } from '../utils/text-filter.js';
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
@@ -137,7 +137,13 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
* @property {QuerySegment|null} hintsSegment - R2 hints 段refinement 后填充)
* @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前)
* @property {string[]} lexicalTerms - MiniSearch 查询词
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1
* @property {string[]} focusTerms - 焦点词(原 focusEntities
* @property {string[]} focusCharacters - 焦点人物focusTerms ∩ trustedCharacters
* @property {string[]} focusEntities - Deprecated alias of focusTerms
* @property {Set<string>} allEntities - Full entity lexicon (includes non-character entities)
* @property {Set<string>} allCharacters - Union of trusted and candidate character pools
* @property {Set<string>} trustedCharacters - Clean character pool (main/arcs/name2/L2 participants)
* @property {Set<string>} candidateCharacters - Extended character pool from L0 edges.s/t after cleanup
* @property {Set<string>} _lexicon - 实体词典(内部使用)
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
*/
@@ -203,9 +209,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
context = { name1: ctx.name1, name2: ctx.name2 };
}
// 1. 实体词典
// 1. 实体/人物词典
const lexicon = buildEntityLexicon(store, context);
const displayMap = buildDisplayNameMap(store, context);
const { trustedCharacters, candidateCharacters, allCharacters } = buildCharacterPools(store, context);
// 2. 分离焦点与上下文
const contextEntries = [];
@@ -253,9 +260,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
}
}
// 3. 提取焦点实体
// 3. 提取焦点词与焦点人物
const combinedText = allCleanTexts.join(' ');
const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);
const focusTerms = extractEntitiesFromText(combinedText, lexicon, displayMap);
const focusCharacters = focusTerms.filter(term => trustedCharacters.has(term.toLowerCase()));
// 4. 构建 querySegments
// 上下文在前oldest → newest焦点在末尾
@@ -286,7 +294,7 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
: contextLines.join('\n');
// 6. lexicalTerms实体优先 + 高频实词补充)
const entityTerms = focusEntities.map(e => e.toLowerCase());
const entityTerms = focusTerms.map(e => e.toLowerCase());
const textTerms = extractKeyTerms(combinedText);
const termSet = new Set(entityTerms);
for (const t of textTerms) {
@@ -299,7 +307,13 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
hintsSegment: null,
rerankQuery,
lexicalTerms: Array.from(termSet),
focusEntities,
focusTerms,
focusCharacters,
focusEntities: focusTerms, // deprecated alias (compat)
allEntities: lexicon,
allCharacters,
trustedCharacters,
candidateCharacters,
_lexicon: lexicon,
_displayMap: displayMap,
};

View File

@@ -319,7 +319,7 @@ async function recallAnchors(queryVector, vectorConfig, metrics) {
// 返回 { events, vectorMap }
// ═══════════════════════════════════════════════════════════════════════════
async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, metrics) {
async function recallEvents(queryVector, allEvents, vectorConfig, focusCharacters, metrics) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length || !allEvents?.length) {
return { events: [], vectorMap: new Map() };
@@ -339,7 +339,7 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities,
return { events: [], vectorMap };
}
const focusSet = new Set((focusEntities || []).map(normalize));
const focusSet = new Set((focusCharacters || []).map(normalize));
const scored = allEvents.map(event => {
const v = vectorMap.get(event.id);
@@ -381,7 +381,8 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities,
if (metrics) {
metrics.event.entityFilter = {
focusEntities: focusEntities || [],
focusCharacters: focusCharacters || [],
focusEntities: focusCharacters || [],
before: beforeFilter,
after: candidates.length,
filtered: beforeFilter - candidates.length,
@@ -962,6 +963,8 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
l1ByFloor: new Map(),
causalChain: [],
focusEntities: [],
focusTerms: [],
focusCharacters: [],
elapsed: metrics.timing.total,
logText: 'No events.',
metrics,
@@ -982,9 +985,13 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);
const bundle = buildQueryBundle(lastMessages, pendingUserMessage);
const focusTerms = bundle.focusTerms || bundle.focusEntities || [];
const focusCharacters = bundle.focusCharacters || [];
metrics.query.buildTime = Math.round(performance.now() - T_Build_Start);
metrics.anchor.focusEntities = bundle.focusEntities;
metrics.anchor.focusTerms = focusTerms;
metrics.anchor.focusEntities = focusTerms; // compat
metrics.anchor.focusCharacters = focusCharacters;
if (metrics.query?.lengths) {
metrics.query.lengths.v0Chars = bundle.querySegments.reduce((sum, s) => sum + s.text.length, 0);
@@ -993,7 +1000,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
}
xbLog.info(MODULE_ID,
`Query Build: focus=[${bundle.focusEntities.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
`Query Build: focus_terms=[${focusTerms.join(',')}] focus_characters=[${focusCharacters.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
);
// ═══════════════════════════════════════════════════════════════════
@@ -1005,7 +1012,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.timing.total = Math.round(performance.now() - T0);
return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities,
focusEntities: focusTerms,
focusTerms,
focusCharacters,
elapsed: metrics.timing.total,
logText: 'No query segments.',
metrics,
@@ -1025,7 +1034,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.timing.total = Math.round(performance.now() - T0);
return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities,
focusEntities: focusTerms,
focusTerms,
focusCharacters,
elapsed: metrics.timing.total,
logText: 'Embedding failed (round 1, after retry).',
metrics,
@@ -1037,7 +1048,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.timing.total = Math.round(performance.now() - T0);
return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities,
focusEntities: focusTerms,
focusTerms,
focusCharacters,
elapsed: metrics.timing.total,
logText: 'Empty query vectors (round 1).',
metrics,
@@ -1055,7 +1068,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.timing.total = Math.round(performance.now() - T0);
return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities,
focusEntities: focusTerms,
focusTerms,
focusCharacters,
elapsed: metrics.timing.total,
logText: 'Weighted average produced empty vector.',
metrics,
@@ -1067,7 +1082,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
const r1AnchorTime = Math.round(performance.now() - T_R1_Anchor_Start);
const T_R1_Event_Start = performance.now();
const { events: eventHits_v0 } = await recallEvents(queryVector_v0, allEvents, vectorConfig, bundle.focusEntities, null);
const { events: eventHits_v0 } = await recallEvents(queryVector_v0, allEvents, vectorConfig, focusCharacters, null);
const r1EventTime = Math.round(performance.now() - T_R1_Event_Start);
xbLog.info(MODULE_ID,
@@ -1089,7 +1104,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
}
xbLog.info(MODULE_ID,
`Refinement: focus=[${bundle.focusEntities.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)`
`Refinement: focus_terms=[${focusTerms.join(',')}] focus_characters=[${focusCharacters.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)`
);
// ═══════════════════════════════════════════════════════════════════
@@ -1129,7 +1144,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.timing.anchorSearch = Math.round(performance.now() - T_R2_Anchor_Start);
const T_R2_Event_Start = performance.now();
let { events: eventHits, vectorMap: eventVectorMap } = await recallEvents(queryVector_v1, allEvents, vectorConfig, bundle.focusEntities, metrics);
let { events: eventHits, vectorMap: eventVectorMap } = await recallEvents(queryVector_v1, allEvents, vectorConfig, focusCharacters, metrics);
metrics.timing.eventRetrieval = Math.round(performance.now() - T_R2_Event_Start);
xbLog.info(MODULE_ID,
@@ -1178,7 +1193,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
let lexicalEventCount = 0;
let lexicalEventFilteredByDense = 0;
let l0LinkedCount = 0;
const focusSetForLexical = new Set((bundle.focusEntities || []).map(normalize));
const focusSetForLexical = new Set((focusCharacters || []).map(normalize));
for (const eid of lexicalResult.eventIds) {
if (existingEventIds.has(eid)) continue;
@@ -1351,14 +1366,16 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
// ═══════════════════════════════════════════════════════════════════
metrics.timing.total = Math.round(performance.now() - T0);
metrics.event.entityNames = bundle.focusEntities;
metrics.event.entitiesUsed = bundle.focusEntities.length;
metrics.event.entityNames = focusCharacters;
metrics.event.entitiesUsed = focusCharacters.length;
metrics.event.focusTermsCount = focusTerms.length;
console.group('%c[Recall v9]', 'color: #7c3aed; font-weight: bold');
console.log(`Total: ${metrics.timing.total}ms`);
console.log(`Query Build: ${metrics.query.buildTime}ms | Refine: ${metrics.query.refineTime}ms`);
console.log(`R1 weights: [${r1Weights.map(w => w.toFixed(2)).join(', ')}]`);
console.log(`Focus: [${bundle.focusEntities.join(', ')}]`);
console.log(`Focus terms: [${focusTerms.join(', ')}]`);
console.log(`Focus characters: [${focusCharacters.join(', ')}]`);
console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`);
console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} evtMerged=+${lexicalEventCount} evtFiltered=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (idx=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`);
console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
@@ -1373,7 +1390,9 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
causalChain,
l0Selected,
l1ByFloor,
focusEntities: bundle.focusEntities,
focusEntities: focusTerms,
focusTerms,
focusCharacters,
elapsed: metrics.timing.total,
metrics,
};