refactor focus concepts: add focusTerms/focusCharacters and switch character filtering

This commit is contained in:
2026-02-15 18:58:51 +08:00
parent d7beead43a
commit ab8f2c9f40
5 changed files with 228 additions and 117 deletions

View File

@@ -19,7 +19,7 @@
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../../extensions.js';
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText } from './entity-lexicon.js';
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
import { getSummaryStore } from '../../data/store.js';
import { filterText } from '../utils/text-filter.js';
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
@@ -137,7 +137,13 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
* @property {QuerySegment|null} hintsSegment - R2 hints 段(refinement 后填充)
* @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前)
* @property {string[]} lexicalTerms - MiniSearch 查询词
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1)
* @property {string[]} focusTerms - 焦点词(原 focusEntities)
* @property {string[]} focusCharacters - 焦点人物(focusTerms ∩ trustedCharacters)
* @property {string[]} focusEntities - Deprecated alias of focusTerms
* @property {Set<string>} allEntities - Full entity lexicon (includes non-character entities)
* @property {Set<string>} allCharacters - Union of trusted and candidate character pools
* @property {Set<string>} trustedCharacters - Clean character pool (main/arcs/name2/L2 participants)
* @property {Set<string>} candidateCharacters - Extended character pool from L0 edges.s/t after cleanup
* @property {Set<string>} _lexicon - 实体词典(内部使用)
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
*/
@@ -203,9 +209,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
context = { name1: ctx.name1, name2: ctx.name2 };
}
// 1. 实体词典
// 1. 实体/人物词典
const lexicon = buildEntityLexicon(store, context);
const displayMap = buildDisplayNameMap(store, context);
const { trustedCharacters, candidateCharacters, allCharacters } = buildCharacterPools(store, context);
// 2. 分离焦点与上下文
const contextEntries = [];
@@ -253,9 +260,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
}
}
// 3. 提取焦点实体
// 3. 提取焦点词与焦点人物
const combinedText = allCleanTexts.join(' ');
const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);
const focusTerms = extractEntitiesFromText(combinedText, lexicon, displayMap);
const focusCharacters = focusTerms.filter(term => trustedCharacters.has(term.toLowerCase()));
// 4. 构建 querySegments
// 上下文在前(oldest → newest),焦点在末尾
@@ -286,7 +294,7 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
: contextLines.join('\n');
// 6. lexicalTerms(实体优先 + 高频实词补充)
const entityTerms = focusEntities.map(e => e.toLowerCase());
const entityTerms = focusTerms.map(e => e.toLowerCase());
const textTerms = extractKeyTerms(combinedText);
const termSet = new Set(entityTerms);
for (const t of textTerms) {
@@ -299,7 +307,13 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
hintsSegment: null,
rerankQuery,
lexicalTerms: Array.from(termSet),
focusEntities,
focusTerms,
focusCharacters,
focusEntities: focusTerms, // deprecated alias (compat)
allEntities: lexicon,
allCharacters,
trustedCharacters,
candidateCharacters,
_lexicon: lexicon,
_displayMap: displayMap,
};