refactor focus concepts: add focusTerms/focusCharacters and switch character filtering
This commit is contained in:
@@ -19,7 +19,7 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText } from './entity-lexicon.js';
|
||||
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
|
||||
import { getSummaryStore } from '../../data/store.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
|
||||
@@ -137,7 +137,13 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
|
||||
* @property {QuerySegment|null} hintsSegment - R2 hints 段(refinement 后填充)
|
||||
* @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前)
|
||||
* @property {string[]} lexicalTerms - MiniSearch 查询词
|
||||
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1)
|
||||
* @property {string[]} focusTerms - Focus terms (formerly named focusEntities)
|
||||
* @property {string[]} focusCharacters - Focus characters: the focusTerms whose lowercased form is in trustedCharacters (original word form is kept)
|
||||
* @property {string[]} focusEntities - Deprecated alias of focusTerms
|
||||
* @property {Set<string>} allEntities - Full entity lexicon (includes non-character entities)
|
||||
* @property {Set<string>} allCharacters - Union of trusted and candidate character pools
|
||||
* @property {Set<string>} trustedCharacters - Clean character pool (main/arcs/name2/L2 participants)
|
||||
* @property {Set<string>} candidateCharacters - Extended character pool from L0 edges.s/t after cleanup
|
||||
* @property {Set<string>} _lexicon - 实体词典(内部使用)
|
||||
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
|
||||
*/
|
||||
@@ -203,9 +209,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
context = { name1: ctx.name1, name2: ctx.name2 };
|
||||
}
|
||||
|
||||
// 1. 实体词典
|
||||
// 1. 实体/人物词典
|
||||
const lexicon = buildEntityLexicon(store, context);
|
||||
const displayMap = buildDisplayNameMap(store, context);
|
||||
const { trustedCharacters, candidateCharacters, allCharacters } = buildCharacterPools(store, context);
|
||||
|
||||
// 2. 分离焦点与上下文
|
||||
const contextEntries = [];
|
||||
@@ -253,9 +260,10 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 提取焦点实体
|
||||
// 3. 提取焦点词与焦点人物
|
||||
const combinedText = allCleanTexts.join(' ');
|
||||
const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);
|
||||
const focusTerms = extractEntitiesFromText(combinedText, lexicon, displayMap);
|
||||
const focusCharacters = focusTerms.filter(term => trustedCharacters.has(term.toLowerCase()));
|
||||
|
||||
// 4. 构建 querySegments
|
||||
// 上下文在前(oldest → newest),焦点在末尾
|
||||
@@ -286,7 +294,7 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
: contextLines.join('\n');
|
||||
|
||||
// 6. lexicalTerms(实体优先 + 高频实词补充)
|
||||
const entityTerms = focusEntities.map(e => e.toLowerCase());
|
||||
const entityTerms = focusTerms.map(e => e.toLowerCase());
|
||||
const textTerms = extractKeyTerms(combinedText);
|
||||
const termSet = new Set(entityTerms);
|
||||
for (const t of textTerms) {
|
||||
@@ -299,7 +307,13 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
hintsSegment: null,
|
||||
rerankQuery,
|
||||
lexicalTerms: Array.from(termSet),
|
||||
focusEntities,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
focusEntities: focusTerms, // deprecated alias (compat)
|
||||
allEntities: lexicon,
|
||||
allCharacters,
|
||||
trustedCharacters,
|
||||
candidateCharacters,
|
||||
_lexicon: lexicon,
|
||||
_displayMap: displayMap,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user