Update retrieval, rerank, and indexing changes

This commit is contained in:
2026-02-11 13:55:19 +08:00
parent 8d062d39b5
commit 297cc03770
7 changed files with 501 additions and 287 deletions

View File

@@ -89,6 +89,46 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
.map(([term]) => term);
}
/**
 * Build the query string handed to the reranker (bge-reranker-v2-m3).
 *
 * The output is plain natural-language text and carries no structural tags
 * such as [ENTITIES] or [DIALOGUE]. It is assembled, in order, from:
 *   1. an optional entity hint line (only when focus entities are present),
 *   2. one "speaker:text" line per recent message with non-empty cleaned text,
 *   3. one "speaker:text" line for the pending user message, if any.
 *
 * @param {string[]} focusEntities - Focus entities; a missing value is treated as empty.
 * @param {object[]} lastMessages - Most recent K messages; each is read for
 *   `is_user`, `name` and `mes`. A missing value is treated as empty.
 * @param {string|null} pendingUserMessage - User message about to be sent.
 * @param {object} context - `{ name1, name2 }`: user and character display names.
 *   A missing value falls back to the default labels.
 * @returns {string} Lines joined with '\n'; '' when nothing usable was supplied.
 */
function buildRerankQuery(focusEntities, lastMessages, pendingUserMessage, context) {
  // Tolerate absent arguments the same way lastMessages already was.
  const names = context || {};
  const userLabel = names.name1 || '用户';
  const entities = focusEntities || [];
  const parts = [];

  // Entity hint.
  if (entities.length > 0) {
    parts.push(`关于${entities.join('、')}`);
  }

  // Recent dialogue, verbatim after cleaning.
  for (const m of (lastMessages || [])) {
    if (!m) {
      continue;
    }
    const speaker = m.is_user ? userLabel : (m.name || names.name2 || '角色');
    const clean = cleanMessageText(m.mes || '');
    if (clean) {
      // Separate the speaker label from the text so the name does not run
      // straight into the first word of the message.
      parts.push(`${speaker}:${clean}`);
    }
  }

  // Message that is about to be sent.
  if (pendingUserMessage) {
    const clean = cleanMessageText(pendingUserMessage);
    if (clean) {
      parts.push(`${userLabel}:${clean}`);
    }
  }

  return parts.join('\n');
}
// ─────────────────────────────────────────────────────────────────────────
// QueryBundle 类型定义(JSDoc)
// ─────────────────────────────────────────────────────────────────────────
@@ -176,9 +216,8 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
const queryText_v0 = queryParts.join('\n\n');
// 6. rerankQuery 与 embedding query 同源(零暗箱
// 后续 refine 会把它升级为与 queryText_v1 同源。
const rerankQuery = queryText_v0;
// 6. rerankQuery 独立构建(纯自然语言,供 reranker 使用
const rerankQuery = buildRerankQuery(focusEntities, dialogueLines.length > 0 ? lastMessages : [], pendingUserMessage, context);
// 7. 构建 lexicalTerms
const entityTerms = focusEntities.map(e => e.toLowerCase());
@@ -281,8 +320,8 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
}
}
// 5. rerankQuery 与最终 query 同源(零暗箱
bundle.rerankQuery = bundle.queryText_v1 || bundle.queryText_v0;
// 5. rerankQuery 保持独立(不随 refinement 变更
// reranker 需要纯自然语言 query不受 memory hints 干扰
// 6. 增强 lexicalTerms
if (hints.length > 0) {