Update retrieval, rerank, and indexing changes
This commit is contained in:
@@ -89,6 +89,46 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
|
||||
.map(([term]) => term);
|
||||
}
|
||||
|
||||
/**
 * Build the query string handed to the reranker: plain natural language with
 * no structural tags.
 *
 * The reranker (bge-reranker-v2-m3) expects natural-language text, so this
 * query deliberately omits section markers such as [ENTITIES] or [DIALOGUE].
 *
 * Output layout, one part per line:
 *   1. an entity hint sentence (only when there are focus entities),
 *   2. one "speaker:text" line per recent message whose cleaned text is non-empty,
 *   3. one "speaker:text" line for the pending user message, if any.
 *
 * @param {string[]} focusEntities - Focus entities; null/undefined is treated as none.
 * @param {object[]} lastMessages - The most recent K messages; each is read for
 *   `is_user`, `name` and `mes`. null/undefined is treated as an empty list and
 *   null entries are skipped.
 * @param {string|null} pendingUserMessage - The user message about to be sent.
 * @param {object} context - { name1, name2 }; null/undefined falls back to the
 *   built-in default speaker labels.
 * @returns {string} The parts joined with '\n'; '' when nothing contributes.
 */
function buildRerankQuery(focusEntities, lastMessages, pendingUserMessage, context) {
    // Normalise every input the same way lastMessages was already treated, so a
    // missing argument degrades to "no contribution" instead of a TypeError.
    const entities = focusEntities || [];
    const names = context || {};
    const userLabel = names.name1 || '用户';
    const parts = [];

    // Entity hint
    if (entities.length > 0) {
        parts.push(`关于${entities.join('、')}。`);
    }

    // Recent dialogue, verbatim (after cleaning)
    for (const m of (lastMessages || [])) {
        if (!m) {
            continue;
        }
        // Non-user messages prefer their own name, then context.name2.
        const speaker = m.is_user ? userLabel : (m.name || names.name2 || '角色');
        const clean = cleanMessageText(m.mes || '');
        if (clean) {
            parts.push(`${speaker}:${clean}`);
        }
    }

    // Pending user message
    if (pendingUserMessage) {
        const clean = cleanMessageText(pendingUserMessage);
        if (clean) {
            parts.push(`${userLabel}:${clean}`);
        }
    }

    return parts.join('\n');
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// QueryBundle 类型定义(JSDoc)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
@@ -176,9 +216,8 @@ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null,
|
||||
|
||||
const queryText_v0 = queryParts.join('\n\n');
|
||||
|
||||
// 6. rerankQuery 与 embedding query 同源(零暗箱)
|
||||
// 后续 refine 会把它升级为与 queryText_v1 同源。
|
||||
const rerankQuery = queryText_v0;
|
||||
// 6. rerankQuery 独立构建(纯自然语言,供 reranker 使用)
|
||||
const rerankQuery = buildRerankQuery(focusEntities, dialogueLines.length > 0 ? lastMessages : [], pendingUserMessage, context);
|
||||
|
||||
// 7. 构建 lexicalTerms
|
||||
const entityTerms = focusEntities.map(e => e.toLowerCase());
|
||||
@@ -281,8 +320,8 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
|
||||
}
|
||||
}
|
||||
|
||||
// 5. rerankQuery 与最终 query 同源(零暗箱)
|
||||
bundle.rerankQuery = bundle.queryText_v1 || bundle.queryText_v0;
|
||||
// 5. rerankQuery 保持独立(不随 refinement 变更)
|
||||
// reranker 需要纯自然语言 query,不受 memory hints 干扰
|
||||
|
||||
// 6. 增强 lexicalTerms
|
||||
if (hints.length > 0) {
|
||||
|
||||
Reference in New Issue
Block a user