Improve rerank failure handling and tokenizer JP support

This commit is contained in:
2026-02-10 17:52:09 +08:00
parent fbf34815bb
commit 062df60570
7 changed files with 655 additions and 285 deletions

View File

@@ -28,17 +28,17 @@ export async function rerank(query, documents, options = {}) {
if (!query?.trim()) {
xbLog.warn(MODULE_ID, 'query 为空,跳过 rerank');
return documents.map((_, i) => ({ index: i, relevance_score: 0.5 }));
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
}
if (!documents?.length) {
return [];
return { results: [], failed: false };
}
const key = getApiKey();
if (!key) {
xbLog.warn(MODULE_ID, '未配置 API Key跳过 rerank');
return documents.map((_, i) => ({ index: i, relevance_score: 0.5 }));
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
}
// 截断超长文档列表
@@ -61,7 +61,7 @@ export async function rerank(query, documents, options = {}) {
if (!validDocs.length) {
xbLog.warn(MODULE_ID, '无有效文档,跳过 rerank');
return [];
return { results: [], failed: false };
}
const controller = new AbortController();
@@ -106,7 +106,7 @@ export async function rerank(query, documents, options = {}) {
const elapsed = Math.round(performance.now() - T0);
xbLog.info(MODULE_ID, `Rerank 完成: ${validDocs.length} docs → ${results.length} selected (${elapsed}ms)`);
return mapped;
return { results: mapped, failed: false };
} catch (e) {
clearTimeout(timeoutId);
@@ -118,10 +118,13 @@ export async function rerank(query, documents, options = {}) {
}
// 降级:返回原顺序,分数均匀分布
return documents.slice(0, topN).map((_, i) => ({
index: i,
relevance_score: 1 - (i / documents.length) * 0.5,
}));
return {
results: documents.slice(0, topN).map((_, i) => ({
index: i,
relevance_score: 0,
})),
failed: true,
};
}
}
@@ -138,30 +141,38 @@ export async function rerankChunks(query, chunks, options = {}) {
if (!chunks?.length) return [];
if (chunks.length <= topN) {
// 数量不超限,仍然 rerank 以获取分数,但不过滤
const texts = chunks.map(c => c.text || c.semantic || '');
const results = await rerank(query, texts, { topN: chunks.length, ...options });
const { results, failed } = await rerank(query, texts, { topN: chunks.length, ...options });
if (failed) {
return chunks.map(c => ({ ...c, _rerankScore: 0, _rerankFailed: true }));
}
const scoreMap = new Map(results.map(r => [r.index, r.relevance_score]));
return chunks.map((c, i) => ({
...c,
_rerankScore: scoreMap.get(i) ?? 0.5,
_rerankScore: scoreMap.get(i) ?? 0,
})).sort((a, b) => b._rerankScore - a._rerankScore);
}
const texts = chunks.map(c => c.text || c.semantic || '');
const results = await rerank(query, texts, { topN, ...options });
const { results, failed } = await rerank(query, texts, { topN, ...options });
// 过滤低分 + 排序
const selected = results
if (failed) {
return chunks.slice(0, topN).map(c => ({
...c,
_rerankScore: 0,
_rerankFailed: true,
}));
}
return results
.filter(r => r.relevance_score >= minScore)
.sort((a, b) => b.relevance_score - a.relevance_score)
.map(r => ({
...chunks[r.index],
_rerankScore: r.relevance_score,
}));
return selected;
}
/**
@@ -174,7 +185,7 @@ export async function testRerankService() {
}
try {
const results = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });
const { results } = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });
return {
success: true,
message: `连接成功,返回 ${results.length} 个结果`,