Improve rerank failure handling and tokenizer JP support
This commit is contained in:
@@ -28,17 +28,17 @@ export async function rerank(query, documents, options = {}) {
|
||||
|
||||
if (!query?.trim()) {
|
||||
xbLog.warn(MODULE_ID, 'query 为空,跳过 rerank');
|
||||
return documents.map((_, i) => ({ index: i, relevance_score: 0.5 }));
|
||||
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
|
||||
}
|
||||
|
||||
if (!documents?.length) {
|
||||
return [];
|
||||
return { results: [], failed: false };
|
||||
}
|
||||
|
||||
const key = getApiKey();
|
||||
if (!key) {
|
||||
xbLog.warn(MODULE_ID, '未配置 API Key,跳过 rerank');
|
||||
return documents.map((_, i) => ({ index: i, relevance_score: 0.5 }));
|
||||
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
|
||||
}
|
||||
|
||||
// 截断超长文档列表
|
||||
@@ -61,7 +61,7 @@ export async function rerank(query, documents, options = {}) {
|
||||
|
||||
if (!validDocs.length) {
|
||||
xbLog.warn(MODULE_ID, '无有效文档,跳过 rerank');
|
||||
return [];
|
||||
return { results: [], failed: false };
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
@@ -106,7 +106,7 @@ export async function rerank(query, documents, options = {}) {
|
||||
const elapsed = Math.round(performance.now() - T0);
|
||||
xbLog.info(MODULE_ID, `Rerank 完成: ${validDocs.length} docs → ${results.length} selected (${elapsed}ms)`);
|
||||
|
||||
return mapped;
|
||||
return { results: mapped, failed: false };
|
||||
|
||||
} catch (e) {
|
||||
clearTimeout(timeoutId);
|
||||
@@ -118,10 +118,13 @@ export async function rerank(query, documents, options = {}) {
|
||||
}
|
||||
|
||||
// 降级:返回原顺序,分数均匀分布
|
||||
return documents.slice(0, topN).map((_, i) => ({
|
||||
index: i,
|
||||
relevance_score: 1 - (i / documents.length) * 0.5,
|
||||
}));
|
||||
return {
|
||||
results: documents.slice(0, topN).map((_, i) => ({
|
||||
index: i,
|
||||
relevance_score: 0,
|
||||
})),
|
||||
failed: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,30 +141,38 @@ export async function rerankChunks(query, chunks, options = {}) {
|
||||
|
||||
if (!chunks?.length) return [];
|
||||
if (chunks.length <= topN) {
|
||||
// 数量不超限,仍然 rerank 以获取分数,但不过滤
|
||||
const texts = chunks.map(c => c.text || c.semantic || '');
|
||||
const results = await rerank(query, texts, { topN: chunks.length, ...options });
|
||||
|
||||
const { results, failed } = await rerank(query, texts, { topN: chunks.length, ...options });
|
||||
|
||||
if (failed) {
|
||||
return chunks.map(c => ({ ...c, _rerankScore: 0, _rerankFailed: true }));
|
||||
}
|
||||
|
||||
const scoreMap = new Map(results.map(r => [r.index, r.relevance_score]));
|
||||
return chunks.map((c, i) => ({
|
||||
...c,
|
||||
_rerankScore: scoreMap.get(i) ?? 0.5,
|
||||
_rerankScore: scoreMap.get(i) ?? 0,
|
||||
})).sort((a, b) => b._rerankScore - a._rerankScore);
|
||||
}
|
||||
|
||||
const texts = chunks.map(c => c.text || c.semantic || '');
|
||||
const results = await rerank(query, texts, { topN, ...options });
|
||||
const { results, failed } = await rerank(query, texts, { topN, ...options });
|
||||
|
||||
// 过滤低分 + 排序
|
||||
const selected = results
|
||||
if (failed) {
|
||||
return chunks.slice(0, topN).map(c => ({
|
||||
...c,
|
||||
_rerankScore: 0,
|
||||
_rerankFailed: true,
|
||||
}));
|
||||
}
|
||||
|
||||
return results
|
||||
.filter(r => r.relevance_score >= minScore)
|
||||
.sort((a, b) => b.relevance_score - a.relevance_score)
|
||||
.map(r => ({
|
||||
...chunks[r.index],
|
||||
_rerankScore: r.relevance_score,
|
||||
}));
|
||||
|
||||
return selected;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -174,7 +185,7 @@ export async function testRerankService() {
|
||||
}
|
||||
|
||||
try {
|
||||
const results = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });
|
||||
const { results } = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });
|
||||
return {
|
||||
success: true,
|
||||
message: `连接成功,返回 ${results.length} 个结果`,
|
||||
|
||||
Reference in New Issue
Block a user