refine recall fusion guard and floor-based L0 collection
This commit is contained in:
@@ -233,6 +233,9 @@ async function buildIndexAsync(docs) {
 * @property {boolean} idfEnabled - Whether IDF stats are available for weighting.
 * @property {number} idfDocCount - Number of lexical docs used to compute IDF.
 * @property {Array<{term:string,idf:number}>} topIdfTerms - Top query terms by IDF.
 * @property {string[]} queryTerms - Normalized query terms actually searched.
 * @property {Record<string, Array<{floor:number, weightedScore:number, chunkId:string}>>} termFloorHits - Chunk-floor hits by term.
 * @property {Array<{floor:number, score:number, hitTermsCount:number}>} floorLexScores - Aggregated lexical floor scores (debug).
 * @property {number} termSearches - Number of per-term MiniSearch queries executed.
 * @property {number} searchTime - Total lexical search time in milliseconds.
 */
@@ -258,6 +261,9 @@ export function searchLexicalIndex(index, terms) {
    idfEnabled: lexicalDocCount > 0,
    idfDocCount: lexicalDocCount,
    topIdfTerms: [],
    queryTerms: [],
    termFloorHits: {},
    floorLexScores: [],
    termSearches: 0,
    searchTime: 0,
  };
@@ -268,9 +274,12 @@ export function searchLexicalIndex(index, terms) {
  }

  const queryTerms = Array.from(new Set((terms || []).map(normalizeTerm).filter(Boolean)));
  result.queryTerms = [...queryTerms];
  const weightedScores = new Map(); // docId -> score
  const hitMeta = new Map(); // docId -> { type, floor }
  const idfPairs = [];
  const termFloorHits = new Map(); // term -> [{ floor, weightedScore, chunkId }]
  const floorLexAgg = new Map(); // floor -> { score, terms:Set<string> }

  for (const term of queryTerms) {
    const idf = computeIdf(term);
@@ -305,11 +314,35 @@ export function searchLexicalIndex(index, terms) {
          floor: hit.floor,
        });
      }

      if (hit.type === 'chunk' && typeof hit.floor === 'number' && hit.floor >= 0) {
        if (!termFloorHits.has(term)) termFloorHits.set(term, []);
        termFloorHits.get(term).push({
          floor: hit.floor,
          weightedScore: weighted,
          chunkId: id,
        });

        const floorAgg = floorLexAgg.get(hit.floor) || { score: 0, terms: new Set() };
        floorAgg.score += weighted;
        floorAgg.terms.add(term);
        floorLexAgg.set(hit.floor, floorAgg);
      }
    }
  }

  idfPairs.sort((a, b) => b.idf - a.idf);
  result.topIdfTerms = idfPairs.slice(0, 5);
  result.termFloorHits = Object.fromEntries(
    [...termFloorHits.entries()].map(([term, hits]) => [term, hits]),
  );
  result.floorLexScores = [...floorLexAgg.entries()]
    .map(([floor, info]) => ({
      floor,
      score: Number(info.score.toFixed(6)),
      hitTermsCount: info.terms.size,
    }))
    .sort((a, b) => b.score - a.score);

  const sortedHits = Array.from(weightedScores.entries())
    .sort((a, b) => b[1] - a[1]);
Reference in New Issue
Block a user