Refactor L0 quality and diffusion graph gating for balanced recall
This commit is contained in:
@@ -47,20 +47,23 @@ const CONFIG = {
|
||||
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
|
||||
|
||||
// Edge weight channel coefficients
|
||||
// No standalone WHO channel: rely on interaction/action/location only.
|
||||
// Candidate generation uses WHAT/HOW only.
|
||||
// WHO/WHERE are reweight-only signals.
|
||||
GAMMA: {
|
||||
what: 0.55, // interaction pair overlap — Szymkiewicz-Simpson
|
||||
where: 0.15, // location exact match — binary
|
||||
what: 0.45, // interaction pair overlap — Szymkiewicz-Simpson
|
||||
how: 0.30, // action-term co-occurrence — Jaccard
|
||||
who: 0.15, // endpoint entity overlap — Jaccard (reweight-only)
|
||||
where: 0.10, // location exact match — damped (reweight-only)
|
||||
},
|
||||
WHERE_MAX_GROUP_SIZE: 16, // skip location-only pair expansion for over-common places
|
||||
WHERE_FREQ_DAMP_PIVOT: 6, // location freq <= pivot keeps full WHERE score
|
||||
WHERE_FREQ_DAMP_MIN: 0.20, // lower bound for damped WHERE contribution
|
||||
HOW_MAX_GROUP_SIZE: 24, // skip ultra-common action terms to avoid dense pair explosion
|
||||
|
||||
// Post-verification (Cosine Gate)
|
||||
COSINE_GATE: 0.45, // min cosine(queryVector, stateVector)
|
||||
SCORE_FLOOR: 0.10, // min finalScore = PPR_normalized × cosine
|
||||
DIFFUSION_CAP: 100, // max diffused nodes (excluding seeds)
|
||||
COSINE_GATE: 0.48, // min cosine(queryVector, stateVector)
|
||||
SCORE_FLOOR: 0.12, // min finalScore = PPR_normalized × cosine
|
||||
DIFFUSION_CAP: 80, // max diffused nodes (excluding seeds)
|
||||
};
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -226,25 +229,27 @@ function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
|
||||
/**
|
||||
* Build inverted index: value → list of atom indices
|
||||
* @param {object[]} features
|
||||
* @returns {{ entityIndex: Map, locationIndex: Map }}
|
||||
* @returns {{ whatIndex: Map, howIndex: Map, locationFreq: Map }}
|
||||
*/
|
||||
function buildInvertedIndices(features) {
|
||||
const entityIndex = new Map();
|
||||
const locationIndex = new Map();
|
||||
const whatIndex = new Map();
|
||||
const howIndex = new Map();
|
||||
const locationFreq = new Map();
|
||||
|
||||
for (let i = 0; i < features.length; i++) {
|
||||
for (const e of features[i].entities) {
|
||||
if (!entityIndex.has(e)) entityIndex.set(e, []);
|
||||
entityIndex.get(e).push(i);
|
||||
for (const pair of features[i].interactionPairs) {
|
||||
if (!whatIndex.has(pair)) whatIndex.set(pair, []);
|
||||
whatIndex.get(pair).push(i);
|
||||
}
|
||||
for (const action of features[i].actionTerms) {
|
||||
if (!howIndex.has(action)) howIndex.set(action, []);
|
||||
howIndex.get(action).push(i);
|
||||
}
|
||||
const loc = features[i].location;
|
||||
if (loc) {
|
||||
if (!locationIndex.has(loc)) locationIndex.set(loc, []);
|
||||
locationIndex.get(loc).push(i);
|
||||
}
|
||||
if (loc) locationFreq.set(loc, (locationFreq.get(loc) || 0) + 1);
|
||||
}
|
||||
|
||||
return { entityIndex, locationIndex };
|
||||
return { whatIndex, howIndex, locationFreq };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -277,30 +282,32 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
const T0 = performance.now();
|
||||
|
||||
const features = extractAllFeatures(allAtoms, excludeEntities);
|
||||
const { entityIndex, locationIndex } = buildInvertedIndices(features);
|
||||
const locationFreq = new Map();
|
||||
for (const [loc, indices] of locationIndex.entries()) {
|
||||
locationFreq.set(loc, indices.length);
|
||||
}
|
||||
const { whatIndex, howIndex, locationFreq } = buildInvertedIndices(features);
|
||||
|
||||
// Candidate pairs: share ≥1 entity or same location
|
||||
// Candidate pairs: only WHAT/HOW can create edges
|
||||
const pairSetByWhat = new Set();
|
||||
const pairSetByHow = new Set();
|
||||
const pairSet = new Set();
|
||||
collectPairsFromIndex(entityIndex, pairSet, N);
|
||||
let skippedLocationGroups = 0;
|
||||
for (const [loc, indices] of locationIndex.entries()) {
|
||||
if (!loc) continue;
|
||||
if (indices.length > CONFIG.WHERE_MAX_GROUP_SIZE) {
|
||||
skippedLocationGroups++;
|
||||
collectPairsFromIndex(whatIndex, pairSetByWhat, N);
|
||||
let skippedHowGroups = 0;
|
||||
for (const [term, indices] of howIndex.entries()) {
|
||||
if (!term) continue;
|
||||
if (indices.length > CONFIG.HOW_MAX_GROUP_SIZE) {
|
||||
skippedHowGroups++;
|
||||
continue;
|
||||
}
|
||||
const oneLocMap = new Map([[loc, indices]]);
|
||||
collectPairsFromIndex(oneLocMap, pairSet, N);
|
||||
const oneHowMap = new Map([[term, indices]]);
|
||||
collectPairsFromIndex(oneHowMap, pairSetByHow, N);
|
||||
}
|
||||
for (const p of pairSetByWhat) pairSet.add(p);
|
||||
for (const p of pairSetByHow) pairSet.add(p);
|
||||
|
||||
// Compute three-channel edge weights for all candidates
|
||||
// Compute edge weights for all candidates
|
||||
const neighbors = Array.from({ length: N }, () => []);
|
||||
let edgeCount = 0;
|
||||
const channelStats = { what: 0, where: 0, how: 0 };
|
||||
const channelStats = { what: 0, where: 0, how: 0, who: 0 };
|
||||
let reweightWhoUsed = 0;
|
||||
let reweightWhereUsed = 0;
|
||||
|
||||
for (const packed of pairSet) {
|
||||
const i = Math.floor(packed / N);
|
||||
@@ -310,6 +317,8 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
const fj = features[j];
|
||||
|
||||
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
|
||||
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
|
||||
const wWho = jaccard(fi.entities, fj.entities);
|
||||
let wWhere = 0.0;
|
||||
if (fi.location && fi.location === fj.location) {
|
||||
const freq = locationFreq.get(fi.location) || 1;
|
||||
@@ -319,12 +328,12 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
);
|
||||
wWhere = damp;
|
||||
}
|
||||
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
|
||||
|
||||
const weight =
|
||||
CONFIG.GAMMA.what * wWhat +
|
||||
CONFIG.GAMMA.where * wWhere +
|
||||
CONFIG.GAMMA.how * wHow;
|
||||
CONFIG.GAMMA.how * wHow +
|
||||
CONFIG.GAMMA.who * wWho +
|
||||
CONFIG.GAMMA.where * wWhere;
|
||||
|
||||
if (weight > 0) {
|
||||
neighbors[i].push({ target: j, weight });
|
||||
@@ -332,8 +341,11 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
edgeCount++;
|
||||
|
||||
if (wWhat > 0) channelStats.what++;
|
||||
if (wWhere > 0) channelStats.where++;
|
||||
if (wHow > 0) channelStats.how++;
|
||||
if (wWho > 0) channelStats.who++;
|
||||
if (wWhere > 0) channelStats.where++;
|
||||
if (wWho > 0) reweightWhoUsed++;
|
||||
if (wWhere > 0) reweightWhereUsed++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,12 +353,28 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Graph: ${N} nodes, ${edgeCount} edges ` +
|
||||
`(what=${channelStats.what} where=${channelStats.where} how=${channelStats.how}) ` +
|
||||
`(whereSkippedGroups=${skippedLocationGroups}) ` +
|
||||
`(candidate_by_what=${pairSetByWhat.size} candidate_by_how=${pairSetByHow.size}) ` +
|
||||
`(what=${channelStats.what} how=${channelStats.how} who=${channelStats.who} where=${channelStats.where}) ` +
|
||||
`(reweight_who_used=${reweightWhoUsed} reweight_where_used=${reweightWhereUsed}) ` +
|
||||
`(howSkippedGroups=${skippedHowGroups}) ` +
|
||||
`(${buildTime}ms)`
|
||||
);
|
||||
|
||||
return { neighbors, edgeCount, channelStats, buildTime };
|
||||
const totalPairs = N > 1 ? (N * (N - 1)) / 2 : 0;
|
||||
const edgeDensity = totalPairs > 0 ? Number((edgeCount / totalPairs * 100).toFixed(2)) : 0;
|
||||
|
||||
return {
|
||||
neighbors,
|
||||
edgeCount,
|
||||
channelStats,
|
||||
buildTime,
|
||||
candidatePairs: pairSet.size,
|
||||
pairsFromWhat: pairSetByWhat.size,
|
||||
pairsFromHow: pairSetByHow.size,
|
||||
reweightWhoUsed,
|
||||
reweightWhereUsed,
|
||||
edgeDensity,
|
||||
};
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -673,6 +701,12 @@ export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, met
|
||||
graphNodes: N,
|
||||
graphEdges: 0,
|
||||
channelStats: graph.channelStats,
|
||||
candidatePairs: graph.candidatePairs,
|
||||
pairsFromWhat: graph.pairsFromWhat,
|
||||
pairsFromHow: graph.pairsFromHow,
|
||||
edgeDensity: graph.edgeDensity,
|
||||
reweightWhoUsed: graph.reweightWhoUsed,
|
||||
reweightWhereUsed: graph.reweightWhereUsed,
|
||||
time: graph.buildTime,
|
||||
});
|
||||
xbLog.info(MODULE_ID, 'No graph edges — skipping diffusion');
|
||||
@@ -719,6 +753,12 @@ export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, met
|
||||
graphNodes: N,
|
||||
graphEdges: graph.edgeCount,
|
||||
channelStats: graph.channelStats,
|
||||
candidatePairs: graph.candidatePairs,
|
||||
pairsFromWhat: graph.pairsFromWhat,
|
||||
pairsFromHow: graph.pairsFromHow,
|
||||
edgeDensity: graph.edgeDensity,
|
||||
reweightWhoUsed: graph.reweightWhoUsed,
|
||||
reweightWhereUsed: graph.reweightWhereUsed,
|
||||
buildTime: graph.buildTime,
|
||||
iterations,
|
||||
convergenceError: finalError,
|
||||
@@ -726,6 +766,9 @@ export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, met
|
||||
cosineGatePassed: gateStats.passed,
|
||||
cosineGateFiltered: gateStats.filtered,
|
||||
cosineGateNoVector: gateStats.noVector,
|
||||
postGatePassRate: pprActivated > 0
|
||||
? Math.round((gateStats.passed / pprActivated) * 100)
|
||||
: 0,
|
||||
finalCount: diffused.length,
|
||||
scoreDistribution: diffused.length > 0
|
||||
? calcScoreStats(diffused.map(d => d.finalScore))
|
||||
@@ -783,7 +826,14 @@ function fillMetricsEmpty(metrics) {
|
||||
cosineGateNoVector: 0,
|
||||
finalCount: 0,
|
||||
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
||||
byChannel: { what: 0, where: 0, how: 0 },
|
||||
byChannel: { what: 0, where: 0, how: 0, who: 0 },
|
||||
candidatePairs: 0,
|
||||
pairsFromWhat: 0,
|
||||
pairsFromHow: 0,
|
||||
edgeDensity: 0,
|
||||
reweightWhoUsed: 0,
|
||||
reweightWhereUsed: 0,
|
||||
postGatePassRate: 0,
|
||||
time: 0,
|
||||
};
|
||||
}
|
||||
@@ -803,9 +853,16 @@ function fillMetrics(metrics, data) {
|
||||
cosineGatePassed: data.cosineGatePassed || 0,
|
||||
cosineGateFiltered: data.cosineGateFiltered || 0,
|
||||
cosineGateNoVector: data.cosineGateNoVector || 0,
|
||||
postGatePassRate: data.postGatePassRate || 0,
|
||||
finalCount: data.finalCount || 0,
|
||||
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
|
||||
byChannel: data.channelStats || { what: 0, where: 0, how: 0 },
|
||||
byChannel: data.channelStats || { what: 0, where: 0, how: 0, who: 0 },
|
||||
candidatePairs: data.candidatePairs || 0,
|
||||
pairsFromWhat: data.pairsFromWhat || 0,
|
||||
pairsFromHow: data.pairsFromHow || 0,
|
||||
edgeDensity: data.edgeDensity || 0,
|
||||
reweightWhoUsed: data.reweightWhoUsed || 0,
|
||||
reweightWhereUsed: data.reweightWhereUsed || 0,
|
||||
time: data.time || 0,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user