// ═══════════════════════════════════════════════════════════════════════════
// diffusion.js - PPR Graph Diffusion (Personalized PageRank)
//
// Spreads activation from seed L0 atoms through entity co-occurrence graph
// to discover narratively-connected but semantically-distant memories.
//
// Pipeline position: recall.js Stage 7.5
//   Input:  seeds (reranked L0 from Stage 6)
//   Output: additional L0 atoms → merged into l0Selected
//
// Algorithm:
//   1. Build undirected weighted graph over all L0 atoms
//      Four channels: WHO/WHAT/WHERE/HOW (Jaccard/Overlap/ExactMatch)
//   2. Personalized PageRank (Power Iteration)
//      Seeds weighted by rerankScore — Haveliwala (2002) topic-sensitive variant
//      α = 0.15 restart probability — Page et al. (1998)
//   3. Post-verification (Dense Cosine Gate)
//      Exclude seeds; cosine ≥ COSINE_GATE; final = PPR_norm × cosine ≥ SCORE_FLOOR
//      (thresholds live in CONFIG below — currently 0.46 / 0.12)
//
// References:
//   Page et al. "The PageRank Citation Ranking" (1998)
//   Haveliwala "Topic-Sensitive PageRank" (IEEE TKDE 2003)
//   Langville & Meyer "Eigenvector Methods for Web IR" (SIAM Review 2005)
//   Sun et al. "GraftNet" (EMNLP 2018)
//   Jaccard "Étude comparative de la distribution florale" (1912)
//   Szymkiewicz "Une contribution statistique" (1934) — Overlap coefficient
//   Rimmon-Kenan "Narrative Fiction" (2002) — Channel weight rationale
//
// Core PPR iteration aligned with NetworkX pagerank():
//   github.com/networkx/networkx — algorithms/link_analysis/pagerank_alg.py
// ═══════════════════════════════════════════════════════════════════════════

import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { tokenizeForIndex } from '../utils/tokenizer.js';

const MODULE_ID = 'diffusion';

// ═══════════════════════════════════════════════════════════════════════════
// Configuration
// ═══════════════════════════════════════════════════════════════════════════

const CONFIG = {
  // PPR parameters (Page et al. 1998; GraftNet 2018 uses same values)
  ALPHA: 0.15,     // restart (teleport) probability
  EPSILON: 1e-6,   // L1 convergence threshold
  MAX_ITER: 50,    // hard iteration cap (typically converges in 15-25)

  // Edge weight channel coefficients.
  // Candidate generation uses WHAT/HOW only;
  // WHO/WHERE are reweight-only signals.
  GAMMA: {
    what: 0.45,    // interaction pair overlap — Szymkiewicz-Simpson
    how: 0.30,     // action-term co-occurrence — Jaccard
    who: 0.15,     // endpoint entity overlap — Jaccard (reweight-only)
    where: 0.10,   // location exact match — damped (reweight-only)
  },

  // NOTE(review): WHERE_MAX_GROUP_SIZE is not referenced anywhere in this
  // module — WHERE no longer generates candidate pairs. Kept for config
  // compatibility; confirm before removing.
  WHERE_MAX_GROUP_SIZE: 16,
  WHERE_FREQ_DAMP_PIVOT: 6,   // location freq <= pivot keeps full WHERE score
  WHERE_FREQ_DAMP_MIN: 0.20,  // lower bound for damped WHERE contribution
  HOW_MAX_GROUP_SIZE: 24,     // skip ultra-common action terms to avoid dense pair explosion

  // Post-verification (Cosine Gate)
  COSINE_GATE: 0.46,          // min cosine(queryVector, stateVector)
  SCORE_FLOOR: 0.12,          // min finalScore = PPR_normalized × cosine
  DIFFUSION_CAP: 100,         // max diffused nodes (excluding seeds)
};

// ═══════════════════════════════════════════════════════════════════════════
// Utility functions
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Unicode-safe text normalization (matches recall.js / entity-lexicon.js):
 * NFKC fold, strip zero-width characters, trim, lowercase.
 * @param {*} s - any value; coerced to string, null/undefined → ''
 * @returns {string}
 */
function normalize(s) {
  return String(s || '')
    .normalize('NFKC')
    .replace(/[\u200B-\u200D\uFEFF]/g, '')
    .trim()
    .toLowerCase();
}

/**
 * Cosine similarity between two numeric vectors.
 * Returns 0 for missing, empty, length-mismatched, or zero-norm inputs.
 * @param {Float32Array|number[]} a
 * @param {Float32Array|number[]} b
 * @returns {number} -1..1 (0 on any degenerate input)
 */
function cosineSimilarity(a, b) {
  if (!a?.length || !b?.length || a.length !== b.length) return 0;
  let dot = 0, nA = 0, nB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    nA += a[i] * a[i];
    nB += b[i] * b[i];
  }
  return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0;
}

// ═══════════════════════════════════════════════════════════════════════════
// Feature extraction from L0 atoms
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
 * @param {object} atom
 * @param {Set} excludeEntities - normalized entities to exclude (e.g. name1)
 * @returns {Set} normalized entity names
 */
function extractEntities(atom, excludeEntities = new Set()) {
  const set = new Set();
  for (const e of (atom.edges || [])) {
    const s = normalize(e?.s);
    const t = normalize(e?.t);
    if (s && !excludeEntities.has(s)) set.add(s);
    if (t && !excludeEntities.has(t)) set.add(t);
  }
  return set;
}

/**
 * WHAT channel: interaction pairs "A↔B" (direction-insensitive — endpoints
 * sorted before joining, so s→t and t→s collapse to one key).
 * @param {object} atom
 * @param {Set} excludeEntities
 * @returns {Set} pair keys of the form "a\u2194b"
 */
function extractInteractionPairs(atom, excludeEntities = new Set()) {
  const set = new Set();
  for (const e of (atom.edges || [])) {
    const s = normalize(e?.s);
    const t = normalize(e?.t);
    if (s && t && !excludeEntities.has(s) && !excludeEntities.has(t)) {
      const pair = [s, t].sort().join('\u2194');
      set.add(pair);
    }
  }
  return set;
}

/**
 * WHERE channel: normalized location string.
 * @param {object} atom
 * @returns {string} empty string if absent
 */
function extractLocation(atom) {
  return normalize(atom.where);
}

/**
 * HOW channel: action terms tokenized from edges.r.
 * @param {object} atom
 * @param {Set} excludeEntities
 * @returns {Set} normalized action tokens
 */
function extractActionTerms(atom, excludeEntities = new Set()) {
  const set = new Set();
  for (const e of (atom.edges || [])) {
    const rel = String(e?.r || '').trim();
    if (!rel) continue;
    for (const token of tokenizeForIndex(rel)) {
      const t = normalize(token);
      if (t && !excludeEntities.has(t)) set.add(t);
    }
  }
  return set;
}

// ═══════════════════════════════════════════════════════════════════════════
// Set similarity functions
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Shared |A∩B| computation: iterate the smaller set, probe the larger.
 * @param {Set} a
 * @param {Set} b
 * @returns {number} intersection cardinality
 */
function intersectionSize(a, b) {
  const [probe, base] = a.size <= b.size ? [a, b] : [b, a];
  let count = 0;
  for (const item of probe) {
    if (base.has(item)) count++;
  }
  return count;
}

/**
 * Jaccard index: |A∩B| / |A∪B| (Jaccard 1912)
 * @param {Set} a
 * @param {Set} b
 * @returns {number} 0..1 (0 when either set is empty)
 */
function jaccard(a, b) {
  if (!a.size || !b.size) return 0;
  const inter = intersectionSize(a, b);
  const union = a.size + b.size - inter;
  return union > 0 ? inter / union : 0;
}

/**
 * Overlap coefficient: |A∩B| / min(|A|,|B|) (Szymkiewicz-Simpson 1934)
 * Used for directed pairs where set sizes are small (1-3); Jaccard
 * over-penalizes small-set asymmetry.
 * @param {Set} a
 * @param {Set} b
 * @returns {number} 0..1 (0 when either set is empty)
 */
function overlapCoefficient(a, b) {
  if (!a.size || !b.size) return 0;
  return intersectionSize(a, b) / Math.min(a.size, b.size);
}

// ═══════════════════════════════════════════════════════════════════════════
// Graph construction
//
// Candidate pairs discovered via inverted indices on WHAT interaction pairs
// and HOW action terms (HOW groups larger than HOW_MAX_GROUP_SIZE are skipped
// to avoid dense O(N²) pair explosion). WHO/WHERE are reweight-only: they
// never create candidate pairs on their own.
// All four channel weights are evaluated for every candidate pair.
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Pre-extract the four channel feature bundles for every atom.
 * @param {object[]} allAtoms
 * @param {Set} excludeEntities
 * @returns {object[]} one { entities, interactionPairs, location, actionTerms } per atom
 */
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
  return allAtoms.map((atom) => {
    return {
      entities: extractEntities(atom, excludeEntities),
      interactionPairs: extractInteractionPairs(atom, excludeEntities),
      location: extractLocation(atom),
      actionTerms: extractActionTerms(atom, excludeEntities),
    };
  });
}

/**
 * Build inverted indices: feature value → list of atom indices,
 * plus a frequency table for locations.
 * @param {object[]} features
 * @returns {{ whatIndex: Map, howIndex: Map, locationFreq: Map }}
 */
function buildInvertedIndices(features) {
  const whatIndex = new Map();
  const howIndex = new Map();
  const locationFreq = new Map();

  // Append idx to the bucket for key, creating the bucket on first use.
  const append = (map, key, idx) => {
    const bucket = map.get(key);
    if (bucket) bucket.push(idx);
    else map.set(key, [idx]);
  };

  features.forEach((feat, i) => {
    for (const pair of feat.interactionPairs) append(whatIndex, pair, i);
    for (const term of feat.actionTerms) append(howIndex, term, i);
    if (feat.location) {
      locationFreq.set(feat.location, (locationFreq.get(feat.location) || 0) + 1);
    }
  });

  return { whatIndex, howIndex, locationFreq };
}

/**
 * Collect unordered candidate pairs from an inverted index.
 * Pairs are packed as a single integer lo * N + hi for Set deduplication.
 * @param {Map} index - value → [atomIndex, ...]
 * @param {Set} pairSet - packed pair collector (mutated)
 * @param {number} N - total atom count (for pair packing)
 */
function collectPairsFromIndex(index, pairSet, N) {
  for (const bucket of index.values()) {
    for (let x = 0; x < bucket.length; x++) {
      for (let y = x + 1; y < bucket.length; y++) {
        const lo = bucket[x] < bucket[y] ? bucket[x] : bucket[y];
        const hi = bucket[x] < bucket[y] ? bucket[y] : bucket[x];
        pairSet.add(lo * N + hi);
      }
    }
  }
}

/**
 * Build the weighted undirected graph over L0 atoms.
 *
 * Candidates come from the WHAT index (always) and the HOW index (only for
 * term groups no larger than HOW_MAX_GROUP_SIZE); each candidate pair is then
 * scored across all four channels, with WHO/WHERE acting as reweight-only.
 *
 * @param {object[]} allAtoms
 * @param {Set} excludeEntities
 * @returns {{ neighbors: object[][], edgeCount: number, channelStats: object, buildTime: number }}
 *          plus candidatePairs / pairsFromWhat / pairsFromHow /
 *          reweightWhoUsed / reweightWhereUsed / edgeDensity diagnostics
 */
function buildGraph(allAtoms, excludeEntities = new Set()) {
  const N = allAtoms.length;
  const startedAt = performance.now();

  const features = extractAllFeatures(allAtoms, excludeEntities);
  const { whatIndex, howIndex, locationFreq } = buildInvertedIndices(features);

  // Candidate generation: WHAT unconditionally, HOW only for small groups.
  const pairSetByWhat = new Set();
  const pairSetByHow = new Set();
  const pairSet = new Set();
  collectPairsFromIndex(whatIndex, pairSetByWhat, N);

  let skippedHowGroups = 0;
  for (const [term, bucket] of howIndex.entries()) {
    if (!term) continue;
    if (bucket.length > CONFIG.HOW_MAX_GROUP_SIZE) {
      // Ultra-common action term: expanding it would create a dense clique.
      skippedHowGroups++;
      continue;
    }
    collectPairsFromIndex(new Map([[term, bucket]]), pairSetByHow, N);
  }

  for (const p of pairSetByWhat) pairSet.add(p);
  for (const p of pairSetByHow) pairSet.add(p);

  // Score every candidate pair across all four channels.
  const neighbors = Array.from({ length: N }, () => []);
  const channelStats = { what: 0, where: 0, how: 0, who: 0 };
  let edgeCount = 0;
  let reweightWhoUsed = 0;
  let reweightWhereUsed = 0;

  for (const packed of pairSet) {
    const i = Math.floor(packed / N);
    const j = packed % N;
    const featA = features[i];
    const featB = features[j];

    const wWhat = overlapCoefficient(featA.interactionPairs, featB.interactionPairs);
    const wHow = jaccard(featA.actionTerms, featB.actionTerms);
    const wWho = jaccard(featA.entities, featB.entities);

    // WHERE: exact location match, damped for over-common places.
    let wWhere = 0.0;
    if (featA.location && featA.location === featB.location) {
      const freq = locationFreq.get(featA.location) || 1;
      wWhere = Math.max(
        CONFIG.WHERE_FREQ_DAMP_MIN,
        Math.min(1, CONFIG.WHERE_FREQ_DAMP_PIVOT / Math.max(1, freq))
      );
    }

    const weight =
      CONFIG.GAMMA.what * wWhat +
      CONFIG.GAMMA.how * wHow +
      CONFIG.GAMMA.who * wWho +
      CONFIG.GAMMA.where * wWhere;

    if (weight > 0) {
      neighbors[i].push({ target: j, weight });
      neighbors[j].push({ target: i, weight });
      edgeCount++;
      if (wWhat > 0) channelStats.what++;
      if (wHow > 0) channelStats.how++;
      if (wWho > 0) {
        channelStats.who++;
        reweightWhoUsed++;
      }
      if (wWhere > 0) {
        channelStats.where++;
        reweightWhereUsed++;
      }
    }
  }

  const buildTime = Math.round(performance.now() - startedAt);
  xbLog.info(MODULE_ID,
    `Graph: ${N} nodes, ${edgeCount} edges ` +
    `(candidate_by_what=${pairSetByWhat.size} candidate_by_how=${pairSetByHow.size}) ` +
    `(what=${channelStats.what} how=${channelStats.how} who=${channelStats.who} where=${channelStats.where}) ` +
    `(reweight_who_used=${reweightWhoUsed} reweight_where_used=${reweightWhereUsed}) ` +
    `(howSkippedGroups=${skippedHowGroups}) ` +
    `(${buildTime}ms)`
  );

  const totalPairs = N > 1 ? (N * (N - 1)) / 2 : 0;
  const edgeDensity = totalPairs > 0 ? Number((edgeCount / totalPairs * 100).toFixed(2)) : 0;

  return {
    neighbors,
    edgeCount,
    channelStats,
    buildTime,
    candidatePairs: pairSet.size,
    pairsFromWhat: pairSetByWhat.size,
    pairsFromHow: pairSetByHow.size,
    reweightWhoUsed,
    reweightWhereUsed,
    edgeDensity,
  };
}

// ═══════════════════════════════════════════════════════════════════════════
// PPR: Seed vector construction
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Build personalization vector s from seeds, weighted by rerankScore.
 * Haveliwala (2002): non-uniform personalization improves topic sensitivity.
 *
 * @param {object[]} seeds - seed L0 entries with atomId and rerankScore
 * @param {Map} idToIdx - atomId → array index
 * @param {number} N - total node count
 * @returns {Float64Array} personalization vector (L1-normalized, sums to 1;
 *          all zeros when no seed resolves or total mass is 0)
 */
function buildSeedVector(seeds, idToIdx, N) {
  const vec = new Float64Array(N);
  let mass = 0;
  for (const seed of seeds) {
    const idx = idToIdx.get(seed.atomId);
    if (idx == null) continue;
    // rerankScore preferred, similarity as fallback; negatives clamp to 0.
    const w = Math.max(0, seed.rerankScore || seed.similarity || 0);
    vec[idx] += w;
    mass += w;
  }
  // L1 normalize to a probability distribution.
  if (mass > 0) {
    for (let i = 0; i < N; i++) vec[i] /= mass;
  }
  return vec;
}

// ═══════════════════════════════════════════════════════════════════════════
// PPR: Column normalization + dangling node detection
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Column-normalize the adjacency structure into a transition matrix W.
 *
 * Column j of W: W_{ij} = weight(i,j) / Σ_k weight(k,j)
 * Dangling nodes (no outgoing edges): handled in powerIteration via
 * redistribution to personalization vector s. (Langville & Meyer 2005, §4.1)
 *
 * @param {object[][]} neighbors - neighbors[j] = [{target, weight}, ...]
 * @param {number} N
 * @returns {{ columns: object[][], dangling: number[] }}
 */
function columnNormalize(neighbors, N) {
  const columns = Array.from({ length: N }, () => []);
  const dangling = [];
  for (let j = 0; j < N; j++) {
    const outgoing = neighbors[j];
    const total = outgoing.reduce((acc, e) => acc + e.weight, 0);
    if (total <= 0) {
      dangling.push(j);
      continue;
    }
    for (const e of outgoing) {
      columns[j].push({ target: e.target, prob: e.weight / total });
    }
  }
  return { columns, dangling };
}

// ═══════════════════════════════════════════════════════════════════════════
// PPR: Power Iteration
//
// Aligned with NetworkX pagerank() (pagerank_alg.py):
//
//   NetworkX "alpha"   = damping        = our (1 − α)
//   NetworkX "1-alpha" = teleportation  = our α
//
// Per iteration:
//   π_new[i] = α·s[i] + (1−α)·( Σ_j W_{ij}·π[j] + dangling_sum·s[i] )
//
// Convergence: Perron-Frobenius theorem guarantees unique stationary
// distribution for irreducible aperiodic column-stochastic matrix.
// Rate: ‖π^(t+1) − π^t‖₁ ≤ (1−α)^t (geometric).
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Run the PPR power iteration to its stationary distribution.
 *
 * @param {object[][]} columns - column-normalized transition matrix
 * @param {Float64Array} s - personalization vector (sums to 1)
 * @param {number[]} dangling - dangling node indices
 * @param {number} N - node count
 * @returns {{ pi: Float64Array, iterations: number, finalError: number }}
 */
function powerIteration(columns, s, dangling, N) {
  const alpha = CONFIG.ALPHA; // teleport probability
  const follow = 1 - alpha;   // probability of walking an edge
  let rank = Float64Array.from(s); // start the walk at the personalization vector
  let iterations = 0;
  let finalError = 0;

  for (let iter = 0; iter < CONFIG.MAX_ITER; iter++) {
    const next = new Float64Array(N);

    // Probability mass sitting on dangling nodes gets teleported back
    // through s (Langville & Meyer 2005).
    let danglingSum = 0;
    for (const node of dangling) danglingSum += rank[node];

    // next += (1−α) · W · rank  (sparse column-major mat-vec)
    for (let j = 0; j < N; j++) {
      const mass = rank[j];
      if (mass === 0) continue;
      const scaled = follow * mass;
      for (const entry of columns[j]) {
        next[entry.target] += scaled * entry.prob;
      }
    }

    // next += ( α + (1−α)·danglingSum ) · s  (restart + dangling mass)
    const restartCoeff = alpha + follow * danglingSum;
    for (let i = 0; i < N; i++) next[i] += restartCoeff * s[i];

    // L1 distance to the previous iterate decides convergence.
    let delta = 0;
    for (let i = 0; i < N; i++) delta += Math.abs(next[i] - rank[i]);

    rank = next;
    iterations = iter + 1;
    finalError = delta;
    if (delta < CONFIG.EPSILON) break;
  }

  return { pi: rank, iterations, finalError };
}

// ═══════════════════════════════════════════════════════════════════════════
// Post-verification: Dense Cosine Gate
//
// PPR measures graph-structural relevance ("same characters").
// Cosine gate measures semantic relevance ("related to current topic").
// Product combination ensures both dimensions are satisfied
// (CombMNZ — Fox & Shaw, TREC-2 1994).
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Filter PPR-activated nodes by semantic relevance.
 *
 * For each non-seed node with PPR > 0:
 *   1. cosine(queryVector, stateVector) ≥ COSINE_GATE
 *   2. finalScore = PPR_normalized × cosine ≥ SCORE_FLOOR
 *   3. Top DIFFUSION_CAP by finalScore
 *
 * @param {Float64Array} pi - PPR stationary distribution
 * @param {string[]} atomIds - index → atomId
 * @param {Map} atomById - atomId → atom object
 * @param {Set} seedAtomIds - seed atomIds (excluded from output)
 * @param {Map} vectorMap - atomId → embedding vector
 * @param {Float32Array|number[]} queryVector - R2 weighted query vector
 * @returns {{ diffused: object[], gateStats: object }}
 */
function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector) {
  const N = atomIds.length;
  const gateStats = { passed: 0, filtered: 0, noVector: 0 };

  // Find max PPR score among non-seed nodes, used to normalize PPR to 0..1.
  let maxPPR = 0;
  for (let i = 0; i < N; i++) {
    if (pi[i] > 0 && !seedAtomIds.has(atomIds[i])) {
      if (pi[i] > maxPPR) maxPPR = pi[i];
    }
  }
  // No non-seed node received any probability mass — nothing to verify.
  if (maxPPR <= 0) {
    return { diffused: [], gateStats };
  }

  const candidates = [];
  for (let i = 0; i < N; i++) {
    const atomId = atomIds[i];
    // Skip seeds and zero-probability nodes.
    if (seedAtomIds.has(atomId)) continue;
    if (pi[i] <= 0) continue;
    // Require a state vector for cosine verification; count absences.
    const vec = vectorMap.get(atomId);
    if (!vec?.length) {
      gateStats.noVector++;
      continue;
    }
    // Gate 1: semantic relevance to the query.
    const cos = cosineSimilarity(queryVector, vec);
    if (cos < CONFIG.COSINE_GATE) {
      gateStats.filtered++;
      continue;
    }
    // Gate 2: combined score = PPR_normalized × cosine (product keeps only
    // nodes strong on BOTH structural and semantic relevance).
    const pprNorm = pi[i] / maxPPR;
    const finalScore = pprNorm * cos;
    if (finalScore < CONFIG.SCORE_FLOOR) {
      gateStats.filtered++;
      continue;
    }
    gateStats.passed++;
    // NOTE(review): passed is incremented before this existence check, so a
    // missing atom still counts as "passed" without producing a candidate —
    // confirm whether that is intended for the pass-rate metric.
    const atom = atomById.get(atomId);
    if (!atom) continue;
    candidates.push({
      atomId,
      floor: atom.floor,
      atom,
      finalScore,
      pprScore: pi[i],
      pprNormalized: pprNorm,
      cosine: cos,
    });
  }

  // Sort by finalScore descending, cap at DIFFUSION_CAP.
  candidates.sort((a, b) => b.finalScore - a.finalScore);
  const diffused = candidates.slice(0, CONFIG.DIFFUSION_CAP);
  return { diffused, gateStats };
}

// ═══════════════════════════════════════════════════════════════════════════
// Main entry point
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Spread activation from seed L0 atoms through entity co-occurrence graph.
 *
 * Called from recall.js Stage 7.5, after locateAndPullEvidence and before
 * Causation Trace. Results are merged into l0Selected and consumed by
 * prompt.js through existing budget/formatting pipeline (zero downstream changes).
 *
 * @param {object[]} seeds - l0Selected from recall Stage 6
 *        Each: { atomId, rerankScore, similarity, atom, ... }
 * @param {object[]} allAtoms - getStateAtoms() result
 *        Each: { atomId, floor, semantic, edges, where }
 * @param {object[]} stateVectors - getAllStateVectors() result
 *        Each: { atomId, floor, vector: Float32Array }
 * @param {Float32Array|number[]} queryVector - R2 weighted query vector
 * @param {object|null} metrics - metrics object (optional, mutated in-place)
 * @returns {object[]} Additional L0 atoms for l0Selected
 *        Each: { atomId, floor, atom, finalScore, pprScore, pprNormalized, cosine }
 */
export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, metrics) {
  const T0 = performance.now();

  // ─── Early exits ─────────────────────────────────────────────────
  // Any missing input makes diffusion meaningless; record empty metrics.
  if (!seeds?.length || !allAtoms?.length || !queryVector?.length) {
    fillMetricsEmpty(metrics);
    return [];
  }

  // Align with entity-lexicon hard rule: exclude name1 from graph features.
  const { name1 } = getContext();
  const excludeEntities = new Set();
  if (name1) excludeEntities.add(normalize(name1));

  // ─── 1. Build atom index ─────────────────────────────────────────
  const atomById = new Map();
  const atomIds = [];
  const idToIdx = new Map();
  for (let i = 0; i < allAtoms.length; i++) {
    const a = allAtoms[i];
    atomById.set(a.atomId, a);
    atomIds.push(a.atomId);
    idToIdx.set(a.atomId, i);
  }
  const N = allAtoms.length;

  // Validate seeds against atom index; drop seeds pointing at unknown atoms.
  const validSeeds = seeds.filter(s => idToIdx.has(s.atomId));
  const seedAtomIds = new Set(validSeeds.map(s => s.atomId));
  if (!validSeeds.length) {
    fillMetricsEmpty(metrics);
    return [];
  }

  // ─── 2. Build graph ──────────────────────────────────────────────
  const graph = buildGraph(allAtoms, excludeEntities);
  if (graph.edgeCount === 0) {
    // No edges → PPR mass cannot leave the seeds; report graph-build
    // diagnostics only (time here is the graph build time).
    fillMetrics(metrics, {
      seedCount: validSeeds.length,
      graphNodes: N,
      graphEdges: 0,
      channelStats: graph.channelStats,
      candidatePairs: graph.candidatePairs,
      pairsFromWhat: graph.pairsFromWhat,
      pairsFromHow: graph.pairsFromHow,
      edgeDensity: graph.edgeDensity,
      reweightWhoUsed: graph.reweightWhoUsed,
      reweightWhereUsed: graph.reweightWhereUsed,
      time: graph.buildTime,
    });
    xbLog.info(MODULE_ID, 'No graph edges — skipping diffusion');
    return [];
  }

  // ─── 3. Build seed vector ────────────────────────────────────────
  const s = buildSeedVector(validSeeds, idToIdx, N);

  // ─── 4. Column normalize ─────────────────────────────────────────
  const { columns, dangling } = columnNormalize(graph.neighbors, N);

  // ─── 5. PPR Power Iteration ──────────────────────────────────────
  const T_PPR = performance.now();
  const { pi, iterations, finalError } = powerIteration(columns, s, dangling, N);
  const pprTime = Math.round(performance.now() - T_PPR);

  // Count activated non-seed nodes (any positive stationary probability).
  let pprActivated = 0;
  for (let i = 0; i < N; i++) {
    if (pi[i] > 0 && !seedAtomIds.has(atomIds[i])) pprActivated++;
  }

  // ─── 6. Post-verification ────────────────────────────────────────
  const vectorMap = new Map();
  for (const sv of (stateVectors || [])) {
    vectorMap.set(sv.atomId, sv.vector);
  }
  const { diffused, gateStats } = postVerify(
    pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector
  );

  // ─── 7. Metrics ──────────────────────────────────────────────────
  const totalTime = Math.round(performance.now() - T0);
  fillMetrics(metrics, {
    seedCount: validSeeds.length,
    graphNodes: N,
    graphEdges: graph.edgeCount,
    channelStats: graph.channelStats,
    candidatePairs: graph.candidatePairs,
    pairsFromWhat: graph.pairsFromWhat,
    pairsFromHow: graph.pairsFromHow,
    edgeDensity: graph.edgeDensity,
    reweightWhoUsed: graph.reweightWhoUsed,
    reweightWhereUsed: graph.reweightWhereUsed,
    buildTime: graph.buildTime,
    iterations,
    convergenceError: finalError,
    pprActivated,
    cosineGatePassed: gateStats.passed,
    cosineGateFiltered: gateStats.filtered,
    cosineGateNoVector: gateStats.noVector,
    postGatePassRate: pprActivated > 0
      ? Math.round((gateStats.passed / pprActivated) * 100)
      : 0,
    finalCount: diffused.length,
    scoreDistribution: diffused.length > 0
      ? calcScoreStats(diffused.map(d => d.finalScore))
      : { min: 0, max: 0, mean: 0 },
    time: totalTime,
  });

  xbLog.info(MODULE_ID,
    `Diffusion: ${validSeeds.length} seeds → ` +
    `graph(${N}n/${graph.edgeCount}e) → ` +
    `PPR(${iterations}it, ε=${finalError.toExponential(1)}, ${pprTime}ms) → ` +
    `${pprActivated} activated → ` +
    `gate(${gateStats.passed}\u2713/${gateStats.filtered}\u2717` +
    `${gateStats.noVector ? `/${gateStats.noVector}?` : ''}) → ` +
    `${diffused.length} final (${totalTime}ms)`
  );

  return diffused;
}

// ═══════════════════════════════════════════════════════════════════════════
// Metrics helpers
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Compute min/max/mean distribution, each rounded to 3 decimal places.
 * @param {number[]} scores
 * @returns {{ min: number, max: number, mean: number }}
 */
function calcScoreStats(scores) {
  if (!scores.length) return { min: 0, max: 0, mean: 0 };
  // Copy before sorting — sort() mutates and callers own the array.
  const sorted = [...scores].sort((a, b) => a - b);
  const sum = sorted.reduce((a, b) => a + b, 0);
  return {
    min: Number(sorted[0].toFixed(3)),
    max: Number(sorted[sorted.length - 1].toFixed(3)),
    mean: Number((sum / sorted.length).toFixed(3)),
  };
}

/**
 * Fill metrics with an all-zero diffusion block (no-op when metrics is null).
 * Keeps the metrics schema stable even when diffusion is skipped.
 * @param {object|null} metrics - mutated in place
 */
function fillMetricsEmpty(metrics) {
  if (!metrics) return;
  metrics.diffusion = {
    seedCount: 0,
    graphNodes: 0,
    graphEdges: 0,
    iterations: 0,
    convergenceError: 0,
    pprActivated: 0,
    cosineGatePassed: 0,
    cosineGateFiltered: 0,
    cosineGateNoVector: 0,
    finalCount: 0,
    scoreDistribution: { min: 0, max: 0, mean: 0 },
    byChannel: { what: 0, where: 0, how: 0, who: 0 },
    candidatePairs: 0,
    pairsFromWhat: 0,
    pairsFromHow: 0,
    edgeDensity: 0,
    reweightWhoUsed: 0,
    reweightWhereUsed: 0,
    postGatePassRate: 0,
    time: 0,
  };
}

/**
 * Fill metrics with diffusion results; every field defaults to 0 / empty so
 * partial data objects (e.g. the zero-edge early path) still yield a complete
 * block (no-op when metrics is null).
 * @param {object|null} metrics - mutated in place
 * @param {object} data - collected diffusion statistics
 */
function fillMetrics(metrics, data) {
  if (!metrics) return;
  metrics.diffusion = {
    seedCount: data.seedCount || 0,
    graphNodes: data.graphNodes || 0,
    graphEdges: data.graphEdges || 0,
    iterations: data.iterations || 0,
    convergenceError: data.convergenceError || 0,
    pprActivated: data.pprActivated || 0,
    cosineGatePassed: data.cosineGatePassed || 0,
    cosineGateFiltered: data.cosineGateFiltered || 0,
    cosineGateNoVector: data.cosineGateNoVector || 0,
    postGatePassRate: data.postGatePassRate || 0,
    finalCount: data.finalCount || 0,
    scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
    byChannel: data.channelStats || { what: 0, where: 0, how: 0, who: 0 },
    candidatePairs: data.candidatePairs || 0,
    pairsFromWhat: data.pairsFromWhat || 0,
    pairsFromHow: data.pairsFromHow || 0,
    edgeDensity: data.edgeDensity || 0,
    reweightWhoUsed: data.reweightWhoUsed || 0,
    reweightWhereUsed: data.reweightWhereUsed || 0,
    time: data.time || 0,
  };
}