Refine diffusion graph channels and drop legacy who compatibility
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { tokenizeForIndex } from '../utils/tokenizer.js';
|
||||
|
||||
const MODULE_ID = 'diffusion';
|
||||
|
||||
@@ -46,12 +47,11 @@ const CONFIG = {
|
||||
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
|
||||
|
||||
// Edge weight channel coefficients
|
||||
// Rationale: Rimmon-Kenan (2002) hierarchy: characters > events > setting > themes
|
||||
// No standalone WHO channel: rely on interaction/action/location only.
|
||||
GAMMA: {
|
||||
who: 0.50, // entity co-occurrence — Jaccard
|
||||
what: 0.25, // directed pair overlap — Szymkiewicz-Simpson
|
||||
where: 0.15, // location exact match — binary
|
||||
how: 0.10, // dynamics tag co-occurrence — Jaccard
|
||||
what: 0.55, // interaction pair overlap — Szymkiewicz-Simpson
|
||||
where: 0.15, // location exact match — binary
|
||||
how: 0.30, // action-term co-occurrence — Jaccard
|
||||
},
|
||||
|
||||
// Post-verification (Cosine Gate)
|
||||
@@ -94,17 +94,13 @@ function cosineSimilarity(a, b) {
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
|
||||
* WHO channel: entity set = who ∪ edges.s ∪ edges.t
|
||||
* Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
|
||||
* @param {object} atom
|
||||
* @param {Set<string>} excludeEntities - entities to exclude (e.g. name1)
|
||||
* @returns {Set<string>}
|
||||
*/
|
||||
function extractEntities(atom, excludeEntities = new Set()) {
|
||||
const set = new Set();
|
||||
for (const w of (atom.who || [])) {
|
||||
const n = normalize(w);
|
||||
if (n && !excludeEntities.has(n)) set.add(n);
|
||||
}
|
||||
for (const e of (atom.edges || [])) {
|
||||
const s = normalize(e?.s);
|
||||
const t = normalize(e?.t);
|
||||
@@ -115,18 +111,19 @@ function extractEntities(atom, excludeEntities = new Set()) {
|
||||
}
|
||||
|
||||
/**
|
||||
* WHAT channel: directed interaction pairs "A→B" (strict direction — option A)
|
||||
* WHAT channel: interaction pairs "A↔B" (direction-insensitive).
|
||||
* @param {object} atom
|
||||
* @param {Set<string>} excludeEntities
|
||||
* @returns {Set<string>}
|
||||
*/
|
||||
function extractDirectedPairs(atom, excludeEntities = new Set()) {
|
||||
function extractInteractionPairs(atom, excludeEntities = new Set()) {
|
||||
const set = new Set();
|
||||
for (const e of (atom.edges || [])) {
|
||||
const s = normalize(e?.s);
|
||||
const t = normalize(e?.t);
|
||||
if (s && t && !excludeEntities.has(s) && !excludeEntities.has(t)) {
|
||||
set.add(`${s}\u2192${t}`);
|
||||
const pair = [s, t].sort().join('\u2194');
|
||||
set.add(pair);
|
||||
}
|
||||
}
|
||||
return set;
|
||||
@@ -142,15 +139,20 @@ function extractLocation(atom) {
|
||||
}
|
||||
|
||||
/**
|
||||
* HOW channel: dynamics tags set
|
||||
* HOW channel: action terms from edges.r
|
||||
* @param {object} atom
|
||||
* @param {Set<string>} excludeEntities
|
||||
* @returns {Set<string>}
|
||||
*/
|
||||
function extractDynamics(atom) {
|
||||
function extractActionTerms(atom, excludeEntities = new Set()) {
|
||||
const set = new Set();
|
||||
for (const d of (atom.dynamics || [])) {
|
||||
const n = normalize(d);
|
||||
if (n) set.add(n);
|
||||
for (const e of (atom.edges || [])) {
|
||||
const rel = String(e?.r || '').trim();
|
||||
if (!rel) continue;
|
||||
for (const token of tokenizeForIndex(rel)) {
|
||||
const t = normalize(token);
|
||||
if (t && !excludeEntities.has(t)) set.add(t);
|
||||
}
|
||||
}
|
||||
return set;
|
||||
}
|
||||
@@ -198,8 +200,8 @@ function overlapCoefficient(a, b) {
|
||||
// Graph construction
|
||||
//
|
||||
// Candidate pairs discovered via inverted indices on entities and locations.
|
||||
// Dynamics-only pairs excluded from candidate generation (γ_HOW = 0.10 is
|
||||
// too weak to justify O(N²) blowup from 8-tag combinatorics).
|
||||
// HOW-only pairs are still excluded from candidate generation to avoid O(N²);
|
||||
// all channel weights are evaluated for the entity/location candidate set.
|
||||
// All three channels (WHAT / WHERE / HOW) are evaluated for every candidate pair.
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@@ -207,14 +209,14 @@ function overlapCoefficient(a, b) {
|
||||
* Pre-extract features for all atoms
|
||||
* @param {object[]} allAtoms
|
||||
* @param {Set<string>} excludeEntities
|
||||
* @returns {object[]} feature objects with entities/directedPairs/location/dynamics
|
||||
* @returns {object[]} feature objects with entities/interactionPairs/location/actionTerms
|
||||
*/
|
||||
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
|
||||
return allAtoms.map(atom => ({
|
||||
entities: extractEntities(atom, excludeEntities),
|
||||
directedPairs: extractDirectedPairs(atom, excludeEntities),
|
||||
interactionPairs: extractInteractionPairs(atom, excludeEntities),
|
||||
location: extractLocation(atom),
|
||||
dynamics: extractDynamics(atom),
|
||||
actionTerms: extractActionTerms(atom, excludeEntities),
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -279,10 +281,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
collectPairsFromIndex(entityIndex, pairSet, N);
|
||||
collectPairsFromIndex(locationIndex, pairSet, N);
|
||||
|
||||
// Compute four-channel edge weights for all candidates
|
||||
// Compute three-channel edge weights for all candidates
|
||||
const neighbors = Array.from({ length: N }, () => []);
|
||||
let edgeCount = 0;
|
||||
const channelStats = { who: 0, what: 0, where: 0, how: 0 };
|
||||
const channelStats = { what: 0, where: 0, how: 0 };
|
||||
|
||||
for (const packed of pairSet) {
|
||||
const i = Math.floor(packed / N);
|
||||
@@ -291,13 +293,11 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
const fi = features[i];
|
||||
const fj = features[j];
|
||||
|
||||
const wWho = jaccard(fi.entities, fj.entities);
|
||||
const wWhat = overlapCoefficient(fi.directedPairs, fj.directedPairs);
|
||||
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
|
||||
const wWhere = (fi.location && fi.location === fj.location) ? 1.0 : 0.0;
|
||||
const wHow = jaccard(fi.dynamics, fj.dynamics);
|
||||
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
|
||||
|
||||
const weight =
|
||||
CONFIG.GAMMA.who * wWho +
|
||||
CONFIG.GAMMA.what * wWhat +
|
||||
CONFIG.GAMMA.where * wWhere +
|
||||
CONFIG.GAMMA.how * wHow;
|
||||
@@ -307,7 +307,6 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
neighbors[j].push({ target: i, weight });
|
||||
edgeCount++;
|
||||
|
||||
if (wWho > 0) channelStats.who++;
|
||||
if (wWhat > 0) channelStats.what++;
|
||||
if (wWhere > 0) channelStats.where++;
|
||||
if (wHow > 0) channelStats.how++;
|
||||
@@ -318,8 +317,7 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Graph: ${N} nodes, ${edgeCount} edges ` +
|
||||
`(who=${channelStats.who} what=${channelStats.what} ` +
|
||||
`where=${channelStats.where} how=${channelStats.how}) ` +
|
||||
`(what=${channelStats.what} where=${channelStats.where} how=${channelStats.how}) ` +
|
||||
`(${buildTime}ms)`
|
||||
);
|
||||
|
||||
@@ -593,7 +591,7 @@ function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector)
|
||||
* @param {object[]} seeds - l0Selected from recall Stage 6
|
||||
* Each: { atomId, rerankScore, similarity, atom, ... }
|
||||
* @param {object[]} allAtoms - getStateAtoms() result
|
||||
* Each: { atomId, floor, semantic, who, edges, dynamics, where }
|
||||
* Each: { atomId, floor, semantic, edges, where }
|
||||
* @param {object[]} stateVectors - getAllStateVectors() result
|
||||
* Each: { atomId, floor, vector: Float32Array }
|
||||
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
|
||||
@@ -760,7 +758,7 @@ function fillMetricsEmpty(metrics) {
|
||||
cosineGateNoVector: 0,
|
||||
finalCount: 0,
|
||||
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
||||
byChannel: { who: 0, what: 0, where: 0, how: 0 },
|
||||
byChannel: { what: 0, where: 0, how: 0 },
|
||||
time: 0,
|
||||
};
|
||||
}
|
||||
@@ -782,7 +780,7 @@ function fillMetrics(metrics, data) {
|
||||
cosineGateNoVector: data.cosineGateNoVector || 0,
|
||||
finalCount: data.finalCount || 0,
|
||||
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
|
||||
byChannel: data.channelStats || { who: 0, what: 0, where: 0, how: 0 },
|
||||
byChannel: data.channelStats || { what: 0, where: 0, how: 0 },
|
||||
time: data.time || 0,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -71,11 +71,12 @@ export function buildEntityLexicon(store, context) {
|
||||
add(f.s);
|
||||
}
|
||||
|
||||
// 5. L0 atoms 的 who(新角色在 L2 总结前即可进入词典)
|
||||
// 5. L0 atoms 的 edges.s/edges.t(新角色在 L2 总结前即可进入词典)
|
||||
const atoms = getStateAtoms();
|
||||
for (const atom of atoms) {
|
||||
for (const name of (atom.who || [])) {
|
||||
add(name);
|
||||
for (const e of (atom.edges || [])) {
|
||||
add(e?.s);
|
||||
add(e?.t);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,11 +123,12 @@ export function buildDisplayNameMap(store, context) {
|
||||
if (!f.retracted) register(f.s);
|
||||
}
|
||||
|
||||
// 5. L0 atoms 的 who
|
||||
// 5. L0 atoms 的 edges.s/edges.t
|
||||
const atoms = getStateAtoms();
|
||||
for (const atom of atoms) {
|
||||
for (const name of (atom.who || [])) {
|
||||
register(name);
|
||||
for (const e of (atom.edges || [])) {
|
||||
register(e?.s);
|
||||
register(e?.t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -125,7 +125,7 @@ export function createMetrics() {
|
||||
cosineGateNoVector: 0,
|
||||
finalCount: 0,
|
||||
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
||||
byChannel: { who: 0, what: 0, where: 0, how: 0 },
|
||||
byChannel: { what: 0, where: 0, how: 0 },
|
||||
time: 0,
|
||||
},
|
||||
|
||||
@@ -366,7 +366,7 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
|
||||
if (m.diffusion.graphEdges > 0) {
|
||||
const ch = m.diffusion.byChannel || {};
|
||||
lines.push(`│ └─ by_channel: who=${ch.who || 0}, what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
|
||||
lines.push(`│ └─ by_channel: what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
|
||||
}
|
||||
if (m.diffusion.iterations > 0) {
|
||||
lines.push(`├─ ppr: ${m.diffusion.iterations} iterations, ε=${Number(m.diffusion.convergenceError).toExponential(1)}`);
|
||||
@@ -630,7 +630,7 @@ export function detectIssues(metrics) {
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.diffusion.graphEdges === 0 && m.diffusion.seedCount > 0) {
|
||||
issues.push('No diffusion graph edges - atoms may lack who/edges fields');
|
||||
issues.push('No diffusion graph edges - atoms may lack edges fields');
|
||||
}
|
||||
|
||||
if (m.diffusion.pprActivated > 0 && m.diffusion.cosineGatePassed === 0) {
|
||||
|
||||
@@ -20,7 +20,9 @@
|
||||
// 阶段 5: Lexical Retrieval + Dense-Gated Event Merge
|
||||
// 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对
|
||||
// 阶段 7: L1 配对组装(L0 → top-1 AI L1 + top-1 USER L1)
|
||||
// 阶段 8: Causation Trace
|
||||
// 阶段 7.5: PPR Diffusion
|
||||
// 阶段 8: L0 → L2 反向查找(后置,基于最终 l0Selected)
|
||||
// 阶段 9: Causation Trace
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getAllEventVectors, getChunksByFloors, getMeta, getChunkVectorsByIds } from '../storage/chunk-store.js';
|
||||
@@ -1114,6 +1116,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
const eventIndex = buildEventIndex(allEvents);
|
||||
let lexicalEventCount = 0;
|
||||
let lexicalEventFilteredByDense = 0;
|
||||
let l0LinkedCount = 0;
|
||||
const focusSetForLexical = new Set((bundle.focusEntities || []).map(normalize));
|
||||
|
||||
for (const eid of lexicalResult.eventIds) {
|
||||
@@ -1149,46 +1152,6 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
lexicalEventCount++;
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// 阶段 5.5: L0 → L2 反向查找
|
||||
// 已召回的 L0 楼层落在某 L2 事件范围内,但该 L2 自身未被召回
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
const recalledL0Floors = new Set(anchorHits.map(h => h.floor));
|
||||
let l0LinkedCount = 0;
|
||||
|
||||
for (const event of allEvents) {
|
||||
if (existingEventIds.has(event.id)) continue;
|
||||
|
||||
const range = parseFloorRange(event.summary);
|
||||
if (!range) continue;
|
||||
|
||||
let hasOverlap = false;
|
||||
for (const floor of recalledL0Floors) {
|
||||
if (floor >= range.start && floor <= range.end) {
|
||||
hasOverlap = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasOverlap) continue;
|
||||
|
||||
// 实体分类:与所有路径统一标准
|
||||
const participants = (event.participants || []).map(p => normalize(p));
|
||||
const hasEntityMatch = focusSetForLexical.size > 0
|
||||
&& participants.some(p => focusSetForLexical.has(p));
|
||||
|
||||
const evVec = eventVectorMap.get(event.id);
|
||||
const sim = evVec?.length ? cosineSimilarity(queryVector_v1, evVec) : 0;
|
||||
|
||||
eventHits.push({
|
||||
event,
|
||||
similarity: sim,
|
||||
_recallType: hasEntityMatch ? 'DIRECT' : 'RELATED',
|
||||
});
|
||||
existingEventIds.add(event.id);
|
||||
l0LinkedCount++;
|
||||
}
|
||||
|
||||
if (metrics) {
|
||||
metrics.lexical.eventFilteredByDense = lexicalEventFilteredByDense;
|
||||
|
||||
@@ -1196,14 +1159,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.event.byRecallType.lexical = lexicalEventCount;
|
||||
metrics.event.selected += lexicalEventCount;
|
||||
}
|
||||
if (l0LinkedCount > 0) {
|
||||
metrics.event.byRecallType.l0Linked = l0LinkedCount;
|
||||
metrics.event.selected += l0LinkedCount;
|
||||
}
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} l0Linked=+${l0LinkedCount} (${lexTime}ms)`
|
||||
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (${lexTime}ms)`
|
||||
);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
@@ -1248,7 +1207,56 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.timing.diffusion = metrics.diffusion?.time || 0;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// 阶段 7: Causation Trace
|
||||
// 阶段 8: L0 → L2 反向查找(后置,基于最终 l0Selected)
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
const recalledL0Floors = new Set(l0Selected.map(x => x.floor));
|
||||
|
||||
for (const event of allEvents) {
|
||||
if (existingEventIds.has(event.id)) continue;
|
||||
|
||||
const range = parseFloorRange(event.summary);
|
||||
if (!range) continue;
|
||||
|
||||
let hasOverlap = false;
|
||||
for (const floor of recalledL0Floors) {
|
||||
if (floor >= range.start && floor <= range.end) {
|
||||
hasOverlap = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasOverlap) continue;
|
||||
|
||||
// Dense similarity 门槛(与 Lexical Event 对齐)
|
||||
const evVec = eventVectorMap.get(event.id);
|
||||
const sim = evVec?.length ? cosineSimilarity(queryVector_v1, evVec) : 0;
|
||||
if (sim < CONFIG.LEXICAL_EVENT_DENSE_MIN) continue;
|
||||
|
||||
// 实体分类:与所有路径统一标准
|
||||
const participants = (event.participants || []).map(p => normalize(p));
|
||||
const hasEntityMatch = focusSetForLexical.size > 0
|
||||
&& participants.some(p => focusSetForLexical.has(p));
|
||||
|
||||
eventHits.push({
|
||||
event,
|
||||
similarity: sim,
|
||||
_recallType: hasEntityMatch ? 'DIRECT' : 'RELATED',
|
||||
});
|
||||
existingEventIds.add(event.id);
|
||||
l0LinkedCount++;
|
||||
}
|
||||
|
||||
if (metrics && l0LinkedCount > 0) {
|
||||
metrics.event.byRecallType.l0Linked = l0LinkedCount;
|
||||
metrics.event.selected += l0LinkedCount;
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`L0-linked events: ${recalledL0Floors.size} floors → ${l0LinkedCount} events linked (sim≥${CONFIG.LEXICAL_EVENT_DENSE_MIN})`
|
||||
);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// 阶段 9: Causation Trace
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
const { results: causalMap, maxDepth: causalMaxDepth } = traceCausation(eventHits, eventIndex);
|
||||
@@ -1288,7 +1296,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
|
||||
console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0} → ${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`);
|
||||
console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`);
|
||||
console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`);
|
||||
console.log(`Events: ${eventHits.length} hits (l0Linked=+${l0LinkedCount}), ${causalChain.length} causal`);
|
||||
console.log(`Diffusion: ${metrics.diffusion?.seedCount || 0} seeds → ${metrics.diffusion?.pprActivated || 0} activated → ${metrics.diffusion?.finalCount || 0} final (${metrics.diffusion?.time || 0}ms)`);
|
||||
console.groupEnd();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user