Refine diffusion graph channels and drop legacy who compatibility

This commit is contained in:
2026-02-13 15:56:22 +08:00
parent 9ba120364c
commit 6aa1547d6f
6 changed files with 110 additions and 180 deletions

View File

@@ -32,6 +32,7 @@
import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { tokenizeForIndex } from '../utils/tokenizer.js';
const MODULE_ID = 'diffusion';
@@ -46,12 +47,11 @@ const CONFIG = {
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
// Edge weight channel coefficients
// Rationale: Rimmon-Kenan (2002) hierarchy: characters > events > setting > themes
// No standalone WHO channel: rely on interaction/action/location only.
GAMMA: {
who: 0.50, // entity co-occurrence — Jaccard
what: 0.25, // directed pair overlap — Szymkiewicz-Simpson
where: 0.15, // location exact match — binary
how: 0.10, // dynamics tag co-occurrence — Jaccard
what: 0.55, // interaction pair overlap — Szymkiewicz-Simpson
where: 0.15, // location exact match — binary
how: 0.30, // action-term co-occurrence — Jaccard
},
// Post-verification (Cosine Gate)
@@ -94,17 +94,13 @@ function cosineSimilarity(a, b) {
// ═══════════════════════════════════════════════════════════════════════════
/**
* WHO channel: entity set = who ∪ edges.s ∪ edges.t
* Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
* @param {object} atom
* @param {Set<string>} excludeEntities - entities to exclude (e.g. name1)
* @returns {Set<string>}
*/
function extractEntities(atom, excludeEntities = new Set()) {
const set = new Set();
for (const w of (atom.who || [])) {
const n = normalize(w);
if (n && !excludeEntities.has(n)) set.add(n);
}
for (const e of (atom.edges || [])) {
const s = normalize(e?.s);
const t = normalize(e?.t);
@@ -115,18 +111,19 @@ function extractEntities(atom, excludeEntities = new Set()) {
}
/**
* WHAT channel: directed interaction pairs "AB" (strict direction — option A)
* WHAT channel: interaction pairs "AB" (direction-insensitive).
* @param {object} atom
* @param {Set<string>} excludeEntities
* @returns {Set<string>}
*/
function extractDirectedPairs(atom, excludeEntities = new Set()) {
function extractInteractionPairs(atom, excludeEntities = new Set()) {
const set = new Set();
for (const e of (atom.edges || [])) {
const s = normalize(e?.s);
const t = normalize(e?.t);
if (s && t && !excludeEntities.has(s) && !excludeEntities.has(t)) {
set.add(`${s}\u2192${t}`);
const pair = [s, t].sort().join('\u2194');
set.add(pair);
}
}
return set;
@@ -142,15 +139,20 @@ function extractLocation(atom) {
}
/**
* HOW channel: dynamics tags set
* HOW channel: action terms from edges.r
* @param {object} atom
* @param {Set<string>} excludeEntities
* @returns {Set<string>}
*/
function extractDynamics(atom) {
function extractActionTerms(atom, excludeEntities = new Set()) {
const set = new Set();
for (const d of (atom.dynamics || [])) {
const n = normalize(d);
if (n) set.add(n);
for (const e of (atom.edges || [])) {
const rel = String(e?.r || '').trim();
if (!rel) continue;
for (const token of tokenizeForIndex(rel)) {
const t = normalize(token);
if (t && !excludeEntities.has(t)) set.add(t);
}
}
return set;
}
@@ -198,8 +200,8 @@ function overlapCoefficient(a, b) {
// Graph construction
//
// Candidate pairs discovered via inverted indices on entities and locations.
// Dynamics-only pairs excluded from candidate generation (γ_HOW = 0.10 is
// too weak to justify O(N²) blowup from 8-tag combinatorics).
// HOW-only pairs are still excluded from candidate generation to avoid O(N²);
// all channel weights are evaluated for the entity/location candidate set.
// All four channels evaluated for every candidate pair.
// ═══════════════════════════════════════════════════════════════════════════
@@ -207,14 +209,14 @@ function overlapCoefficient(a, b) {
* Pre-extract features for all atoms
* @param {object[]} allAtoms
* @param {Set<string>} excludeEntities
* @returns {object[]} feature objects with entities/directedPairs/location/dynamics
* @returns {object[]} feature objects with entities/interactionPairs/location/actionTerms
*/
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
return allAtoms.map(atom => ({
entities: extractEntities(atom, excludeEntities),
directedPairs: extractDirectedPairs(atom, excludeEntities),
interactionPairs: extractInteractionPairs(atom, excludeEntities),
location: extractLocation(atom),
dynamics: extractDynamics(atom),
actionTerms: extractActionTerms(atom, excludeEntities),
}));
}
@@ -279,10 +281,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
collectPairsFromIndex(entityIndex, pairSet, N);
collectPairsFromIndex(locationIndex, pairSet, N);
// Compute four-channel edge weights for all candidates
// Compute three-channel edge weights for all candidates
const neighbors = Array.from({ length: N }, () => []);
let edgeCount = 0;
const channelStats = { who: 0, what: 0, where: 0, how: 0 };
const channelStats = { what: 0, where: 0, how: 0 };
for (const packed of pairSet) {
const i = Math.floor(packed / N);
@@ -291,13 +293,11 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
const fi = features[i];
const fj = features[j];
const wWho = jaccard(fi.entities, fj.entities);
const wWhat = overlapCoefficient(fi.directedPairs, fj.directedPairs);
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
const wWhere = (fi.location && fi.location === fj.location) ? 1.0 : 0.0;
const wHow = jaccard(fi.dynamics, fj.dynamics);
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
const weight =
CONFIG.GAMMA.who * wWho +
CONFIG.GAMMA.what * wWhat +
CONFIG.GAMMA.where * wWhere +
CONFIG.GAMMA.how * wHow;
@@ -307,7 +307,6 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
neighbors[j].push({ target: i, weight });
edgeCount++;
if (wWho > 0) channelStats.who++;
if (wWhat > 0) channelStats.what++;
if (wWhere > 0) channelStats.where++;
if (wHow > 0) channelStats.how++;
@@ -318,8 +317,7 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
xbLog.info(MODULE_ID,
`Graph: ${N} nodes, ${edgeCount} edges ` +
`(who=${channelStats.who} what=${channelStats.what} ` +
`where=${channelStats.where} how=${channelStats.how}) ` +
`(what=${channelStats.what} where=${channelStats.where} how=${channelStats.how}) ` +
`(${buildTime}ms)`
);
@@ -593,7 +591,7 @@ function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector)
* @param {object[]} seeds - l0Selected from recall Stage 6
* Each: { atomId, rerankScore, similarity, atom, ... }
* @param {object[]} allAtoms - getStateAtoms() result
* Each: { atomId, floor, semantic, who, edges, dynamics, where }
* Each: { atomId, floor, semantic, edges, where }
* @param {object[]} stateVectors - getAllStateVectors() result
* Each: { atomId, floor, vector: Float32Array }
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
@@ -760,7 +758,7 @@ function fillMetricsEmpty(metrics) {
cosineGateNoVector: 0,
finalCount: 0,
scoreDistribution: { min: 0, max: 0, mean: 0 },
byChannel: { who: 0, what: 0, where: 0, how: 0 },
byChannel: { what: 0, where: 0, how: 0 },
time: 0,
};
}
@@ -782,7 +780,7 @@ function fillMetrics(metrics, data) {
cosineGateNoVector: data.cosineGateNoVector || 0,
finalCount: data.finalCount || 0,
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
byChannel: data.channelStats || { who: 0, what: 0, where: 0, how: 0 },
byChannel: data.channelStats || { what: 0, where: 0, how: 0 },
time: data.time || 0,
};
}

View File

@@ -71,11 +71,12 @@ export function buildEntityLexicon(store, context) {
add(f.s);
}
// 5. L0 atoms 的 who(新角色在 L2 总结前即可进入词典)
// 5. L0 atoms 的 edges.s/edges.t(新角色在 L2 总结前即可进入词典)
const atoms = getStateAtoms();
for (const atom of atoms) {
for (const name of (atom.who || [])) {
add(name);
for (const e of (atom.edges || [])) {
add(e?.s);
add(e?.t);
}
}
@@ -122,11 +123,12 @@ export function buildDisplayNameMap(store, context) {
if (!f.retracted) register(f.s);
}
// 5. L0 atoms 的 who
// 5. L0 atoms 的 edges.s/edges.t
const atoms = getStateAtoms();
for (const atom of atoms) {
for (const name of (atom.who || [])) {
register(name);
for (const e of (atom.edges || [])) {
register(e?.s);
register(e?.t);
}
}

View File

@@ -125,7 +125,7 @@ export function createMetrics() {
cosineGateNoVector: 0,
finalCount: 0,
scoreDistribution: { min: 0, max: 0, mean: 0 },
byChannel: { who: 0, what: 0, where: 0, how: 0 },
byChannel: { what: 0, where: 0, how: 0 },
time: 0,
},
@@ -366,7 +366,7 @@ export function formatMetricsLog(metrics) {
lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
if (m.diffusion.graphEdges > 0) {
const ch = m.diffusion.byChannel || {};
lines.push(`│ └─ by_channel: who=${ch.who || 0}, what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
lines.push(`│ └─ by_channel: what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
}
if (m.diffusion.iterations > 0) {
lines.push(`├─ ppr: ${m.diffusion.iterations} iterations, ε=${Number(m.diffusion.convergenceError).toExponential(1)}`);
@@ -630,7 +630,7 @@ export function detectIssues(metrics) {
// ─────────────────────────────────────────────────────────────────
if (m.diffusion.graphEdges === 0 && m.diffusion.seedCount > 0) {
issues.push('No diffusion graph edges - atoms may lack who/edges fields');
issues.push('No diffusion graph edges - atoms may lack edges fields');
}
if (m.diffusion.pprActivated > 0 && m.diffusion.cosineGatePassed === 0) {

View File

@@ -20,7 +20,9 @@
// 阶段 5: Lexical Retrieval + Dense-Gated Event Merge
// 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对
// 阶段 7: L1 配对组装L0 → top-1 AI L1 + top-1 USER L1
// 阶段 8: Causation Trace
// 阶段 7.5: PPR Diffusion
阶段 8: L0 → L2 反向查找(后置,基于最终 l0Selected)
// 阶段 9: Causation Trace
// ═══════════════════════════════════════════════════════════════════════════
import { getAllEventVectors, getChunksByFloors, getMeta, getChunkVectorsByIds } from '../storage/chunk-store.js';
@@ -1114,6 +1116,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
const eventIndex = buildEventIndex(allEvents);
let lexicalEventCount = 0;
let lexicalEventFilteredByDense = 0;
let l0LinkedCount = 0;
const focusSetForLexical = new Set((bundle.focusEntities || []).map(normalize));
for (const eid of lexicalResult.eventIds) {
@@ -1149,46 +1152,6 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
lexicalEventCount++;
}
// ═══════════════════════════════════════════════════════════════════
// 阶段 5.5: L0 → L2 反向查找
// 已召回的 L0 楼层落在某 L2 事件范围内,但该 L2 自身未被召回
// ═══════════════════════════════════════════════════════════════════
const recalledL0Floors = new Set(anchorHits.map(h => h.floor));
let l0LinkedCount = 0;
for (const event of allEvents) {
if (existingEventIds.has(event.id)) continue;
const range = parseFloorRange(event.summary);
if (!range) continue;
let hasOverlap = false;
for (const floor of recalledL0Floors) {
if (floor >= range.start && floor <= range.end) {
hasOverlap = true;
break;
}
}
if (!hasOverlap) continue;
// 实体分类:与所有路径统一标准
const participants = (event.participants || []).map(p => normalize(p));
const hasEntityMatch = focusSetForLexical.size > 0
&& participants.some(p => focusSetForLexical.has(p));
const evVec = eventVectorMap.get(event.id);
const sim = evVec?.length ? cosineSimilarity(queryVector_v1, evVec) : 0;
eventHits.push({
event,
similarity: sim,
_recallType: hasEntityMatch ? 'DIRECT' : 'RELATED',
});
existingEventIds.add(event.id);
l0LinkedCount++;
}
if (metrics) {
metrics.lexical.eventFilteredByDense = lexicalEventFilteredByDense;
@@ -1196,14 +1159,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.event.byRecallType.lexical = lexicalEventCount;
metrics.event.selected += lexicalEventCount;
}
if (l0LinkedCount > 0) {
metrics.event.byRecallType.l0Linked = l0LinkedCount;
metrics.event.selected += l0LinkedCount;
}
}
xbLog.info(MODULE_ID,
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} l0Linked=+${l0LinkedCount} (${lexTime}ms)`
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (${lexTime}ms)`
);
// ═══════════════════════════════════════════════════════════════════
@@ -1248,7 +1207,56 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.timing.diffusion = metrics.diffusion?.time || 0;
// ═══════════════════════════════════════════════════════════════════
// 阶段 7: Causation Trace
// Stage 8: L0 → L2 反向查找(后置,基于最终 l0Selected)
// ═══════════════════════════════════════════════════════════════════
const recalledL0Floors = new Set(l0Selected.map(x => x.floor));
for (const event of allEvents) {
if (existingEventIds.has(event.id)) continue;
const range = parseFloorRange(event.summary);
if (!range) continue;
let hasOverlap = false;
for (const floor of recalledL0Floors) {
if (floor >= range.start && floor <= range.end) {
hasOverlap = true;
break;
}
}
if (!hasOverlap) continue;
// Dense similarity 门槛(与 Lexical Event 对齐)
const evVec = eventVectorMap.get(event.id);
const sim = evVec?.length ? cosineSimilarity(queryVector_v1, evVec) : 0;
if (sim < CONFIG.LEXICAL_EVENT_DENSE_MIN) continue;
// 实体分类:与所有路径统一标准
const participants = (event.participants || []).map(p => normalize(p));
const hasEntityMatch = focusSetForLexical.size > 0
&& participants.some(p => focusSetForLexical.has(p));
eventHits.push({
event,
similarity: sim,
_recallType: hasEntityMatch ? 'DIRECT' : 'RELATED',
});
existingEventIds.add(event.id);
l0LinkedCount++;
}
if (metrics && l0LinkedCount > 0) {
metrics.event.byRecallType.l0Linked = l0LinkedCount;
metrics.event.selected += l0LinkedCount;
}
xbLog.info(MODULE_ID,
`L0-linked events: ${recalledL0Floors.size} floors → ${l0LinkedCount} events linked (sim≥${CONFIG.LEXICAL_EVENT_DENSE_MIN})`
);
// ═══════════════════════════════════════════════════════════════════
// 阶段 9: Causation Trace
// ═══════════════════════════════════════════════════════════════════
const { results: causalMap, maxDepth: causalMaxDepth } = traceCausation(eventHits, eventIndex);
@@ -1288,7 +1296,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0}${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`);
console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`);
console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`);
console.log(`Events: ${eventHits.length} hits (l0Linked=+${l0LinkedCount}), ${causalChain.length} causal`);
console.log(`Diffusion: ${metrics.diffusion?.seedCount || 0} seeds → ${metrics.diffusion?.pprActivated || 0} activated → ${metrics.diffusion?.finalCount || 0} final (${metrics.diffusion?.time || 0}ms)`);
console.groupEnd();