Refine diffusion graph channels and drop legacy who compatibility

2026-02-13 15:56:22 +08:00
parent 9ba120364c
commit 6aa1547d6f
6 changed files with 110 additions and 180 deletions
--- a/modules/story-summary/vector/retrieval/diffusion.js
+++ b/modules/story-summary/vector/retrieval/diffusion.js
@@ -32,6 +32,7 @@

 import { xbLog } from '../../../../core/debug-core.js';
 import { getContext } from '../../../../../../../extensions.js';
+import { tokenizeForIndex } from '../utils/tokenizer.js';

 const MODULE_ID = 'diffusion';

@@ -46,12 +47,11 @@ const CONFIG = {
    MAX_ITER: 50,           // hard iteration cap (typically converges in 15-25)

    // Edge weight channel coefficients
-    // Rationale: Rimmon-Kenan (2002) hierarchy: characters > events > setting > themes
+    // No standalone WHO channel: rely on interaction/action/location only.
    GAMMA: {
-        who: 0.50,          // entity co-occurrence      — Jaccard
-        what: 0.25,         // directed pair overlap      — Szymkiewicz-Simpson
-        where: 0.15,        // location exact match       — binary
-        how: 0.10,          // dynamics tag co-occurrence  — Jaccard
+        what: 0.55,         // interaction pair overlap  — Szymkiewicz-Simpson
+        where: 0.15,        // location exact match      — binary
+        how: 0.30,          // action-term co-occurrence — Jaccard
    },

    // Post-verification (Cosine Gate)
@@ -94,17 +94,13 @@ function cosineSimilarity(a, b) {
 // ═══════════════════════════════════════════════════════════════════════════

 /**
- * WHO channel: entity set = who ∪ edges.s ∪ edges.t
+ * Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
 * @param {object} atom
 * @param {Set<string>} excludeEntities - entities to exclude (e.g. name1)
 * @returns {Set<string>}
 */
 function extractEntities(atom, excludeEntities = new Set()) {
    const set = new Set();
-    for (const w of (atom.who || [])) {
-        const n = normalize(w);
-        if (n && !excludeEntities.has(n)) set.add(n);
-    }
    for (const e of (atom.edges || [])) {
        const s = normalize(e?.s);
        const t = normalize(e?.t);
@@ -115,18 +111,19 @@ function extractEntities(atom, excludeEntities = new Set()) {
 }

 /**
- * WHAT channel: directed interaction pairs "A→B" (strict direction — option A)
+ * WHAT channel: interaction pairs "A↔B" (direction-insensitive).
 * @param {object} atom
 * @param {Set<string>} excludeEntities
 * @returns {Set<string>}
 */
-function extractDirectedPairs(atom, excludeEntities = new Set()) {
+function extractInteractionPairs(atom, excludeEntities = new Set()) {
    const set = new Set();
    for (const e of (atom.edges || [])) {
        const s = normalize(e?.s);
        const t = normalize(e?.t);
        if (s && t && !excludeEntities.has(s) && !excludeEntities.has(t)) {
-            set.add(`${s}\u2192${t}`);
+            const pair = [s, t].sort().join('\u2194');
+            set.add(pair);
        }
    }
    return set;
@@ -142,15 +139,20 @@ function extractLocation(atom) {
 }

 /**
- * HOW channel: dynamics tags set
+ * HOW channel: action terms from edges.r
 * @param {object} atom
+ * @param {Set<string>} excludeEntities
 * @returns {Set<string>}
 */
-function extractDynamics(atom) {
+function extractActionTerms(atom, excludeEntities = new Set()) {
    const set = new Set();
-    for (const d of (atom.dynamics || [])) {
-        const n = normalize(d);
-        if (n) set.add(n);
+    for (const e of (atom.edges || [])) {
+        const rel = String(e?.r || '').trim();
+        if (!rel) continue;
+        for (const token of tokenizeForIndex(rel)) {
+            const t = normalize(token);
+            if (t && !excludeEntities.has(t)) set.add(t);
+        }
    }
    return set;
 }
@@ -198,8 +200,8 @@ function overlapCoefficient(a, b) {
 // Graph construction
 //
 // Candidate pairs discovered via inverted indices on entities and locations.
-// Dynamics-only pairs excluded from candidate generation (γ_HOW = 0.10 is
-// too weak to justify O(N²) blowup from 8-tag combinatorics).
+// HOW-only pairs are still excluded from candidate generation to avoid O(N²);
+// all channel weights are evaluated for the entity/location candidate set.
// All three channels (WHAT/WHERE/HOW) are evaluated for every candidate pair.
 // ═══════════════════════════════════════════════════════════════════════════

@@ -207,14 +209,14 @@ function overlapCoefficient(a, b) {
 * Pre-extract features for all atoms
 * @param {object[]} allAtoms
 * @param {Set<string>} excludeEntities
- * @returns {object[]} feature objects with entities/directedPairs/location/dynamics
+ * @returns {object[]} feature objects with entities/interactionPairs/location/actionTerms
 */
 function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
    return allAtoms.map(atom => ({
        entities: extractEntities(atom, excludeEntities),
-        directedPairs: extractDirectedPairs(atom, excludeEntities),
+        interactionPairs: extractInteractionPairs(atom, excludeEntities),
        location: extractLocation(atom),
-        dynamics: extractDynamics(atom),
+        actionTerms: extractActionTerms(atom, excludeEntities),
    }));
 }

@@ -279,10 +281,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
    collectPairsFromIndex(entityIndex, pairSet, N);
    collectPairsFromIndex(locationIndex, pairSet, N);

-    // Compute four-channel edge weights for all candidates
+    // Compute three-channel edge weights for all candidates
    const neighbors = Array.from({ length: N }, () => []);
    let edgeCount = 0;
-    const channelStats = { who: 0, what: 0, where: 0, how: 0 };
+    const channelStats = { what: 0, where: 0, how: 0 };

    for (const packed of pairSet) {
        const i = Math.floor(packed / N);
@@ -291,13 +293,11 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
        const fi = features[i];
        const fj = features[j];

-        const wWho = jaccard(fi.entities, fj.entities);
-        const wWhat = overlapCoefficient(fi.directedPairs, fj.directedPairs);
+        const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
        const wWhere = (fi.location && fi.location === fj.location) ? 1.0 : 0.0;
-        const wHow = jaccard(fi.dynamics, fj.dynamics);
+        const wHow = jaccard(fi.actionTerms, fj.actionTerms);

        const weight =
-            CONFIG.GAMMA.who * wWho +
            CONFIG.GAMMA.what * wWhat +
            CONFIG.GAMMA.where * wWhere +
            CONFIG.GAMMA.how * wHow;
@@ -307,7 +307,6 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
            neighbors[j].push({ target: i, weight });
            edgeCount++;

-            if (wWho > 0) channelStats.who++;
            if (wWhat > 0) channelStats.what++;
            if (wWhere > 0) channelStats.where++;
            if (wHow > 0) channelStats.how++;
@@ -318,8 +317,7 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {

    xbLog.info(MODULE_ID,
        `Graph: ${N} nodes, ${edgeCount} edges ` +
-        `(who=${channelStats.who} what=${channelStats.what} ` +
-        `where=${channelStats.where} how=${channelStats.how}) ` +
+        `(what=${channelStats.what} where=${channelStats.where} how=${channelStats.how}) ` +
        `(${buildTime}ms)`
    );

@@ -593,7 +591,7 @@ function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector)
 * @param {object[]} seeds - l0Selected from recall Stage 6
 *   Each: { atomId, rerankScore, similarity, atom, ... }
 * @param {object[]} allAtoms - getStateAtoms() result
- *   Each: { atomId, floor, semantic, who, edges, dynamics, where }
+ *   Each: { atomId, floor, semantic, edges, where }
 * @param {object[]} stateVectors - getAllStateVectors() result
 *   Each: { atomId, floor, vector: Float32Array }
 * @param {Float32Array|number[]} queryVector - R2 weighted query vector
@@ -760,7 +758,7 @@ function fillMetricsEmpty(metrics) {
        cosineGateNoVector: 0,
        finalCount: 0,
        scoreDistribution: { min: 0, max: 0, mean: 0 },
-        byChannel: { who: 0, what: 0, where: 0, how: 0 },
+        byChannel: { what: 0, where: 0, how: 0 },
        time: 0,
    };
 }
@@ -782,7 +780,7 @@ function fillMetrics(metrics, data) {
        cosineGateNoVector: data.cosineGateNoVector || 0,
        finalCount: data.finalCount || 0,
        scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
-        byChannel: data.channelStats || { who: 0, what: 0, where: 0, how: 0 },
+        byChannel: data.channelStats || { what: 0, where: 0, how: 0 },
        time: data.time || 0,
    };
 }
--- a/modules/story-summary/vector/retrieval/entity-lexicon.js
+++ b/modules/story-summary/vector/retrieval/entity-lexicon.js
@@ -71,11 +71,12 @@ export function buildEntityLexicon(store, context) {
        add(f.s);
    }

-    // 5. L0 atoms 的 who（新角色在 L2 总结前即可进入词典）
+    // 5. L0 atoms 的 edges.s/edges.t（新角色在 L2 总结前即可进入词典）
    const atoms = getStateAtoms();
    for (const atom of atoms) {
-        for (const name of (atom.who || [])) {
-            add(name);
+        for (const e of (atom.edges || [])) {
+            add(e?.s);
+            add(e?.t);
        }
    }

@@ -122,11 +123,12 @@ export function buildDisplayNameMap(store, context) {
        if (!f.retracted) register(f.s);
    }

-    // 5. L0 atoms 的 who
+    // 5. L0 atoms 的 edges.s/edges.t
    const atoms = getStateAtoms();
    for (const atom of atoms) {
-        for (const name of (atom.who || [])) {
-            register(name);
+        for (const e of (atom.edges || [])) {
+            register(e?.s);
+            register(e?.t);
        }
    }

--- a/modules/story-summary/vector/retrieval/metrics.js
+++ b/modules/story-summary/vector/retrieval/metrics.js
@@ -125,7 +125,7 @@ export function createMetrics() {
            cosineGateNoVector: 0,
            finalCount: 0,
            scoreDistribution: { min: 0, max: 0, mean: 0 },
-            byChannel: { who: 0, what: 0, where: 0, how: 0 },
+            byChannel: { what: 0, where: 0, how: 0 },
            time: 0,
        },

@@ -366,7 +366,7 @@ export function formatMetricsLog(metrics) {
    lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
    if (m.diffusion.graphEdges > 0) {
        const ch = m.diffusion.byChannel || {};
-        lines.push(`│   └─ by_channel: who=${ch.who || 0}, what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
+        lines.push(`│   └─ by_channel: what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
    }
    if (m.diffusion.iterations > 0) {
        lines.push(`├─ ppr: ${m.diffusion.iterations} iterations, ε=${Number(m.diffusion.convergenceError).toExponential(1)}`);
@@ -630,7 +630,7 @@ export function detectIssues(metrics) {
    // ─────────────────────────────────────────────────────────────────

    if (m.diffusion.graphEdges === 0 && m.diffusion.seedCount > 0) {
-        issues.push('No diffusion graph edges - atoms may lack who/edges fields');
+        issues.push('No diffusion graph edges - atoms may lack edges fields');
    }

    if (m.diffusion.pprActivated > 0 && m.diffusion.cosineGatePassed === 0) {
--- a/modules/story-summary/vector/retrieval/recall.js
+++ b/modules/story-summary/vector/retrieval/recall.js
@@ -20,7 +20,9 @@
 // 阶段 5: Lexical Retrieval + Dense-Gated Event Merge
 // 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对
 // 阶段 7: L1 配对组装（L0 → top-1 AI L1 + top-1 USER L1）
-// 阶段 8: Causation Trace
+// 阶段 7.5: PPR Diffusion
+// 阶段 8: L0 → L2 反向查找（后置，基于最终 l0Selected）
+// 阶段 9: Causation Trace
 // ═══════════════════════════════════════════════════════════════════════════

 import { getAllEventVectors, getChunksByFloors, getMeta, getChunkVectorsByIds } from '../storage/chunk-store.js';
@@ -1114,6 +1116,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    const eventIndex = buildEventIndex(allEvents);
    let lexicalEventCount = 0;
    let lexicalEventFilteredByDense = 0;
+    let l0LinkedCount = 0;
    const focusSetForLexical = new Set((bundle.focusEntities || []).map(normalize));

    for (const eid of lexicalResult.eventIds) {
@@ -1149,46 +1152,6 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
        lexicalEventCount++;
    }

-    // ═══════════════════════════════════════════════════════════════════
-    // 阶段 5.5: L0 → L2 反向查找
-    // 已召回的 L0 楼层落在某 L2 事件范围内，但该 L2 自身未被召回
-    // ═══════════════════════════════════════════════════════════════════
-
-    const recalledL0Floors = new Set(anchorHits.map(h => h.floor));
-    let l0LinkedCount = 0;
-
-    for (const event of allEvents) {
-        if (existingEventIds.has(event.id)) continue;
-
-        const range = parseFloorRange(event.summary);
-        if (!range) continue;
-
-        let hasOverlap = false;
-        for (const floor of recalledL0Floors) {
-            if (floor >= range.start && floor <= range.end) {
-                hasOverlap = true;
-                break;
-            }
-        }
-        if (!hasOverlap) continue;
-
-        // 实体分类：与所有路径统一标准
-        const participants = (event.participants || []).map(p => normalize(p));
-        const hasEntityMatch = focusSetForLexical.size > 0
-            && participants.some(p => focusSetForLexical.has(p));
-
-        const evVec = eventVectorMap.get(event.id);
-        const sim = evVec?.length ? cosineSimilarity(queryVector_v1, evVec) : 0;
-
-        eventHits.push({
-            event,
-            similarity: sim,
-            _recallType: hasEntityMatch ? 'DIRECT' : 'RELATED',
-        });
-        existingEventIds.add(event.id);
-        l0LinkedCount++;
-    }
-
    if (metrics) {
        metrics.lexical.eventFilteredByDense = lexicalEventFilteredByDense;

@@ -1196,14 +1159,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
            metrics.event.byRecallType.lexical = lexicalEventCount;
            metrics.event.selected += lexicalEventCount;
        }
-        if (l0LinkedCount > 0) {
-            metrics.event.byRecallType.l0Linked = l0LinkedCount;
-            metrics.event.selected += l0LinkedCount;
-        }
    }

    xbLog.info(MODULE_ID,
-        `Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} l0Linked=+${l0LinkedCount} (${lexTime}ms)`
+        `Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (${lexTime}ms)`
    );

    // ═══════════════════════════════════════════════════════════════════
@@ -1248,7 +1207,56 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    metrics.timing.diffusion = metrics.diffusion?.time || 0;

    // ═══════════════════════════════════════════════════════════════════
-    // 阶段 7: Causation Trace
+    // Stage 8: L0 → L2 reverse lookup (deferred; based on the final l0Selected)
+    // ═══════════════════════════════════════════════════════════════════
+
+    const recalledL0Floors = new Set(l0Selected.map(x => x.floor));
+
+    for (const event of allEvents) {
+        if (existingEventIds.has(event.id)) continue;
+
+        const range = parseFloorRange(event.summary);
+        if (!range) continue;
+
+        let hasOverlap = false;
+        for (const floor of recalledL0Floors) {
+            if (floor >= range.start && floor <= range.end) {
+                hasOverlap = true;
+                break;
+            }
+        }
+        if (!hasOverlap) continue;
+
+        // Dense similarity 门槛（与 Lexical Event 对齐）
+        const evVec = eventVectorMap.get(event.id);
+        const sim = evVec?.length ? cosineSimilarity(queryVector_v1, evVec) : 0;
+        if (sim < CONFIG.LEXICAL_EVENT_DENSE_MIN) continue;
+
+        // 实体分类：与所有路径统一标准
+        const participants = (event.participants || []).map(p => normalize(p));
+        const hasEntityMatch = focusSetForLexical.size > 0
+            && participants.some(p => focusSetForLexical.has(p));
+
+        eventHits.push({
+            event,
+            similarity: sim,
+            _recallType: hasEntityMatch ? 'DIRECT' : 'RELATED',
+        });
+        existingEventIds.add(event.id);
+        l0LinkedCount++;
+    }
+
+    if (metrics && l0LinkedCount > 0) {
+        metrics.event.byRecallType.l0Linked = l0LinkedCount;
+        metrics.event.selected += l0LinkedCount;
+    }
+
+    xbLog.info(MODULE_ID,
+        `L0-linked events: ${recalledL0Floors.size} floors → ${l0LinkedCount} events linked (sim≥${CONFIG.LEXICAL_EVENT_DENSE_MIN})`
+    );
+
+    // ═══════════════════════════════════════════════════════════════════
+    // 阶段 9: Causation Trace
    // ═══════════════════════════════════════════════════════════════════

    const { results: causalMap, maxDepth: causalMaxDepth } = traceCausation(eventHits, eventIndex);
@@ -1288,7 +1296,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
    console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0} → ${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`);
    console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`);
-    console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`);
+    console.log(`Events: ${eventHits.length} hits (l0Linked=+${l0LinkedCount}), ${causalChain.length} causal`);
    console.log(`Diffusion: ${metrics.diffusion?.seedCount || 0} seeds → ${metrics.diffusion?.pprActivated || 0} activated → ${metrics.diffusion?.finalCount || 0} final (${metrics.diffusion?.time || 0}ms)`);
    console.groupEnd();