Refine diffusion graph channels and drop legacy who compatibility

This commit is contained in:
2026-02-13 15:56:22 +08:00
parent 9ba120364c
commit 6aa1547d6f
6 changed files with 110 additions and 180 deletions

View File

@@ -32,6 +32,7 @@
import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { tokenizeForIndex } from '../utils/tokenizer.js';
const MODULE_ID = 'diffusion';
@@ -46,12 +47,11 @@ const CONFIG = {
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
// Edge weight channel coefficients
// Rationale: Rimmon-Kenan (2002) hierarchy: characters > events > setting > themes
// No standalone WHO channel: rely on interaction/action/location only.
GAMMA: {
who: 0.50, // entity co-occurrence — Jaccard
what: 0.25, // directed pair overlap — Szymkiewicz-Simpson
where: 0.15, // location exact match — binary
how: 0.10, // dynamics tag co-occurrence — Jaccard
what: 0.55, // interaction pair overlap — Szymkiewicz-Simpson
where: 0.15, // location exact match — binary
how: 0.30, // action-term co-occurrence — Jaccard
},
// Post-verification (Cosine Gate)
@@ -94,17 +94,13 @@ function cosineSimilarity(a, b) {
// ═══════════════════════════════════════════════════════════════════════════
/**
* WHO channel: entity set = who ∪ edges.s ∪ edges.t
* Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
* @param {object} atom
* @param {Set<string>} excludeEntities - entities to exclude (e.g. name1)
* @returns {Set<string>}
*/
function extractEntities(atom, excludeEntities = new Set()) {
const set = new Set();
for (const w of (atom.who || [])) {
const n = normalize(w);
if (n && !excludeEntities.has(n)) set.add(n);
}
for (const e of (atom.edges || [])) {
const s = normalize(e?.s);
const t = normalize(e?.t);
@@ -115,18 +111,19 @@ function extractEntities(atom, excludeEntities = new Set()) {
}
/**
* WHAT channel: directed interaction pairs "A→B" (strict direction — option A)
* WHAT channel: interaction pairs "A↔B" (direction-insensitive; endpoints are sorted before joining).
* @param {object} atom
* @param {Set<string>} excludeEntities
* @returns {Set<string>}
*/
function extractDirectedPairs(atom, excludeEntities = new Set()) {
function extractInteractionPairs(atom, excludeEntities = new Set()) {
const set = new Set();
for (const e of (atom.edges || [])) {
const s = normalize(e?.s);
const t = normalize(e?.t);
if (s && t && !excludeEntities.has(s) && !excludeEntities.has(t)) {
set.add(`${s}\u2192${t}`);
const pair = [s, t].sort().join('\u2194');
set.add(pair);
}
}
return set;
@@ -142,15 +139,20 @@ function extractLocation(atom) {
}
/**
* HOW channel: dynamics tags set
* HOW channel: action terms from edges.r
* @param {object} atom
* @param {Set<string>} excludeEntities
* @returns {Set<string>}
*/
function extractDynamics(atom) {
function extractActionTerms(atom, excludeEntities = new Set()) {
const set = new Set();
for (const d of (atom.dynamics || [])) {
const n = normalize(d);
if (n) set.add(n);
for (const e of (atom.edges || [])) {
const rel = String(e?.r || '').trim();
if (!rel) continue;
for (const token of tokenizeForIndex(rel)) {
const t = normalize(token);
if (t && !excludeEntities.has(t)) set.add(t);
}
}
return set;
}
@@ -198,8 +200,8 @@ function overlapCoefficient(a, b) {
// Graph construction
//
// Candidate pairs discovered via inverted indices on entities and locations.
// Dynamics-only pairs excluded from candidate generation (γ_HOW = 0.10 is
// too weak to justify O(N²) blowup from 8-tag combinatorics).
// HOW-only pairs are still excluded from candidate generation to avoid O(N²);
// all channel weights are evaluated for the entity/location candidate set.
// All three channels (WHAT / WHERE / HOW) are evaluated for every candidate pair.
// ═══════════════════════════════════════════════════════════════════════════
@@ -207,14 +209,14 @@ function overlapCoefficient(a, b) {
* Pre-extract features for all atoms
* @param {object[]} allAtoms
* @param {Set<string>} excludeEntities
* @returns {object[]} feature objects with entities/directedPairs/location/dynamics
* @returns {object[]} feature objects with entities/interactionPairs/location/actionTerms
*/
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
return allAtoms.map(atom => ({
entities: extractEntities(atom, excludeEntities),
directedPairs: extractDirectedPairs(atom, excludeEntities),
interactionPairs: extractInteractionPairs(atom, excludeEntities),
location: extractLocation(atom),
dynamics: extractDynamics(atom),
actionTerms: extractActionTerms(atom, excludeEntities),
}));
}
@@ -279,10 +281,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
collectPairsFromIndex(entityIndex, pairSet, N);
collectPairsFromIndex(locationIndex, pairSet, N);
// Compute four-channel edge weights for all candidates
// Compute three-channel edge weights for all candidates
const neighbors = Array.from({ length: N }, () => []);
let edgeCount = 0;
const channelStats = { who: 0, what: 0, where: 0, how: 0 };
const channelStats = { what: 0, where: 0, how: 0 };
for (const packed of pairSet) {
const i = Math.floor(packed / N);
@@ -291,13 +293,11 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
const fi = features[i];
const fj = features[j];
const wWho = jaccard(fi.entities, fj.entities);
const wWhat = overlapCoefficient(fi.directedPairs, fj.directedPairs);
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
const wWhere = (fi.location && fi.location === fj.location) ? 1.0 : 0.0;
const wHow = jaccard(fi.dynamics, fj.dynamics);
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
const weight =
CONFIG.GAMMA.who * wWho +
CONFIG.GAMMA.what * wWhat +
CONFIG.GAMMA.where * wWhere +
CONFIG.GAMMA.how * wHow;
@@ -307,7 +307,6 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
neighbors[j].push({ target: i, weight });
edgeCount++;
if (wWho > 0) channelStats.who++;
if (wWhat > 0) channelStats.what++;
if (wWhere > 0) channelStats.where++;
if (wHow > 0) channelStats.how++;
@@ -318,8 +317,7 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
xbLog.info(MODULE_ID,
`Graph: ${N} nodes, ${edgeCount} edges ` +
`(who=${channelStats.who} what=${channelStats.what} ` +
`where=${channelStats.where} how=${channelStats.how}) ` +
`(what=${channelStats.what} where=${channelStats.where} how=${channelStats.how}) ` +
`(${buildTime}ms)`
);
@@ -593,7 +591,7 @@ function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector)
* @param {object[]} seeds - l0Selected from recall Stage 6
* Each: { atomId, rerankScore, similarity, atom, ... }
* @param {object[]} allAtoms - getStateAtoms() result
* Each: { atomId, floor, semantic, who, edges, dynamics, where }
* Each: { atomId, floor, semantic, edges, where }
* @param {object[]} stateVectors - getAllStateVectors() result
* Each: { atomId, floor, vector: Float32Array }
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
@@ -760,7 +758,7 @@ function fillMetricsEmpty(metrics) {
cosineGateNoVector: 0,
finalCount: 0,
scoreDistribution: { min: 0, max: 0, mean: 0 },
byChannel: { who: 0, what: 0, where: 0, how: 0 },
byChannel: { what: 0, where: 0, how: 0 },
time: 0,
};
}
@@ -782,7 +780,7 @@ function fillMetrics(metrics, data) {
cosineGateNoVector: data.cosineGateNoVector || 0,
finalCount: data.finalCount || 0,
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
byChannel: data.channelStats || { who: 0, what: 0, where: 0, how: 0 },
byChannel: data.channelStats || { what: 0, where: 0, how: 0 },
time: data.time || 0,
};
}