Refine diffusion graph channels and drop legacy who compatibility
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { tokenizeForIndex } from '../utils/tokenizer.js';
|
||||
|
||||
const MODULE_ID = 'diffusion';
|
||||
|
||||
@@ -46,12 +47,11 @@ const CONFIG = {
|
||||
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
|
||||
|
||||
// Edge weight channel coefficients
|
||||
// Rationale: Rimmon-Kenan (2002) hierarchy: characters > events > setting > themes
|
||||
// No standalone WHO channel: rely on interaction/action/location only.
|
||||
GAMMA: {
|
||||
who: 0.50, // entity co-occurrence — Jaccard
|
||||
what: 0.25, // directed pair overlap — Szymkiewicz-Simpson
|
||||
where: 0.15, // location exact match — binary
|
||||
how: 0.10, // dynamics tag co-occurrence — Jaccard
|
||||
what: 0.55, // interaction pair overlap — Szymkiewicz-Simpson
|
||||
where: 0.15, // location exact match — binary
|
||||
how: 0.30, // action-term co-occurrence — Jaccard
|
||||
},
|
||||
|
||||
// Post-verification (Cosine Gate)
|
||||
@@ -94,17 +94,13 @@ function cosineSimilarity(a, b) {
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
|
||||
* WHO channel: entity set = who ∪ edges.s ∪ edges.t
|
||||
* Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
|
||||
* @param {object} atom
|
||||
* @param {Set<string>} excludeEntities - entities to exclude (e.g. name1)
|
||||
* @returns {Set<string>}
|
||||
*/
|
||||
function extractEntities(atom, excludeEntities = new Set()) {
|
||||
const set = new Set();
|
||||
for (const w of (atom.who || [])) {
|
||||
const n = normalize(w);
|
||||
if (n && !excludeEntities.has(n)) set.add(n);
|
||||
}
|
||||
for (const e of (atom.edges || [])) {
|
||||
const s = normalize(e?.s);
|
||||
const t = normalize(e?.t);
|
||||
@@ -115,18 +111,19 @@ function extractEntities(atom, excludeEntities = new Set()) {
|
||||
}
|
||||
|
||||
/**
|
||||
* WHAT channel: directed interaction pairs "A→B" (strict direction — option A)
|
||||
* WHAT channel: interaction pairs "A↔B" (direction-insensitive).
|
||||
* @param {object} atom
|
||||
* @param {Set<string>} excludeEntities
|
||||
* @returns {Set<string>}
|
||||
*/
|
||||
function extractDirectedPairs(atom, excludeEntities = new Set()) {
|
||||
function extractInteractionPairs(atom, excludeEntities = new Set()) {
|
||||
const set = new Set();
|
||||
for (const e of (atom.edges || [])) {
|
||||
const s = normalize(e?.s);
|
||||
const t = normalize(e?.t);
|
||||
if (s && t && !excludeEntities.has(s) && !excludeEntities.has(t)) {
|
||||
set.add(`${s}\u2192${t}`);
|
||||
const pair = [s, t].sort().join('\u2194');
|
||||
set.add(pair);
|
||||
}
|
||||
}
|
||||
return set;
|
||||
@@ -142,15 +139,20 @@ function extractLocation(atom) {
|
||||
}
|
||||
|
||||
/**
|
||||
* HOW channel: dynamics tags set
|
||||
* HOW channel: action terms from edges.r
|
||||
* @param {object} atom
|
||||
* @param {Set<string>} excludeEntities
|
||||
* @returns {Set<string>}
|
||||
*/
|
||||
function extractDynamics(atom) {
|
||||
function extractActionTerms(atom, excludeEntities = new Set()) {
|
||||
const set = new Set();
|
||||
for (const d of (atom.dynamics || [])) {
|
||||
const n = normalize(d);
|
||||
if (n) set.add(n);
|
||||
for (const e of (atom.edges || [])) {
|
||||
const rel = String(e?.r || '').trim();
|
||||
if (!rel) continue;
|
||||
for (const token of tokenizeForIndex(rel)) {
|
||||
const t = normalize(token);
|
||||
if (t && !excludeEntities.has(t)) set.add(t);
|
||||
}
|
||||
}
|
||||
return set;
|
||||
}
|
||||
@@ -198,8 +200,8 @@ function overlapCoefficient(a, b) {
|
||||
// Graph construction
|
||||
//
|
||||
// Candidate pairs discovered via inverted indices on entities and locations.
|
||||
// Dynamics-only pairs excluded from candidate generation (γ_HOW = 0.10 is
|
||||
// too weak to justify O(N²) blowup from 8-tag combinatorics).
|
||||
// HOW-only pairs are still excluded from candidate generation to avoid O(N²);
|
||||
// all channel weights are evaluated for the entity/location candidate set.
|
||||
// All three channels (WHAT / WHERE / HOW) are evaluated for every candidate pair.
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@@ -207,14 +209,14 @@ function overlapCoefficient(a, b) {
|
||||
* Pre-extract features for all atoms
|
||||
* @param {object[]} allAtoms
|
||||
* @param {Set<string>} excludeEntities
|
||||
* @returns {object[]} feature objects with entities/directedPairs/location/dynamics
|
||||
* @returns {object[]} feature objects with entities/interactionPairs/location/actionTerms
|
||||
*/
|
||||
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
|
||||
return allAtoms.map(atom => ({
|
||||
entities: extractEntities(atom, excludeEntities),
|
||||
directedPairs: extractDirectedPairs(atom, excludeEntities),
|
||||
interactionPairs: extractInteractionPairs(atom, excludeEntities),
|
||||
location: extractLocation(atom),
|
||||
dynamics: extractDynamics(atom),
|
||||
actionTerms: extractActionTerms(atom, excludeEntities),
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -279,10 +281,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
collectPairsFromIndex(entityIndex, pairSet, N);
|
||||
collectPairsFromIndex(locationIndex, pairSet, N);
|
||||
|
||||
// Compute four-channel edge weights for all candidates
|
||||
// Compute three-channel edge weights for all candidates
|
||||
const neighbors = Array.from({ length: N }, () => []);
|
||||
let edgeCount = 0;
|
||||
const channelStats = { who: 0, what: 0, where: 0, how: 0 };
|
||||
const channelStats = { what: 0, where: 0, how: 0 };
|
||||
|
||||
for (const packed of pairSet) {
|
||||
const i = Math.floor(packed / N);
|
||||
@@ -291,13 +293,11 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
const fi = features[i];
|
||||
const fj = features[j];
|
||||
|
||||
const wWho = jaccard(fi.entities, fj.entities);
|
||||
const wWhat = overlapCoefficient(fi.directedPairs, fj.directedPairs);
|
||||
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
|
||||
const wWhere = (fi.location && fi.location === fj.location) ? 1.0 : 0.0;
|
||||
const wHow = jaccard(fi.dynamics, fj.dynamics);
|
||||
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
|
||||
|
||||
const weight =
|
||||
CONFIG.GAMMA.who * wWho +
|
||||
CONFIG.GAMMA.what * wWhat +
|
||||
CONFIG.GAMMA.where * wWhere +
|
||||
CONFIG.GAMMA.how * wHow;
|
||||
@@ -307,7 +307,6 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
neighbors[j].push({ target: i, weight });
|
||||
edgeCount++;
|
||||
|
||||
if (wWho > 0) channelStats.who++;
|
||||
if (wWhat > 0) channelStats.what++;
|
||||
if (wWhere > 0) channelStats.where++;
|
||||
if (wHow > 0) channelStats.how++;
|
||||
@@ -318,8 +317,7 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Graph: ${N} nodes, ${edgeCount} edges ` +
|
||||
`(who=${channelStats.who} what=${channelStats.what} ` +
|
||||
`where=${channelStats.where} how=${channelStats.how}) ` +
|
||||
`(what=${channelStats.what} where=${channelStats.where} how=${channelStats.how}) ` +
|
||||
`(${buildTime}ms)`
|
||||
);
|
||||
|
||||
@@ -593,7 +591,7 @@ function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector)
|
||||
* @param {object[]} seeds - l0Selected from recall Stage 6
|
||||
* Each: { atomId, rerankScore, similarity, atom, ... }
|
||||
* @param {object[]} allAtoms - getStateAtoms() result
|
||||
* Each: { atomId, floor, semantic, who, edges, dynamics, where }
|
||||
* Each: { atomId, floor, semantic, edges, where }
|
||||
* @param {object[]} stateVectors - getAllStateVectors() result
|
||||
* Each: { atomId, floor, vector: Float32Array }
|
||||
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
|
||||
@@ -760,7 +758,7 @@ function fillMetricsEmpty(metrics) {
|
||||
cosineGateNoVector: 0,
|
||||
finalCount: 0,
|
||||
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
||||
byChannel: { who: 0, what: 0, where: 0, how: 0 },
|
||||
byChannel: { what: 0, where: 0, how: 0 },
|
||||
time: 0,
|
||||
};
|
||||
}
|
||||
@@ -782,7 +780,7 @@ function fillMetrics(metrics, data) {
|
||||
cosineGateNoVector: data.cosineGateNoVector || 0,
|
||||
finalCount: data.finalCount || 0,
|
||||
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
|
||||
byChannel: data.channelStats || { who: 0, what: 0, where: 0, how: 0 },
|
||||
byChannel: data.channelStats || { what: 0, where: 0, how: 0 },
|
||||
time: data.time || 0,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user