refactor diffusion to r-sem edges with time window and add rVector I/O
This commit is contained in:
@@ -184,6 +184,19 @@ const VECTOR_WARNING_COOLDOWN_MS = 120000; // 2分钟内不重复提醒
|
|||||||
|
|
||||||
const EXT_PROMPT_KEY = "LittleWhiteBox_StorySummary";
|
const EXT_PROMPT_KEY = "LittleWhiteBox_StorySummary";
|
||||||
const MIN_INJECTION_DEPTH = 2;
|
const MIN_INJECTION_DEPTH = 2;
|
||||||
|
const R_AGG_MAX_CHARS = 256;
|
||||||
|
|
||||||
|
/**
 * Build the text that represents an atom's relations ("r" of each edge).
 *
 * The distinct, trimmed, non-empty `edge.r` values are joined with " ; " in
 * first-seen order. When the atom has no usable relation text, the trimmed
 * `atom.semantic` is returned instead (untruncated).
 *
 * The joined text is capped at R_AGG_MAX_CHARS UTF-16 code units. The cut is
 * moved back by one unit when it would otherwise split a surrogate pair, so
 * the result never ends in a dangling high surrogate.
 *
 * @param {object|null|undefined} atom - object with optional `edges` (array of `{ r }`) and `semantic`
 * @returns {string} aggregate relation text, or the semantic fallback ("" if neither exists)
 */
function buildRAggregateText(atom) {
    const uniq = new Set();
    for (const edge of (atom?.edges || [])) {
        const r = String(edge?.r || "").trim();
        if (r) uniq.add(r);
    }

    const joined = [...uniq].join(" ; ");
    if (!joined) return String(atom?.semantic || "").trim();
    if (joined.length <= R_AGG_MAX_CHARS) return joined;

    // A plain slice counts UTF-16 code units; if the last kept unit is a high
    // surrogate, its low half would be cut off, leaving malformed text.
    let end = R_AGG_MAX_CHARS;
    const lastUnit = joined.charCodeAt(end - 1);
    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) end -= 1;
    return joined.slice(0, end);
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 分词器预热(依赖 tokenizer.js 内部状态机,支持失败重试)
|
// 分词器预热(依赖 tokenizer.js 内部状态机,支持失败重试)
|
||||||
@@ -447,13 +460,21 @@ async function handleGenerateVectors(vectorCfg) {
|
|||||||
if (vectorCancelled) break;
|
if (vectorCancelled) break;
|
||||||
|
|
||||||
const batch = atoms.slice(i, i + batchSize);
|
const batch = atoms.slice(i, i + batchSize);
|
||||||
const texts = batch.map(a => a.semantic);
|
const semTexts = batch.map(a => a.semantic);
|
||||||
|
const rTexts = batch.map(a => buildRAggregateText(a));
|
||||||
try {
|
try {
|
||||||
const vectors = await embed(texts, vectorCfg, { signal: vectorAbortController.signal });
|
const vectors = await embed(semTexts.concat(rTexts), vectorCfg, { signal: vectorAbortController.signal });
|
||||||
|
const split = semTexts.length;
|
||||||
|
if (!Array.isArray(vectors) || vectors.length < split * 2) {
|
||||||
|
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
|
||||||
|
}
|
||||||
|
const semVectors = vectors.slice(0, split);
|
||||||
|
const rVectors = vectors.slice(split, split + split);
|
||||||
const items = batch.map((a, j) => ({
|
const items = batch.map((a, j) => ({
|
||||||
atomId: a.atomId,
|
atomId: a.atomId,
|
||||||
floor: a.floor,
|
floor: a.floor,
|
||||||
vector: vectors[j],
|
vector: semVectors[j],
|
||||||
|
rVector: rVectors[j] || semVectors[j],
|
||||||
}));
|
}));
|
||||||
await saveStateVectors(chatId, items, fingerprint);
|
await saveStateVectors(chatId, items, fingerprint);
|
||||||
l0Completed += batch.length;
|
l0Completed += batch.length;
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ const MODULE_ID = 'state-integration';
|
|||||||
const CONCURRENCY = 50;
|
const CONCURRENCY = 50;
|
||||||
const STAGGER_DELAY = 15;
|
const STAGGER_DELAY = 15;
|
||||||
const DEBUG_CONCURRENCY = true;
|
const DEBUG_CONCURRENCY = true;
|
||||||
|
const R_AGG_MAX_CHARS = 256;
|
||||||
|
|
||||||
let initialized = false;
|
let initialized = false;
|
||||||
let extractionCancelled = false;
|
let extractionCancelled = false;
|
||||||
@@ -112,6 +113,18 @@ function buildL0InputText(userMessage, aiMessage) {
|
|||||||
return parts.join('\n\n---\n\n').trim();
|
return parts.join('\n\n---\n\n').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the text that represents an atom's relations ("r" of each edge).
 *
 * The distinct, trimmed, non-empty `edge.r` values are joined with ' ; ' in
 * first-seen order. When the atom has no usable relation text, the trimmed
 * `atom.semantic` is returned instead (untruncated).
 *
 * The joined text is capped at R_AGG_MAX_CHARS UTF-16 code units. The cut is
 * moved back by one unit when it would otherwise split a surrogate pair, so
 * the result never ends in a dangling high surrogate.
 *
 * @param {object|null|undefined} atom - object with optional `edges` (array of `{ r }`) and `semantic`
 * @returns {string} aggregate relation text, or the semantic fallback ('' if neither exists)
 */
function buildRAggregateText(atom) {
    const uniq = new Set();
    for (const edge of (atom?.edges || [])) {
        const r = String(edge?.r || '').trim();
        if (r) uniq.add(r);
    }

    const joined = [...uniq].join(' ; ');
    if (!joined) return String(atom?.semantic || '').trim();
    if (joined.length <= R_AGG_MAX_CHARS) return joined;

    // A plain slice counts UTF-16 code units; if the last kept unit is a high
    // surrogate, its low half would be cut off, leaving malformed text.
    let end = R_AGG_MAX_CHARS;
    const lastUnit = joined.charCodeAt(end - 1);
    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) end -= 1;
    return joined.slice(0, end);
}
|
||||||
|
|
||||||
export async function incrementalExtractAtoms(chatId, chat, onProgress, options = {}) {
|
export async function incrementalExtractAtoms(chatId, chat, onProgress, options = {}) {
|
||||||
const { maxFloors = Infinity } = options;
|
const { maxFloors = Infinity } = options;
|
||||||
if (!chatId || !chat?.length) return { built: 0 };
|
if (!chatId || !chat?.length) return { built: 0 };
|
||||||
@@ -271,21 +284,36 @@ async function vectorizeAtoms(chatId, atoms, onProgress) {
|
|||||||
const vectorCfg = getVectorConfig();
|
const vectorCfg = getVectorConfig();
|
||||||
if (!vectorCfg?.enabled) return;
|
if (!vectorCfg?.enabled) return;
|
||||||
|
|
||||||
const texts = atoms.map(a => a.semantic);
|
const semanticTexts = atoms.map(a => a.semantic);
|
||||||
|
const rTexts = atoms.map(a => buildRAggregateText(a));
|
||||||
const fingerprint = getEngineFingerprint(vectorCfg);
|
const fingerprint = getEngineFingerprint(vectorCfg);
|
||||||
const batchSize = 20;
|
const batchSize = 20;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const allVectors = [];
|
const allVectors = [];
|
||||||
|
|
||||||
for (let i = 0; i < texts.length; i += batchSize) {
|
for (let i = 0; i < semanticTexts.length; i += batchSize) {
|
||||||
if (extractionCancelled) break;
|
if (extractionCancelled) break;
|
||||||
|
|
||||||
const batch = texts.slice(i, i + batchSize);
|
const semBatch = semanticTexts.slice(i, i + batchSize);
|
||||||
const vectors = await embed(batch, { timeout: 30000 });
|
const rBatch = rTexts.slice(i, i + batchSize);
|
||||||
allVectors.push(...vectors);
|
const payload = semBatch.concat(rBatch);
|
||||||
|
const vectors = await embed(payload, { timeout: 30000 });
|
||||||
|
const split = semBatch.length;
|
||||||
|
if (!Array.isArray(vectors) || vectors.length < split * 2) {
|
||||||
|
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
|
||||||
|
}
|
||||||
|
const semVectors = vectors.slice(0, split);
|
||||||
|
const rVectors = vectors.slice(split, split + split);
|
||||||
|
|
||||||
onProgress?.(allVectors.length, texts.length);
|
for (let j = 0; j < split; j++) {
|
||||||
|
allVectors.push({
|
||||||
|
vector: semVectors[j],
|
||||||
|
rVector: rVectors[j] || semVectors[j],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
onProgress?.(allVectors.length, semanticTexts.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (extractionCancelled) return;
|
if (extractionCancelled) return;
|
||||||
@@ -293,7 +321,8 @@ async function vectorizeAtoms(chatId, atoms, onProgress) {
|
|||||||
const items = atoms.slice(0, allVectors.length).map((a, i) => ({
|
const items = atoms.slice(0, allVectors.length).map((a, i) => ({
|
||||||
atomId: a.atomId,
|
atomId: a.atomId,
|
||||||
floor: a.floor,
|
floor: a.floor,
|
||||||
vector: allVectors[i],
|
vector: allVectors[i].vector,
|
||||||
|
rVector: allVectors[i].rVector,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
await saveStateVectors(chatId, items, fingerprint);
|
await saveStateVectors(chatId, items, fingerprint);
|
||||||
@@ -380,16 +409,24 @@ async function vectorizeAtomsSimple(chatId, atoms) {
|
|||||||
const vectorCfg = getVectorConfig();
|
const vectorCfg = getVectorConfig();
|
||||||
if (!vectorCfg?.enabled) return;
|
if (!vectorCfg?.enabled) return;
|
||||||
|
|
||||||
const texts = atoms.map(a => a.semantic);
|
const semanticTexts = atoms.map(a => a.semantic);
|
||||||
|
const rTexts = atoms.map(a => buildRAggregateText(a));
|
||||||
const fingerprint = getEngineFingerprint(vectorCfg);
|
const fingerprint = getEngineFingerprint(vectorCfg);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const vectors = await embed(texts, { timeout: 30000 });
|
const vectors = await embed(semanticTexts.concat(rTexts), { timeout: 30000 });
|
||||||
|
const split = semanticTexts.length;
|
||||||
|
if (!Array.isArray(vectors) || vectors.length < split * 2) {
|
||||||
|
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
|
||||||
|
}
|
||||||
|
const semVectors = vectors.slice(0, split);
|
||||||
|
const rVectors = vectors.slice(split, split + split);
|
||||||
|
|
||||||
const items = atoms.map((a, i) => ({
|
const items = atoms.map((a, i) => ({
|
||||||
atomId: a.atomId,
|
atomId: a.atomId,
|
||||||
floor: a.floor,
|
floor: a.floor,
|
||||||
vector: vectors[i],
|
vector: semVectors[i],
|
||||||
|
rVector: rVectors[i] || semVectors[i],
|
||||||
}));
|
}));
|
||||||
|
|
||||||
await saveStateVectors(chatId, items, fingerprint);
|
await saveStateVectors(chatId, items, fingerprint);
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
//
|
//
|
||||||
// Algorithm:
|
// Algorithm:
|
||||||
// 1. Build undirected weighted graph over all L0 atoms
|
// 1. Build undirected weighted graph over all L0 atoms
|
||||||
// Four channels: WHO/WHAT/WHERE/HOW (Jaccard/Overlap/ExactMatch)
|
// Candidate edges: WHAT + R semantic; WHO/WHERE are reweight-only
|
||||||
// 2. Personalized PageRank (Power Iteration)
|
// 2. Personalized PageRank (Power Iteration)
|
||||||
// Seeds weighted by rerankScore — Haveliwala (2002) topic-sensitive variant
|
// Seeds weighted by rerankScore — Haveliwala (2002) topic-sensitive variant
|
||||||
// α = 0.15 restart probability — Page et al. (1998)
|
// α = 0.15 restart probability — Page et al. (1998)
|
||||||
@@ -32,7 +32,6 @@
|
|||||||
|
|
||||||
import { xbLog } from '../../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
import { getContext } from '../../../../../../../extensions.js';
|
import { getContext } from '../../../../../../../extensions.js';
|
||||||
import { tokenizeForIndex } from '../utils/tokenizer.js';
|
|
||||||
|
|
||||||
const MODULE_ID = 'diffusion';
|
const MODULE_ID = 'diffusion';
|
||||||
|
|
||||||
@@ -47,22 +46,27 @@ const CONFIG = {
|
|||||||
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
|
MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
|
||||||
|
|
||||||
// Edge weight channel coefficients
|
// Edge weight channel coefficients
|
||||||
// Candidate generation uses WHAT/HOW only.
|
// Candidate generation uses WHAT + R semantic only.
|
||||||
// WHO/WHERE are reweight-only signals.
|
// WHO/WHERE are reweight-only signals.
|
||||||
GAMMA: {
|
GAMMA: {
|
||||||
what: 0.45, // interaction pair overlap — Szymkiewicz-Simpson
|
what: 0.40, // interaction pair overlap
|
||||||
how: 0.30, // action-term co-occurrence — Jaccard
|
rSem: 0.40, // semantic similarity over edges.r aggregate
|
||||||
who: 0.15, // endpoint entity overlap — Jaccard (reweight-only)
|
who: 0.10, // endpoint entity overlap (reweight-only)
|
||||||
where: 0.10, // location exact match — damped (reweight-only)
|
where: 0.05, // location exact match (reweight-only)
|
||||||
|
time: 0.05, // temporal decay score
|
||||||
},
|
},
|
||||||
|
// R semantic candidate generation
|
||||||
|
R_SEM_MIN_SIM: 0.62,
|
||||||
|
R_SEM_TOPK: 8,
|
||||||
|
TIME_WINDOW_MAX: 80,
|
||||||
|
TIME_DECAY_DIVISOR: 12,
|
||||||
WHERE_MAX_GROUP_SIZE: 16, // skip location-only pair expansion for over-common places
|
WHERE_MAX_GROUP_SIZE: 16, // skip location-only pair expansion for over-common places
|
||||||
WHERE_FREQ_DAMP_PIVOT: 6, // location freq <= pivot keeps full WHERE score
|
WHERE_FREQ_DAMP_PIVOT: 6, // location freq <= pivot keeps full WHERE score
|
||||||
WHERE_FREQ_DAMP_MIN: 0.20, // lower bound for damped WHERE contribution
|
WHERE_FREQ_DAMP_MIN: 0.20, // lower bound for damped WHERE contribution
|
||||||
HOW_MAX_GROUP_SIZE: 24, // skip ultra-common action terms to avoid dense pair explosion
|
|
||||||
|
|
||||||
// Post-verification (Cosine Gate)
|
// Post-verification (Cosine Gate)
|
||||||
COSINE_GATE: 0.46, // min cosine(queryVector, stateVector)
|
COSINE_GATE: 0.46, // min cosine(queryVector, stateVector)
|
||||||
SCORE_FLOOR: 0.12, // min finalScore = PPR_normalized × cosine
|
SCORE_FLOOR: 0.10, // min finalScore = PPR_normalized × cosine
|
||||||
DIFFUSION_CAP: 100, // max diffused nodes (excluding seeds)
|
DIFFUSION_CAP: 100, // max diffused nodes (excluding seeds)
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -144,23 +148,14 @@ function extractLocation(atom) {
|
|||||||
return normalize(atom.where);
|
return normalize(atom.where);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
function getFloorDistance(a, b) {
|
||||||
* HOW channel: action terms from edges.r
|
const fa = Number(a?.floor || 0);
|
||||||
* @param {object} atom
|
const fb = Number(b?.floor || 0);
|
||||||
* @param {Set<string>} excludeEntities
|
return Math.abs(fa - fb);
|
||||||
* @returns {Set<string>}
|
}
|
||||||
*/
|
|
||||||
function extractActionTerms(atom, excludeEntities = new Set()) {
|
function getTimeScore(distance) {
|
||||||
const set = new Set();
|
return Math.exp(-distance / CONFIG.TIME_DECAY_DIVISOR);
|
||||||
for (const e of (atom.edges || [])) {
|
|
||||||
const rel = String(e?.r || '').trim();
|
|
||||||
if (!rel) continue;
|
|
||||||
for (const token of tokenizeForIndex(rel)) {
|
|
||||||
const t = normalize(token);
|
|
||||||
if (t && !excludeEntities.has(t)) set.add(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return set;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
@@ -205,35 +200,31 @@ function overlapCoefficient(a, b) {
|
|||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// Graph construction
|
// Graph construction
|
||||||
//
|
//
|
||||||
// Candidate pairs discovered via inverted indices on entities and locations.
|
// Candidate pairs discovered via WHAT inverted index and R semantic top-k.
|
||||||
// HOW-only pairs are still excluded from candidate generation to avoid O(N²);
|
// WHO/WHERE are reweight-only signals and never create candidate pairs.
|
||||||
// all channel weights are evaluated for the entity/location candidate set.
|
|
||||||
// All four channels evaluated for every candidate pair.
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pre-extract features for all atoms
|
* Pre-extract features for all atoms
|
||||||
* @param {object[]} allAtoms
|
* @param {object[]} allAtoms
|
||||||
* @param {Set<string>} excludeEntities
|
* @param {Set<string>} excludeEntities
|
||||||
* @returns {object[]} feature objects with entities/interactionPairs/location/actionTerms
|
* @returns {object[]} feature objects with entities/interactionPairs/location
|
||||||
*/
|
*/
|
||||||
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
|
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
|
||||||
return allAtoms.map(atom => ({
|
return allAtoms.map(atom => ({
|
||||||
entities: extractEntities(atom, excludeEntities),
|
entities: extractEntities(atom, excludeEntities),
|
||||||
interactionPairs: extractInteractionPairs(atom, excludeEntities),
|
interactionPairs: extractInteractionPairs(atom, excludeEntities),
|
||||||
location: extractLocation(atom),
|
location: extractLocation(atom),
|
||||||
actionTerms: extractActionTerms(atom, excludeEntities),
|
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build inverted index: value → list of atom indices
|
* Build inverted index: value → list of atom indices
|
||||||
* @param {object[]} features
|
* @param {object[]} features
|
||||||
* @returns {{ whatIndex: Map, howIndex: Map, locationFreq: Map }}
|
* @returns {{ whatIndex: Map, locationFreq: Map }}
|
||||||
*/
|
*/
|
||||||
function buildInvertedIndices(features) {
|
function buildInvertedIndices(features) {
|
||||||
const whatIndex = new Map();
|
const whatIndex = new Map();
|
||||||
const howIndex = new Map();
|
|
||||||
const locationFreq = new Map();
|
const locationFreq = new Map();
|
||||||
|
|
||||||
for (let i = 0; i < features.length; i++) {
|
for (let i = 0; i < features.length; i++) {
|
||||||
@@ -241,15 +232,11 @@ function buildInvertedIndices(features) {
|
|||||||
if (!whatIndex.has(pair)) whatIndex.set(pair, []);
|
if (!whatIndex.has(pair)) whatIndex.set(pair, []);
|
||||||
whatIndex.get(pair).push(i);
|
whatIndex.get(pair).push(i);
|
||||||
}
|
}
|
||||||
for (const action of features[i].actionTerms) {
|
|
||||||
if (!howIndex.has(action)) howIndex.set(action, []);
|
|
||||||
howIndex.get(action).push(i);
|
|
||||||
}
|
|
||||||
const loc = features[i].location;
|
const loc = features[i].location;
|
||||||
if (loc) locationFreq.set(loc, (locationFreq.get(loc) || 0) + 1);
|
if (loc) locationFreq.set(loc, (locationFreq.get(loc) || 0) + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return { whatIndex, howIndex, locationFreq };
|
return { whatIndex, locationFreq };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -274,38 +261,88 @@ function collectPairsFromIndex(index, pairSet, N) {
|
|||||||
* Build weighted undirected graph over L0 atoms.
|
* Build weighted undirected graph over L0 atoms.
|
||||||
*
|
*
|
||||||
* @param {object[]} allAtoms
|
* @param {object[]} allAtoms
|
||||||
|
* @param {object[]} stateVectors
|
||||||
* @param {Set<string>} excludeEntities
|
* @param {Set<string>} excludeEntities
|
||||||
* @returns {{ neighbors: object[][], edgeCount: number, channelStats: object, buildTime: number }}
|
* @returns {{ neighbors: object[][], edgeCount: number, channelStats: object, buildTime: number }}
|
||||||
*/
|
*/
|
||||||
function buildGraph(allAtoms, excludeEntities = new Set()) {
|
function buildGraph(allAtoms, stateVectors = [], excludeEntities = new Set()) {
|
||||||
const N = allAtoms.length;
|
const N = allAtoms.length;
|
||||||
const T0 = performance.now();
|
const T0 = performance.now();
|
||||||
|
|
||||||
const features = extractAllFeatures(allAtoms, excludeEntities);
|
const features = extractAllFeatures(allAtoms, excludeEntities);
|
||||||
const { whatIndex, howIndex, locationFreq } = buildInvertedIndices(features);
|
const { whatIndex, locationFreq } = buildInvertedIndices(features);
|
||||||
|
|
||||||
// Candidate pairs: only WHAT/HOW can create edges
|
// Candidate pairs: WHAT + R semantic
|
||||||
const pairSetByWhat = new Set();
|
const pairSetByWhat = new Set();
|
||||||
const pairSetByHow = new Set();
|
const pairSetByRSem = new Set();
|
||||||
|
const rSemByPair = new Map();
|
||||||
const pairSet = new Set();
|
const pairSet = new Set();
|
||||||
collectPairsFromIndex(whatIndex, pairSetByWhat, N);
|
collectPairsFromIndex(whatIndex, pairSetByWhat, N);
|
||||||
let skippedHowGroups = 0;
|
|
||||||
for (const [term, indices] of howIndex.entries()) {
|
const rVectorByAtomId = new Map(
|
||||||
if (!term) continue;
|
(stateVectors || [])
|
||||||
if (indices.length > CONFIG.HOW_MAX_GROUP_SIZE) {
|
.filter(v => v?.atomId && v?.rVector?.length)
|
||||||
skippedHowGroups++;
|
.map(v => [v.atomId, v.rVector])
|
||||||
continue;
|
);
|
||||||
|
const rVectors = allAtoms.map(a => rVectorByAtomId.get(a.atomId) || null);
|
||||||
|
|
||||||
|
const directedNeighbors = Array.from({ length: N }, () => []);
|
||||||
|
let rSemSimSum = 0;
|
||||||
|
let rSemSimCount = 0;
|
||||||
|
let topKPrunedPairs = 0;
|
||||||
|
let timeWindowFilteredPairs = 0;
|
||||||
|
|
||||||
|
// Enumerate only pairs within floor window to avoid O(N^2) full scan.
|
||||||
|
const sortedByFloor = allAtoms
|
||||||
|
.map((atom, idx) => ({ idx, floor: Number(atom?.floor || 0) }))
|
||||||
|
.sort((a, b) => a.floor - b.floor);
|
||||||
|
|
||||||
|
for (let left = 0; left < sortedByFloor.length; left++) {
|
||||||
|
const i = sortedByFloor[left].idx;
|
||||||
|
const baseFloor = sortedByFloor[left].floor;
|
||||||
|
|
||||||
|
for (let right = left + 1; right < sortedByFloor.length; right++) {
|
||||||
|
const floorDelta = sortedByFloor[right].floor - baseFloor;
|
||||||
|
if (floorDelta > CONFIG.TIME_WINDOW_MAX) break;
|
||||||
|
|
||||||
|
const j = sortedByFloor[right].idx;
|
||||||
|
const vi = rVectors[i];
|
||||||
|
const vj = rVectors[j];
|
||||||
|
if (!vi?.length || !vj?.length) continue;
|
||||||
|
|
||||||
|
const sim = cosineSimilarity(vi, vj);
|
||||||
|
if (sim < CONFIG.R_SEM_MIN_SIM) continue;
|
||||||
|
|
||||||
|
directedNeighbors[i].push({ target: j, sim });
|
||||||
|
directedNeighbors[j].push({ target: i, sim });
|
||||||
|
rSemSimSum += sim;
|
||||||
|
rSemSimCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let i = 0; i < N; i++) {
|
||||||
|
const arr = directedNeighbors[i];
|
||||||
|
if (!arr.length) continue;
|
||||||
|
arr.sort((a, b) => b.sim - a.sim);
|
||||||
|
if (arr.length > CONFIG.R_SEM_TOPK) {
|
||||||
|
topKPrunedPairs += arr.length - CONFIG.R_SEM_TOPK;
|
||||||
|
}
|
||||||
|
for (const n of arr.slice(0, CONFIG.R_SEM_TOPK)) {
|
||||||
|
const lo = Math.min(i, n.target);
|
||||||
|
const hi = Math.max(i, n.target);
|
||||||
|
const packed = lo * N + hi;
|
||||||
|
pairSetByRSem.add(packed);
|
||||||
|
const prev = rSemByPair.get(packed) || 0;
|
||||||
|
if (n.sim > prev) rSemByPair.set(packed, n.sim);
|
||||||
}
|
}
|
||||||
const oneHowMap = new Map([[term, indices]]);
|
|
||||||
collectPairsFromIndex(oneHowMap, pairSetByHow, N);
|
|
||||||
}
|
}
|
||||||
for (const p of pairSetByWhat) pairSet.add(p);
|
for (const p of pairSetByWhat) pairSet.add(p);
|
||||||
for (const p of pairSetByHow) pairSet.add(p);
|
for (const p of pairSetByRSem) pairSet.add(p);
|
||||||
|
|
||||||
// Compute edge weights for all candidates
|
// Compute edge weights for all candidates
|
||||||
const neighbors = Array.from({ length: N }, () => []);
|
const neighbors = Array.from({ length: N }, () => []);
|
||||||
let edgeCount = 0;
|
let edgeCount = 0;
|
||||||
const channelStats = { what: 0, where: 0, how: 0, who: 0 };
|
const channelStats = { what: 0, where: 0, rSem: 0, who: 0 };
|
||||||
let reweightWhoUsed = 0;
|
let reweightWhoUsed = 0;
|
||||||
let reweightWhereUsed = 0;
|
let reweightWhereUsed = 0;
|
||||||
|
|
||||||
@@ -313,11 +350,18 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
|||||||
const i = Math.floor(packed / N);
|
const i = Math.floor(packed / N);
|
||||||
const j = packed % N;
|
const j = packed % N;
|
||||||
|
|
||||||
|
const distance = getFloorDistance(allAtoms[i], allAtoms[j]);
|
||||||
|
if (distance > CONFIG.TIME_WINDOW_MAX) {
|
||||||
|
timeWindowFilteredPairs++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const wTime = getTimeScore(distance);
|
||||||
|
|
||||||
const fi = features[i];
|
const fi = features[i];
|
||||||
const fj = features[j];
|
const fj = features[j];
|
||||||
|
|
||||||
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
|
const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
|
||||||
const wHow = jaccard(fi.actionTerms, fj.actionTerms);
|
const wRSem = rSemByPair.get(packed) || 0;
|
||||||
const wWho = jaccard(fi.entities, fj.entities);
|
const wWho = jaccard(fi.entities, fj.entities);
|
||||||
let wWhere = 0.0;
|
let wWhere = 0.0;
|
||||||
if (fi.location && fi.location === fj.location) {
|
if (fi.location && fi.location === fj.location) {
|
||||||
@@ -331,9 +375,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
|||||||
|
|
||||||
const weight =
|
const weight =
|
||||||
CONFIG.GAMMA.what * wWhat +
|
CONFIG.GAMMA.what * wWhat +
|
||||||
CONFIG.GAMMA.how * wHow +
|
CONFIG.GAMMA.rSem * wRSem +
|
||||||
CONFIG.GAMMA.who * wWho +
|
CONFIG.GAMMA.who * wWho +
|
||||||
CONFIG.GAMMA.where * wWhere;
|
CONFIG.GAMMA.where * wWhere +
|
||||||
|
CONFIG.GAMMA.time * wTime;
|
||||||
|
|
||||||
if (weight > 0) {
|
if (weight > 0) {
|
||||||
neighbors[i].push({ target: j, weight });
|
neighbors[i].push({ target: j, weight });
|
||||||
@@ -341,7 +386,7 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
|||||||
edgeCount++;
|
edgeCount++;
|
||||||
|
|
||||||
if (wWhat > 0) channelStats.what++;
|
if (wWhat > 0) channelStats.what++;
|
||||||
if (wHow > 0) channelStats.how++;
|
if (wRSem > 0) channelStats.rSem++;
|
||||||
if (wWho > 0) channelStats.who++;
|
if (wWho > 0) channelStats.who++;
|
||||||
if (wWhere > 0) channelStats.where++;
|
if (wWhere > 0) channelStats.where++;
|
||||||
if (wWho > 0) reweightWhoUsed++;
|
if (wWho > 0) reweightWhoUsed++;
|
||||||
@@ -353,10 +398,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
|||||||
|
|
||||||
xbLog.info(MODULE_ID,
|
xbLog.info(MODULE_ID,
|
||||||
`Graph: ${N} nodes, ${edgeCount} edges ` +
|
`Graph: ${N} nodes, ${edgeCount} edges ` +
|
||||||
`(candidate_by_what=${pairSetByWhat.size} candidate_by_how=${pairSetByHow.size}) ` +
|
`(candidate_by_what=${pairSetByWhat.size} candidate_by_r_sem=${pairSetByRSem.size}) ` +
|
||||||
`(what=${channelStats.what} how=${channelStats.how} who=${channelStats.who} where=${channelStats.where}) ` +
|
`(what=${channelStats.what} r_sem=${channelStats.rSem} who=${channelStats.who} where=${channelStats.where}) ` +
|
||||||
`(reweight_who_used=${reweightWhoUsed} reweight_where_used=${reweightWhereUsed}) ` +
|
`(reweight_who_used=${reweightWhoUsed} reweight_where_used=${reweightWhereUsed}) ` +
|
||||||
`(howSkippedGroups=${skippedHowGroups}) ` +
|
`(time_window_filtered=${timeWindowFilteredPairs} topk_pruned=${topKPrunedPairs}) ` +
|
||||||
`(${buildTime}ms)`
|
`(${buildTime}ms)`
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -370,7 +415,10 @@ function buildGraph(allAtoms, excludeEntities = new Set()) {
|
|||||||
buildTime,
|
buildTime,
|
||||||
candidatePairs: pairSet.size,
|
candidatePairs: pairSet.size,
|
||||||
pairsFromWhat: pairSetByWhat.size,
|
pairsFromWhat: pairSetByWhat.size,
|
||||||
pairsFromHow: pairSetByHow.size,
|
pairsFromRSem: pairSetByRSem.size,
|
||||||
|
rSemAvgSim: rSemSimCount ? Number((rSemSimSum / rSemSimCount).toFixed(3)) : 0,
|
||||||
|
timeWindowFilteredPairs,
|
||||||
|
topKPrunedPairs,
|
||||||
reweightWhoUsed,
|
reweightWhoUsed,
|
||||||
reweightWhereUsed,
|
reweightWhereUsed,
|
||||||
edgeDensity,
|
edgeDensity,
|
||||||
@@ -646,7 +694,7 @@ function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector)
|
|||||||
* @param {object[]} allAtoms - getStateAtoms() result
|
* @param {object[]} allAtoms - getStateAtoms() result
|
||||||
* Each: { atomId, floor, semantic, edges, where }
|
* Each: { atomId, floor, semantic, edges, where }
|
||||||
* @param {object[]} stateVectors - getAllStateVectors() result
|
* @param {object[]} stateVectors - getAllStateVectors() result
|
||||||
* Each: { atomId, floor, vector: Float32Array }
|
* Each: { atomId, floor, vector: Float32Array, rVector?: Float32Array }
|
||||||
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
|
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
|
||||||
* @param {object|null} metrics - metrics object (optional, mutated in-place)
|
* @param {object|null} metrics - metrics object (optional, mutated in-place)
|
||||||
* @returns {object[]} Additional L0 atoms for l0Selected
|
* @returns {object[]} Additional L0 atoms for l0Selected
|
||||||
@@ -693,7 +741,7 @@ export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, met
|
|||||||
|
|
||||||
// ─── 2. Build graph ──────────────────────────────────────────────
|
// ─── 2. Build graph ──────────────────────────────────────────────
|
||||||
|
|
||||||
const graph = buildGraph(allAtoms, excludeEntities);
|
const graph = buildGraph(allAtoms, stateVectors, excludeEntities);
|
||||||
|
|
||||||
if (graph.edgeCount === 0) {
|
if (graph.edgeCount === 0) {
|
||||||
fillMetrics(metrics, {
|
fillMetrics(metrics, {
|
||||||
@@ -703,7 +751,10 @@ export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, met
|
|||||||
channelStats: graph.channelStats,
|
channelStats: graph.channelStats,
|
||||||
candidatePairs: graph.candidatePairs,
|
candidatePairs: graph.candidatePairs,
|
||||||
pairsFromWhat: graph.pairsFromWhat,
|
pairsFromWhat: graph.pairsFromWhat,
|
||||||
pairsFromHow: graph.pairsFromHow,
|
pairsFromRSem: graph.pairsFromRSem,
|
||||||
|
rSemAvgSim: graph.rSemAvgSim,
|
||||||
|
timeWindowFilteredPairs: graph.timeWindowFilteredPairs,
|
||||||
|
topKPrunedPairs: graph.topKPrunedPairs,
|
||||||
edgeDensity: graph.edgeDensity,
|
edgeDensity: graph.edgeDensity,
|
||||||
reweightWhoUsed: graph.reweightWhoUsed,
|
reweightWhoUsed: graph.reweightWhoUsed,
|
||||||
reweightWhereUsed: graph.reweightWhereUsed,
|
reweightWhereUsed: graph.reweightWhereUsed,
|
||||||
@@ -755,7 +806,10 @@ export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, met
|
|||||||
channelStats: graph.channelStats,
|
channelStats: graph.channelStats,
|
||||||
candidatePairs: graph.candidatePairs,
|
candidatePairs: graph.candidatePairs,
|
||||||
pairsFromWhat: graph.pairsFromWhat,
|
pairsFromWhat: graph.pairsFromWhat,
|
||||||
pairsFromHow: graph.pairsFromHow,
|
pairsFromRSem: graph.pairsFromRSem,
|
||||||
|
rSemAvgSim: graph.rSemAvgSim,
|
||||||
|
timeWindowFilteredPairs: graph.timeWindowFilteredPairs,
|
||||||
|
topKPrunedPairs: graph.topKPrunedPairs,
|
||||||
edgeDensity: graph.edgeDensity,
|
edgeDensity: graph.edgeDensity,
|
||||||
reweightWhoUsed: graph.reweightWhoUsed,
|
reweightWhoUsed: graph.reweightWhoUsed,
|
||||||
reweightWhereUsed: graph.reweightWhereUsed,
|
reweightWhereUsed: graph.reweightWhereUsed,
|
||||||
@@ -826,10 +880,13 @@ function fillMetricsEmpty(metrics) {
|
|||||||
cosineGateNoVector: 0,
|
cosineGateNoVector: 0,
|
||||||
finalCount: 0,
|
finalCount: 0,
|
||||||
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
||||||
byChannel: { what: 0, where: 0, how: 0, who: 0 },
|
byChannel: { what: 0, where: 0, rSem: 0, who: 0 },
|
||||||
candidatePairs: 0,
|
candidatePairs: 0,
|
||||||
pairsFromWhat: 0,
|
pairsFromWhat: 0,
|
||||||
pairsFromHow: 0,
|
pairsFromRSem: 0,
|
||||||
|
rSemAvgSim: 0,
|
||||||
|
timeWindowFilteredPairs: 0,
|
||||||
|
topKPrunedPairs: 0,
|
||||||
edgeDensity: 0,
|
edgeDensity: 0,
|
||||||
reweightWhoUsed: 0,
|
reweightWhoUsed: 0,
|
||||||
reweightWhereUsed: 0,
|
reweightWhereUsed: 0,
|
||||||
@@ -856,10 +913,13 @@ function fillMetrics(metrics, data) {
|
|||||||
postGatePassRate: data.postGatePassRate || 0,
|
postGatePassRate: data.postGatePassRate || 0,
|
||||||
finalCount: data.finalCount || 0,
|
finalCount: data.finalCount || 0,
|
||||||
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
|
scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
|
||||||
byChannel: data.channelStats || { what: 0, where: 0, how: 0, who: 0 },
|
byChannel: data.channelStats || { what: 0, where: 0, rSem: 0, who: 0 },
|
||||||
candidatePairs: data.candidatePairs || 0,
|
candidatePairs: data.candidatePairs || 0,
|
||||||
pairsFromWhat: data.pairsFromWhat || 0,
|
pairsFromWhat: data.pairsFromWhat || 0,
|
||||||
pairsFromHow: data.pairsFromHow || 0,
|
pairsFromRSem: data.pairsFromRSem || 0,
|
||||||
|
rSemAvgSim: data.rSemAvgSim || 0,
|
||||||
|
timeWindowFilteredPairs: data.timeWindowFilteredPairs || 0,
|
||||||
|
topKPrunedPairs: data.topKPrunedPairs || 0,
|
||||||
edgeDensity: data.edgeDensity || 0,
|
edgeDensity: data.edgeDensity || 0,
|
||||||
reweightWhoUsed: data.reweightWhoUsed || 0,
|
reweightWhoUsed: data.reweightWhoUsed || 0,
|
||||||
reweightWhereUsed: data.reweightWhereUsed || 0,
|
reweightWhereUsed: data.reweightWhereUsed || 0,
|
||||||
|
|||||||
@@ -120,7 +120,10 @@ export function createMetrics() {
|
|||||||
graphEdges: 0,
|
graphEdges: 0,
|
||||||
candidatePairs: 0,
|
candidatePairs: 0,
|
||||||
pairsFromWhat: 0,
|
pairsFromWhat: 0,
|
||||||
pairsFromHow: 0,
|
pairsFromRSem: 0,
|
||||||
|
rSemAvgSim: 0,
|
||||||
|
timeWindowFilteredPairs: 0,
|
||||||
|
topKPrunedPairs: 0,
|
||||||
edgeDensity: 0,
|
edgeDensity: 0,
|
||||||
reweightWhoUsed: 0,
|
reweightWhoUsed: 0,
|
||||||
reweightWhereUsed: 0,
|
reweightWhereUsed: 0,
|
||||||
@@ -133,7 +136,7 @@ export function createMetrics() {
|
|||||||
postGatePassRate: 0,
|
postGatePassRate: 0,
|
||||||
finalCount: 0,
|
finalCount: 0,
|
||||||
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
scoreDistribution: { min: 0, max: 0, mean: 0 },
|
||||||
byChannel: { what: 0, where: 0, how: 0, who: 0 },
|
byChannel: { what: 0, where: 0, rSem: 0, who: 0 },
|
||||||
time: 0,
|
time: 0,
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -376,11 +379,13 @@ export function formatMetricsLog(metrics) {
|
|||||||
lines.push('[Diffusion] PPR Spreading Activation');
|
lines.push('[Diffusion] PPR Spreading Activation');
|
||||||
lines.push(`├─ seeds: ${m.diffusion.seedCount}`);
|
lines.push(`├─ seeds: ${m.diffusion.seedCount}`);
|
||||||
lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
|
lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
|
||||||
lines.push(`├─ candidate_pairs: ${m.diffusion.candidatePairs || 0} (what=${m.diffusion.pairsFromWhat || 0}, how=${m.diffusion.pairsFromHow || 0})`);
|
lines.push(`├─ candidate_pairs: ${m.diffusion.candidatePairs || 0} (what=${m.diffusion.pairsFromWhat || 0}, r_sem=${m.diffusion.pairsFromRSem || 0})`);
|
||||||
|
lines.push(`├─ r_sem_avg_sim: ${m.diffusion.rSemAvgSim || 0}`);
|
||||||
|
lines.push(`├─ pair_filters: time_window=${m.diffusion.timeWindowFilteredPairs || 0}, topk_pruned=${m.diffusion.topKPrunedPairs || 0}`);
|
||||||
lines.push(`├─ edge_density: ${m.diffusion.edgeDensity || 0}%`);
|
lines.push(`├─ edge_density: ${m.diffusion.edgeDensity || 0}%`);
|
||||||
if (m.diffusion.graphEdges > 0) {
|
if (m.diffusion.graphEdges > 0) {
|
||||||
const ch = m.diffusion.byChannel || {};
|
const ch = m.diffusion.byChannel || {};
|
||||||
lines.push(`│ ├─ by_channel: what=${ch.what || 0}, how=${ch.how || 0}, who=${ch.who || 0}, where=${ch.where || 0}`);
|
lines.push(`│ ├─ by_channel: what=${ch.what || 0}, r_sem=${ch.rSem || 0}, who=${ch.who || 0}, where=${ch.where || 0}`);
|
||||||
lines.push(`│ └─ reweight_used: who=${m.diffusion.reweightWhoUsed || 0}, where=${m.diffusion.reweightWhereUsed || 0}`);
|
lines.push(`│ └─ reweight_used: who=${m.diffusion.reweightWhoUsed || 0}, where=${m.diffusion.reweightWhereUsed || 0}`);
|
||||||
}
|
}
|
||||||
if (m.diffusion.iterations > 0) {
|
if (m.diffusion.iterations > 0) {
|
||||||
|
|||||||
@@ -205,6 +205,8 @@ export async function saveStateVectors(chatId, items, fingerprint) {
|
|||||||
floor: item.floor,
|
floor: item.floor,
|
||||||
vector: float32ToBuffer(new Float32Array(item.vector)),
|
vector: float32ToBuffer(new Float32Array(item.vector)),
|
||||||
dims: item.vector.length,
|
dims: item.vector.length,
|
||||||
|
rVector: item.rVector?.length ? float32ToBuffer(new Float32Array(item.rVector)) : null,
|
||||||
|
rDims: item.rVector?.length ? item.rVector.length : 0,
|
||||||
fingerprint,
|
fingerprint,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@@ -222,6 +224,7 @@ export async function getAllStateVectors(chatId) {
|
|||||||
return records.map(r => ({
|
return records.map(r => ({
|
||||||
...r,
|
...r,
|
||||||
vector: bufferToFloat32(r.vector),
|
vector: bufferToFloat32(r.vector),
|
||||||
|
rVector: r.rVector ? bufferToFloat32(r.rVector) : null,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// Vector Import/Export
|
// Vector Import/Export
|
||||||
// 向量数据导入导出(当前 chatId 级别)
|
// 向量数据导入导出(当前 chatId 级别)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
@@ -30,7 +30,7 @@ import { getEngineFingerprint } from '../utils/embedder.js';
|
|||||||
import { getVectorConfig } from '../../data/config.js';
|
import { getVectorConfig } from '../../data/config.js';
|
||||||
|
|
||||||
const MODULE_ID = 'vector-io';
|
const MODULE_ID = 'vector-io';
|
||||||
const EXPORT_VERSION = 1;
|
const EXPORT_VERSION = 2;
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 工具函数
|
// 工具函数
|
||||||
@@ -139,9 +139,15 @@ export async function exportVectors(onProgress) {
|
|||||||
// state vectors
|
// state vectors
|
||||||
const sortedStateVectors = [...stateVectors].sort((a, b) => String(a.atomId).localeCompare(String(b.atomId)));
|
const sortedStateVectors = [...stateVectors].sort((a, b) => String(a.atomId).localeCompare(String(b.atomId)));
|
||||||
const stateVectorsOrdered = sortedStateVectors.map(v => v.vector);
|
const stateVectorsOrdered = sortedStateVectors.map(v => v.vector);
|
||||||
|
const rDims = sortedStateVectors.find(v => v.rVector?.length)?.rVector?.length || dims;
|
||||||
|
const stateRVectorsOrdered = sortedStateVectors.map(v =>
|
||||||
|
v.rVector?.length ? v.rVector : new Array(rDims).fill(0)
|
||||||
|
);
|
||||||
const stateVectorsJsonl = sortedStateVectors.map(v => JSON.stringify({
|
const stateVectorsJsonl = sortedStateVectors.map(v => JSON.stringify({
|
||||||
atomId: v.atomId,
|
atomId: v.atomId,
|
||||||
floor: v.floor,
|
floor: v.floor,
|
||||||
|
hasRVector: !!(v.rVector?.length),
|
||||||
|
rDims: v.rVector?.length || 0,
|
||||||
})).join('\n');
|
})).join('\n');
|
||||||
|
|
||||||
// manifest
|
// manifest
|
||||||
@@ -156,6 +162,8 @@ export async function exportVectors(onProgress) {
|
|||||||
eventCount: sortedEventVectors.length,
|
eventCount: sortedEventVectors.length,
|
||||||
stateAtomCount: stateAtoms.length,
|
stateAtomCount: stateAtoms.length,
|
||||||
stateVectorCount: stateVectors.length,
|
stateVectorCount: stateVectors.length,
|
||||||
|
stateRVectorCount: sortedStateVectors.filter(v => v.rVector?.length).length,
|
||||||
|
rDims,
|
||||||
lastChunkFloor: meta.lastChunkFloor ?? -1,
|
lastChunkFloor: meta.lastChunkFloor ?? -1,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -173,6 +181,9 @@ export async function exportVectors(onProgress) {
|
|||||||
'state_vectors.bin': stateVectorsOrdered.length
|
'state_vectors.bin': stateVectorsOrdered.length
|
||||||
? float32ToBytes(stateVectorsOrdered, dims)
|
? float32ToBytes(stateVectorsOrdered, dims)
|
||||||
: new Uint8Array(0),
|
: new Uint8Array(0),
|
||||||
|
'state_r_vectors.bin': stateRVectorsOrdered.length
|
||||||
|
? float32ToBytes(stateRVectorsOrdered, rDims)
|
||||||
|
: new Uint8Array(0),
|
||||||
}, { level: 1 }); // 降低压缩级别,速度优先
|
}, { level: 1 }); // 降低压缩级别,速度优先
|
||||||
|
|
||||||
onProgress?.('下载文件...');
|
onProgress?.('下载文件...');
|
||||||
@@ -226,7 +237,7 @@ export async function importVectors(file, onProgress) {
|
|||||||
|
|
||||||
const manifest = JSON.parse(strFromU8(unzipped['manifest.json']));
|
const manifest = JSON.parse(strFromU8(unzipped['manifest.json']));
|
||||||
|
|
||||||
if (manifest.version !== EXPORT_VERSION) {
|
if (![1, 2].includes(manifest.version)) {
|
||||||
throw new Error(`不支持的版本: ${manifest.version}`);
|
throw new Error(`不支持的版本: ${manifest.version}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,11 +286,17 @@ export async function importVectors(file, onProgress) {
|
|||||||
const stateVectorsJsonl = unzipped['state_vectors.jsonl'] ? strFromU8(unzipped['state_vectors.jsonl']) : '';
|
const stateVectorsJsonl = unzipped['state_vectors.jsonl'] ? strFromU8(unzipped['state_vectors.jsonl']) : '';
|
||||||
const stateVectorMetas = stateVectorsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
|
const stateVectorMetas = stateVectorsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
|
||||||
|
|
||||||
// 解析 L0 state vectors
|
// Parse L0 semantic vectors
|
||||||
const stateVectorsBytes = unzipped['state_vectors.bin'];
|
const stateVectorsBytes = unzipped['state_vectors.bin'];
|
||||||
const stateVectors = (stateVectorsBytes && stateVectorMetas.length)
|
const stateVectors = (stateVectorsBytes && stateVectorMetas.length)
|
||||||
? bytesToFloat32(stateVectorsBytes, manifest.dims)
|
? bytesToFloat32(stateVectorsBytes, manifest.dims)
|
||||||
: [];
|
: [];
|
||||||
|
// Parse optional L0 r-vectors (for diffusion r-sem edges)
|
||||||
|
const stateRVectorsBytes = unzipped['state_r_vectors.bin'];
|
||||||
|
const stateRVectors = (stateRVectorsBytes && stateVectorMetas.length)
|
||||||
|
? bytesToFloat32(stateRVectorsBytes, manifest.rDims || manifest.dims)
|
||||||
|
: [];
|
||||||
|
const hasRVectorMeta = stateVectorMetas.some(m => typeof m.hasRVector === 'boolean');
|
||||||
|
|
||||||
// 校验数量
|
// 校验数量
|
||||||
if (chunkMetas.length !== chunkVectors.length) {
|
if (chunkMetas.length !== chunkVectors.length) {
|
||||||
@@ -291,6 +308,9 @@ export async function importVectors(file, onProgress) {
|
|||||||
if (stateVectorMetas.length !== stateVectors.length) {
|
if (stateVectorMetas.length !== stateVectors.length) {
|
||||||
throw new Error(`state 向量数量不匹配: 元数据 ${stateVectorMetas.length}, 向量 ${stateVectors.length}`);
|
throw new Error(`state 向量数量不匹配: 元数据 ${stateVectorMetas.length}, 向量 ${stateVectors.length}`);
|
||||||
}
|
}
|
||||||
|
if (stateRVectors.length > 0 && stateVectorMetas.length !== stateRVectors.length) {
|
||||||
|
throw new Error(`state r-vector count mismatch: meta=${stateVectorMetas.length}, vectors=${stateRVectors.length}`);
|
||||||
|
}
|
||||||
|
|
||||||
onProgress?.('清空旧数据...');
|
onProgress?.('清空旧数据...');
|
||||||
|
|
||||||
@@ -337,12 +357,13 @@ export async function importVectors(file, onProgress) {
|
|||||||
saveStateAtoms(stateAtoms);
|
saveStateAtoms(stateAtoms);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 写入 state vectors
|
// Write state vectors (semantic + optional r-vector)
|
||||||
if (stateVectorMetas.length > 0) {
|
if (stateVectorMetas.length > 0) {
|
||||||
const stateVectorItems = stateVectorMetas.map((meta, idx) => ({
|
const stateVectorItems = stateVectorMetas.map((meta, idx) => ({
|
||||||
atomId: meta.atomId,
|
atomId: meta.atomId,
|
||||||
floor: meta.floor,
|
floor: meta.floor,
|
||||||
vector: stateVectors[idx],
|
vector: stateVectors[idx],
|
||||||
|
rVector: (stateRVectors[idx] && (!hasRVectorMeta || meta.hasRVector)) ? stateRVectors[idx] : null,
|
||||||
}));
|
}));
|
||||||
await saveStateVectors(chatId, stateVectorItems, manifest.fingerprint);
|
await saveStateVectors(chatId, stateVectorItems, manifest.fingerprint);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user