refactor diffusion to r-sem edges with time window and add rVector I/O

This commit is contained in:
2026-02-15 00:36:21 +08:00
parent a787e58833
commit 5dd1d6b50b
6 changed files with 238 additions and 91 deletions

View File

@@ -32,6 +32,7 @@ const MODULE_ID = 'state-integration';
const CONCURRENCY = 50;
const STAGGER_DELAY = 15;
const DEBUG_CONCURRENCY = true;
const R_AGG_MAX_CHARS = 256;
let initialized = false;
let extractionCancelled = false;
@@ -112,6 +113,18 @@ function buildL0InputText(userMessage, aiMessage) {
return parts.join('\n\n---\n\n').trim();
}
/**
 * Builds the aggregate relation text for an atom: the unique, non-empty
 * `r` labels of its edges joined with ' ; '.
 *
 * Falls back to the atom's `semantic` text (trimmed) when no labels are
 * present, and caps the joined result at R_AGG_MAX_CHARS characters.
 *
 * @param {object|null|undefined} atom - atom carrying optional `edges`
 *   (each with an `r` label) and an optional `semantic` string.
 * @returns {string} aggregate text, possibly empty.
 */
function buildRAggregateText(atom) {
    const labels = new Set();
    const edgeList = atom?.edges || [];
    for (const edge of edgeList) {
        const label = String(edge?.r || '').trim();
        if (label !== '') {
            labels.add(label);
        }
    }
    const aggregate = Array.from(labels).join(' ; ');
    if (aggregate === '') {
        return String(atom?.semantic || '').trim();
    }
    // slice is a no-op when aggregate is already within the cap
    return aggregate.slice(0, R_AGG_MAX_CHARS);
}
export async function incrementalExtractAtoms(chatId, chat, onProgress, options = {}) {
const { maxFloors = Infinity } = options;
if (!chatId || !chat?.length) return { built: 0 };
@@ -271,21 +284,36 @@ async function vectorizeAtoms(chatId, atoms, onProgress) {
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return;
const texts = atoms.map(a => a.semantic);
const semanticTexts = atoms.map(a => a.semantic);
const rTexts = atoms.map(a => buildRAggregateText(a));
const fingerprint = getEngineFingerprint(vectorCfg);
const batchSize = 20;
try {
const allVectors = [];
for (let i = 0; i < texts.length; i += batchSize) {
for (let i = 0; i < semanticTexts.length; i += batchSize) {
if (extractionCancelled) break;
const batch = texts.slice(i, i + batchSize);
const vectors = await embed(batch, { timeout: 30000 });
allVectors.push(...vectors);
const semBatch = semanticTexts.slice(i, i + batchSize);
const rBatch = rTexts.slice(i, i + batchSize);
const payload = semBatch.concat(rBatch);
const vectors = await embed(payload, { timeout: 30000 });
const split = semBatch.length;
if (!Array.isArray(vectors) || vectors.length < split * 2) {
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
}
const semVectors = vectors.slice(0, split);
const rVectors = vectors.slice(split, split + split);
onProgress?.(allVectors.length, texts.length);
for (let j = 0; j < split; j++) {
allVectors.push({
vector: semVectors[j],
rVector: rVectors[j] || semVectors[j],
});
}
onProgress?.(allVectors.length, semanticTexts.length);
}
if (extractionCancelled) return;
@@ -293,7 +321,8 @@ async function vectorizeAtoms(chatId, atoms, onProgress) {
const items = atoms.slice(0, allVectors.length).map((a, i) => ({
atomId: a.atomId,
floor: a.floor,
vector: allVectors[i],
vector: allVectors[i].vector,
rVector: allVectors[i].rVector,
}));
await saveStateVectors(chatId, items, fingerprint);
@@ -380,16 +409,24 @@ async function vectorizeAtomsSimple(chatId, atoms) {
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return;
const texts = atoms.map(a => a.semantic);
const semanticTexts = atoms.map(a => a.semantic);
const rTexts = atoms.map(a => buildRAggregateText(a));
const fingerprint = getEngineFingerprint(vectorCfg);
try {
const vectors = await embed(texts, { timeout: 30000 });
const vectors = await embed(semanticTexts.concat(rTexts), { timeout: 30000 });
const split = semanticTexts.length;
if (!Array.isArray(vectors) || vectors.length < split * 2) {
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
}
const semVectors = vectors.slice(0, split);
const rVectors = vectors.slice(split, split + split);
const items = atoms.map((a, i) => ({
atomId: a.atomId,
floor: a.floor,
vector: vectors[i],
vector: semVectors[i],
rVector: rVectors[i] || semVectors[i],
}));
await saveStateVectors(chatId, items, fingerprint);