refactor diffusion to r-sem edges with time window and add rVector I/O
This commit is contained in:
@@ -32,6 +32,7 @@ const MODULE_ID = 'state-integration';
|
||||
const CONCURRENCY = 50;
|
||||
const STAGGER_DELAY = 15;
|
||||
const DEBUG_CONCURRENCY = true;
|
||||
const R_AGG_MAX_CHARS = 256;
|
||||
|
||||
let initialized = false;
|
||||
let extractionCancelled = false;
|
||||
@@ -112,6 +113,18 @@ function buildL0InputText(userMessage, aiMessage) {
|
||||
return parts.join('\n\n---\n\n').trim();
|
||||
}
|
||||
|
||||
function buildRAggregateText(atom) {
|
||||
const uniq = new Set();
|
||||
for (const edge of (atom?.edges || [])) {
|
||||
const r = String(edge?.r || '').trim();
|
||||
if (!r) continue;
|
||||
uniq.add(r);
|
||||
}
|
||||
const joined = [...uniq].join(' ; ');
|
||||
if (!joined) return String(atom?.semantic || '').trim();
|
||||
return joined.length > R_AGG_MAX_CHARS ? joined.slice(0, R_AGG_MAX_CHARS) : joined;
|
||||
}
|
||||
|
||||
export async function incrementalExtractAtoms(chatId, chat, onProgress, options = {}) {
|
||||
const { maxFloors = Infinity } = options;
|
||||
if (!chatId || !chat?.length) return { built: 0 };
|
||||
@@ -271,21 +284,36 @@ async function vectorizeAtoms(chatId, atoms, onProgress) {
|
||||
const vectorCfg = getVectorConfig();
|
||||
if (!vectorCfg?.enabled) return;
|
||||
|
||||
const texts = atoms.map(a => a.semantic);
|
||||
const semanticTexts = atoms.map(a => a.semantic);
|
||||
const rTexts = atoms.map(a => buildRAggregateText(a));
|
||||
const fingerprint = getEngineFingerprint(vectorCfg);
|
||||
const batchSize = 20;
|
||||
|
||||
try {
|
||||
const allVectors = [];
|
||||
|
||||
for (let i = 0; i < texts.length; i += batchSize) {
|
||||
for (let i = 0; i < semanticTexts.length; i += batchSize) {
|
||||
if (extractionCancelled) break;
|
||||
|
||||
const batch = texts.slice(i, i + batchSize);
|
||||
const vectors = await embed(batch, { timeout: 30000 });
|
||||
allVectors.push(...vectors);
|
||||
const semBatch = semanticTexts.slice(i, i + batchSize);
|
||||
const rBatch = rTexts.slice(i, i + batchSize);
|
||||
const payload = semBatch.concat(rBatch);
|
||||
const vectors = await embed(payload, { timeout: 30000 });
|
||||
const split = semBatch.length;
|
||||
if (!Array.isArray(vectors) || vectors.length < split * 2) {
|
||||
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
|
||||
}
|
||||
const semVectors = vectors.slice(0, split);
|
||||
const rVectors = vectors.slice(split, split + split);
|
||||
|
||||
onProgress?.(allVectors.length, texts.length);
|
||||
for (let j = 0; j < split; j++) {
|
||||
allVectors.push({
|
||||
vector: semVectors[j],
|
||||
rVector: rVectors[j] || semVectors[j],
|
||||
});
|
||||
}
|
||||
|
||||
onProgress?.(allVectors.length, semanticTexts.length);
|
||||
}
|
||||
|
||||
if (extractionCancelled) return;
|
||||
@@ -293,7 +321,8 @@ async function vectorizeAtoms(chatId, atoms, onProgress) {
|
||||
const items = atoms.slice(0, allVectors.length).map((a, i) => ({
|
||||
atomId: a.atomId,
|
||||
floor: a.floor,
|
||||
vector: allVectors[i],
|
||||
vector: allVectors[i].vector,
|
||||
rVector: allVectors[i].rVector,
|
||||
}));
|
||||
|
||||
await saveStateVectors(chatId, items, fingerprint);
|
||||
@@ -380,16 +409,24 @@ async function vectorizeAtomsSimple(chatId, atoms) {
|
||||
const vectorCfg = getVectorConfig();
|
||||
if (!vectorCfg?.enabled) return;
|
||||
|
||||
const texts = atoms.map(a => a.semantic);
|
||||
const semanticTexts = atoms.map(a => a.semantic);
|
||||
const rTexts = atoms.map(a => buildRAggregateText(a));
|
||||
const fingerprint = getEngineFingerprint(vectorCfg);
|
||||
|
||||
try {
|
||||
const vectors = await embed(texts, { timeout: 30000 });
|
||||
const vectors = await embed(semanticTexts.concat(rTexts), { timeout: 30000 });
|
||||
const split = semanticTexts.length;
|
||||
if (!Array.isArray(vectors) || vectors.length < split * 2) {
|
||||
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
|
||||
}
|
||||
const semVectors = vectors.slice(0, split);
|
||||
const rVectors = vectors.slice(split, split + split);
|
||||
|
||||
const items = atoms.map((a, i) => ({
|
||||
atomId: a.atomId,
|
||||
floor: a.floor,
|
||||
vector: vectors[i],
|
||||
vector: semVectors[i],
|
||||
rVector: rVectors[i] || semVectors[i],
|
||||
}));
|
||||
|
||||
await saveStateVectors(chatId, items, fingerprint);
|
||||
|
||||
Reference in New Issue
Block a user