refactor diffusion to r-sem edges with time window and add rVector I/O

This commit is contained in:
2026-02-15 00:36:21 +08:00
parent a787e58833
commit 5dd1d6b50b
6 changed files with 238 additions and 91 deletions

View File

@@ -184,6 +184,19 @@ const VECTOR_WARNING_COOLDOWN_MS = 120000; // 2分钟内不重复提醒
const EXT_PROMPT_KEY = "LittleWhiteBox_StorySummary";
const MIN_INJECTION_DEPTH = 2;
const R_AGG_MAX_CHARS = 256;
/**
 * Aggregate the relation (`r`) strings of an atom's edges into a single,
 * deduplicated text snippet capped at R_AGG_MAX_CHARS characters.
 * When no edge carries a usable `r`, falls back to the atom's trimmed
 * `semantic` text instead.
 *
 * @param {{edges?: Array<{r?: string}>, semantic?: string}} atom - source atom (may be null/undefined)
 * @returns {string} joined relation text (truncated) or the semantic fallback
 */
function buildRAggregateText(atom) {
    const seen = new Set();
    const parts = [];
    for (const edge of (atom?.edges || [])) {
        // Falsy `r` values (undefined, "", 0, false) are skipped entirely.
        const rel = String(edge?.r || "").trim();
        if (rel && !seen.has(rel)) {
            seen.add(rel);
            parts.push(rel);
        }
    }
    if (parts.length === 0) {
        return String(atom?.semantic || "").trim();
    }
    // slice() is a no-op when the string is already within the cap.
    return parts.join(" ; ").slice(0, R_AGG_MAX_CHARS);
}
// ═══════════════════════════════════════════════════════════════════════════
// 分词器预热(依赖 tokenizer.js 内部状态机,支持失败重试)
@@ -447,13 +460,21 @@ async function handleGenerateVectors(vectorCfg) {
if (vectorCancelled) break;
const batch = atoms.slice(i, i + batchSize);
const texts = batch.map(a => a.semantic);
const semTexts = batch.map(a => a.semantic);
const rTexts = batch.map(a => buildRAggregateText(a));
try {
const vectors = await embed(texts, vectorCfg, { signal: vectorAbortController.signal });
const vectors = await embed(semTexts.concat(rTexts), vectorCfg, { signal: vectorAbortController.signal });
const split = semTexts.length;
if (!Array.isArray(vectors) || vectors.length < split * 2) {
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
}
const semVectors = vectors.slice(0, split);
const rVectors = vectors.slice(split, split + split);
const items = batch.map((a, j) => ({
atomId: a.atomId,
floor: a.floor,
vector: vectors[j],
vector: semVectors[j],
rVector: rVectors[j] || semVectors[j],
}));
await saveStateVectors(chatId, items, fingerprint);
l0Completed += batch.length;