Fix garbled text checks and L0 LLM handling

This commit is contained in:
2026-02-06 15:08:20 +08:00
parent 44ca06f9b9
commit 56e30bfe02
9 changed files with 163 additions and 48 deletions

View File

@@ -50,7 +50,7 @@ function sanitizeFacts(parsed) {
};
if (isRel && item.trend) {
const validTrends = ['??', '??', '??', '??', '??', '??', '??'];
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
if (validTrends.includes(item.trend)) {
fact.trend = item.trend;
}

View File

@@ -100,19 +100,25 @@ Acknowledged. Now reviewing the incremental summarization specifications:
├─ progress: 0.0 to 1.0
└─ newMoment: 仅记录本次新增的关键时刻
[Fact Tracking - SPO ???]
?? ??: ?? & ???????
?? ??: ??????????????????
?? SPO ??:
? s: ??????/????
? p: ??????????????
? o: ???
?? KV ??: s+p ??????????
?? isState ????????:
? true = ????????????/??/??/???
? false = ??????????????
?? trend: ?????????/??/??/??/??/??/???
?? retracted: true ???????
[Fact Tracking - SPO / World Facts]
We maintain a small "world state" as SPO triples.
Each update is a JSON object: {s, p, o, isState, trend?, retracted?}
Core rules:
1) Keyed by (s + p). If a new update has the same (s+p), it overwrites the previous value.
2) Only output facts that are NEW or CHANGED in the new dialogue. Do NOT repeat unchanged facts.
3) isState meaning:
- isState: true -> core constraints that must stay stable and should NEVER be auto-deleted
(identity, location, life/death, ownership, relationship status, binding rules)
- isState: false -> non-core facts / soft memories that may be pruned by capacity limits later
4) Relationship facts:
- Use predicate format: "对X的看法" (X is the target person)
- trend is required for relationship facts, one of:
破裂 | 厌恶 | 反感 | 陌生 | 投缘 | 亲密 | 交融
5) Retraction (deletion):
- To delete a fact, output: {s, p, retracted: true}
6) Predicate normalization:
- Reuse existing predicates whenever possible, avoid inventing synonyms.
Ready to process incremental summary requests with strict deduplication.`,
@@ -432,4 +438,4 @@ export async function generateSummary(options) {
console.groupEnd();
return rawOutput;
}
}

View File

@@ -360,20 +360,20 @@ function initVectorUI() {
};
$('btn-clear-vectors').onclick = () => {
if (confirm('?????????')) postMsg('VECTOR_CLEAR');
if (confirm('确定清空所有向量数据?')) postMsg('VECTOR_CLEAR');
};
$('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE');
$('btn-export-vectors').onclick = () => {
$('btn-export-vectors').disabled = true;
$('vector-io-status').textContent = '???...';
$('vector-io-status').textContent = '导出中...';
postMsg('VECTOR_EXPORT');
};
$('btn-import-vectors').onclick = () => {
$('btn-import-vectors').disabled = true;
$('vector-io-status').textContent = '???...';
$('vector-io-status').textContent = '导入中...';
postMsg('VECTOR_IMPORT_PICK');
};

View File

@@ -26,7 +26,13 @@ export function isBatchCancelled() {
const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话(用户发言+角色回复中提取4-8个关键锚点。
只输出JSON
输入格式
<round>
<user>...</user>
<assistant>...</assistant>
</round>
只输出严格JSON不要解释不要前后多余文字
{"atoms":[{"t":"类型","s":"主体","v":"值","f":"来源"}]}
类型t
@@ -72,13 +78,13 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
if (userMessage?.mes?.trim()) {
const userText = filterText(userMessage.mes);
parts.push(`【用户:${userName}\n${userText}`);
parts.push(`<user name="${userName}">\n${userText}\n</user>`);
}
const aiText = filterText(aiMessage.mes);
parts.push(`【角色:${aiName}\n${aiText}`);
parts.push(`<assistant name="${aiName}">\n${aiText}\n</assistant>`);
const input = parts.join('\n\n---\n\n');
const input = `<round>\n${parts.join('\n')}\n</round>`;
xbLog.info(MODULE_ID, `floor ${aiFloor} 发送输入 len=${input.length}`);
@@ -89,43 +95,46 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
const response = await callLLM([
{ role: 'system', content: SYSTEM_PROMPT },
{ role: 'user', content: input },
{ role: 'assistant', content: '收到,开始提取并仅输出 JSON。' },
], {
temperature: 0.2,
max_tokens: 500,
timeout,
});
if (!response || !String(response).trim()) {
const rawText = String(response || '');
if (!rawText.trim()) {
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:响应为空`);
if (attempt < RETRY_COUNT) {
await sleep(RETRY_DELAY);
continue;
}
return [];
return null;
}
let parsed;
try {
parsed = parseJson(response);
parsed = parseJson(rawText);
} catch (e) {
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败JSON 异常`);
if (attempt < RETRY_COUNT) {
await sleep(RETRY_DELAY);
continue;
}
return [];
return null;
}
if (!parsed?.atoms || !Array.isArray(parsed.atoms)) {
xbLog.warn(MODULE_ID, `floor ${aiFloor} atoms 缺失raw="${rawText.slice(0, 300)}"`);
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败atoms 缺失`);
if (attempt < RETRY_COUNT) {
await sleep(RETRY_DELAY);
continue;
}
return [];
return null;
}
return parsed.atoms
const filtered = parsed.atoms
.filter(a => a?.t && a?.v)
.map((a, idx) => ({
atomId: `atom-${aiFloor}-${idx}`,
@@ -136,9 +145,13 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
source: a.f === 'u' ? 'user' : 'ai',
semantic: buildSemantic(a, userName, aiName),
}));
if (!filtered.length) {
xbLog.warn(MODULE_ID, `floor ${aiFloor} atoms 为空raw="${rawText.slice(0, 300)}"`);
}
return filtered;
} catch (e) {
if (batchCancelled) return [];
if (batchCancelled) return null;
if (attempt < RETRY_COUNT) {
xbLog.warn(MODULE_ID, `floor ${aiFloor}${attempt + 1}次失败,重试...`, e?.message);
@@ -146,11 +159,11 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
continue;
}
xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
return [];
return null;
}
}
return [];
return null;
}
/**

View File

@@ -1,10 +1,12 @@
// ═══════════════════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════════════════
// vector/llm/llm-service.js
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../../core/debug-core.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'vector-llm-service';
const SILICONFLOW_API_URL = 'https://api.siliconflow.cn';
const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';
// 唯一 ID 计数器
let callCounter = 0;
@@ -36,11 +38,17 @@ export async function callLLM(messages, options = {}) {
} = options;
const mod = getStreamingModule();
if (!mod) throw new Error('生成模块未加载');
if (!mod) throw new Error('Streaming module not ready');
const cfg = getVectorConfig();
const apiKey = cfg?.online?.key || '';
if (!apiKey) {
throw new Error('L0 requires siliconflow API key');
}
const top64 = b64UrlEncode(JSON.stringify(messages));
// 每次调用用唯一 ID避免 session 冲突
// 每次调用用唯一 ID避免 session 冲突
const uniqueId = generateUniqueId('l0');
const args = {
@@ -50,6 +58,10 @@ export async function callLLM(messages, options = {}) {
id: uniqueId,
temperature: String(temperature),
max_tokens: String(max_tokens),
api: 'openai',
apiurl: SILICONFLOW_API_URL,
apipassword: apiKey,
model: DEFAULT_L0_MODEL,
};
try {

View File

@@ -142,7 +142,11 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) {
try {
const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
if (!atoms?.length) {
if (atoms == null) {
throw new Error('llm_failed');
}
if (!atoms.length) {
setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
} else {
atoms.forEach(a => a.chatId = chatId);

View File

@@ -1627,7 +1627,7 @@ function rollbackToPreviousOf(messageId) {
const prevId = id - 1;
if (prevId < 0) return;
// ???? 1.0 ???????
// 1.0: restore from snapshot if available
const snap = getSnapshot(prevId);
if (snap) {
const normalized = normalizeSnapshotRecord(snap);
@@ -1645,7 +1645,7 @@ async function rollbackToPreviousOfAsync(messageId) {
const id = Number(messageId);
if (Number.isNaN(id)) return;
// ???????? floor>=id ? L0
// Notify L0 rollback hook for floor >= id
if (typeof globalThis.LWB_StateRollbackHook === 'function') {
try {
await globalThis.LWB_StateRollbackHook(id);
@@ -1660,7 +1660,7 @@ async function rollbackToPreviousOfAsync(messageId) {
if (mode === '2.0') {
try {
const mod = await import('./state2/index.js');
await mod.restoreStateV2ToFloor(prevId); // prevId<0 ???
await mod.restoreStateV2ToFloor(prevId); // prevId < 0 handled by implementation
} catch (e) {
console.error('[variablesCore][2.0] restoreStateV2ToFloor failed:', e);
}
@@ -1682,7 +1682,7 @@ async function rebuildVariablesFromScratch() {
await mod.restoreStateV2ToFloor(lastId);
return;
}
// 1.0 旧逻辑
// 1.0 legacy logic
setVarDict({});
const chat = getContext()?.chat || [];
for (let i = 0; i < chat.length; i++) {
@@ -1876,7 +1876,7 @@ async function applyVariablesForMessage(messageId) {
} catch (e) {
parseErrors++;
if (debugOn) {
try { xbLog.error(MODULE_ID, `plot-log 解析失败:楼<EFBFBD>?${messageId} <EFBFBD>?${idx + 1} 预览=${preview(b)}`, e); } catch {}
try { xbLog.error(MODULE_ID, `plot-log 解析失败:楼${messageId} ${idx + 1} 预览=${preview(b)}`, e); } catch {}
}
return;
}
@@ -1907,7 +1907,7 @@ async function applyVariablesForMessage(messageId) {
try {
xbLog.warn(
MODULE_ID,
`plot-log 未产生可执行指令:楼<EFBFBD>?${messageId} 块数=${blocks.length} 解析条目=${parsedPartsTotal} 解析失败=${parseErrors} 预览=${preview(blocks[0])}`
`plot-log 未产生可执行指令:楼${messageId} 块数=${blocks.length} 解析条目=${parsedPartsTotal} 解析失败=${parseErrors} 预览=${preview(blocks[0])}`
);
} catch {}
}
@@ -2183,7 +2183,7 @@ async function applyVariablesForMessage(messageId) {
const denied = guardDenied ? `,被规则拦截=${guardDenied}` : '';
xbLog.warn(
MODULE_ID,
`plot-log 指令执行后无变化:楼<EFBFBD>?${messageId} 指令<EFBFBD>?${ops.length}${denied} 示例=${preview(JSON.stringify(guardDeniedSamples))}`
`plot-log 指令执行后无变化:楼${messageId} 指令${ops.length}${denied} 示例=${preview(JSON.stringify(guardDeniedSamples))}`
);
} catch {}
}
@@ -2321,7 +2321,7 @@ function bindEvents() {
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
// ? ?? await????? apply ????????????
// Roll back first so re-apply uses the edited message
await rollbackToPreviousOfAsync(id);
setTimeout(async () => {
@@ -2358,7 +2358,7 @@ function bindEvents() {
lastSwipedId = id;
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
// ? ?? await???????????????
// Roll back first so swipe applies cleanly
await rollbackToPreviousOfAsync(id);
const tId = setTimeout(async () => {
@@ -2377,10 +2377,10 @@ function bindEvents() {
const id = getMsgIdStrict(data);
if (typeof id !== 'number') return;
// ? ????????await ???????
// Roll back first before delete handling
await rollbackToPreviousOfAsync(id);
// 2.0:物理删除消息 => 同步清理 WAL/ckpt避免膨胀
// 2.0: physical delete -> trim WAL/ckpt to avoid bloat
if (getVariablesMode() === '2.0') {
try {
const mod = await import('./state2/index.js');

View File

@@ -3,7 +3,7 @@
"private": true,
"type": "module",
"scripts": {
"lint": "eslint \"**/*.js\"",
"lint": "node scripts/check-garbled.js && eslint \"**/*.js\"",
"lint:fix": "eslint \"**/*.js\" --fix"
},
"devDependencies": {

80
scripts/check-garbled.js Normal file
View File

@@ -0,0 +1,80 @@
/* eslint-env node */
import fs from 'fs';
import path from 'path';
// Scan root: the repository is expected to be the current working directory.
const root = process.cwd();
// Only text assets we author by hand are scanned; binaries/media are skipped.
const includeExts = new Set(['.js', '.html', '.css']);
// Directories never descended into (third-party code and VCS internals).
const ignoreDirs = new Set(['node_modules', '.git']);
// Heuristics for mojibake left behind by a broken encoding round-trip:
// - 'question-marks': three or more consecutive literal '?' characters,
//   the typical residue of CJK text transcoded through a lossy charset.
// - 'replacement-char': U+FFFD, inserted by decoders for invalid bytes.
// Both regexes use /g and are reused across lines, so callers must reset
// `lastIndex` before each `.test()` (see scanFile).
const patterns = [
  { name: 'question-marks', regex: /\?\?\?/g },
  { name: 'replacement-char', regex: /\uFFFD/g },
];
/**
 * Whether a directory entry should be skipped entirely during the walk.
 * @param {string} dirName - Bare directory name (not a full path).
 * @returns {boolean} true when the directory is in the ignore set.
 */
function isIgnoredDir(dirName) {
  return ignoreDirs.has(dirName);
}
/**
 * Recursively collect every file under `dir` whose extension is listed in
 * `includeExts`, skipping ignored directories.
 * @param {string} dir - Directory to start from.
 * @param {string[]} [files] - Accumulator, also returned for convenience.
 * @returns {string[]} Absolute-ish paths (relative to `dir`'s base) of matches.
 */
function walk(dir, files = []) {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      // Recurse unless the directory is on the ignore list.
      if (!isIgnoredDir(entry.name)) walk(fullPath, files);
    } else if (entry.isFile() && includeExts.has(path.extname(entry.name))) {
      files.push(fullPath);
    }
    // Symlinks and other entry types are intentionally ignored.
  }
  return files;
}
/**
 * Scan a single file line-by-line for garbled-text patterns.
 * @param {string} filePath - File to read as UTF-8.
 * @returns {{line: number, name: string, preview: string}[]} One hit per
 *   (line, pattern) combination; an unreadable file yields [].
 */
function scanFile(filePath) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch {
    // Unreadable file (permissions, deleted mid-scan) — skip silently.
    return [];
  }
  const hits = [];
  content.split(/\r?\n/).forEach((line, idx) => {
    for (const { name, regex } of patterns) {
      // The shared /g regexes are stateful: reset before every .test().
      regex.lastIndex = 0;
      if (regex.test(line)) {
        hits.push({
          line: idx + 1, // report 1-based line numbers
          name,
          preview: line.replace(/\t/g, '\\t').slice(0, 200),
        });
      }
    }
  });
  return hits;
}
// Entry point: walk the tree once, keep only files with hits, then report
// every hit in a single pass so a CI run surfaces the full list at once.
const issues = walk(root)
  .map((file) => ({ file, hits: scanFile(file) }))
  .filter(({ hits }) => hits.length > 0);

if (issues.length > 0) {
  console.error('Garbled text check failed:');
  for (const { file, hits } of issues) {
    const rel = path.relative(root, file);
    for (const { line, name, preview } of hits) {
      console.error(`- ${rel}:${line} [${name}] ${preview}`);
    }
  }
  // Non-zero exit fails the `lint` npm script that chains this checker.
  process.exit(1);
} else {
  console.log('Garbled text check passed.');
}