Fix garbled text checks and L0 LLM handling
This commit is contained in:
@@ -50,7 +50,7 @@ function sanitizeFacts(parsed) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (isRel && item.trend) {
|
if (isRel && item.trend) {
|
||||||
const validTrends = ['??', '??', '??', '??', '??', '??', '??'];
|
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
|
||||||
if (validTrends.includes(item.trend)) {
|
if (validTrends.includes(item.trend)) {
|
||||||
fact.trend = item.trend;
|
fact.trend = item.trend;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -100,19 +100,25 @@ Acknowledged. Now reviewing the incremental summarization specifications:
|
|||||||
├─ progress: 0.0 to 1.0
|
├─ progress: 0.0 to 1.0
|
||||||
└─ newMoment: 仅记录本次新增的关键时刻
|
└─ newMoment: 仅记录本次新增的关键时刻
|
||||||
|
|
||||||
[Fact Tracking - SPO ???]
|
[Fact Tracking - SPO / World Facts]
|
||||||
?? ??: ?? & ???????
|
We maintain a small "world state" as SPO triples.
|
||||||
?? ??: ??????????????????
|
Each update is a JSON object: {s, p, o, isState, trend?, retracted?}
|
||||||
?? SPO ??:
|
|
||||||
? s: ??????/????
|
Core rules:
|
||||||
? p: ??????????????
|
1) Keyed by (s + p). If a new update has the same (s+p), it overwrites the previous value.
|
||||||
? o: ???
|
2) Only output facts that are NEW or CHANGED in the new dialogue. Do NOT repeat unchanged facts.
|
||||||
?? KV ??: s+p ??????????
|
3) isState meaning:
|
||||||
?? isState ????????:
|
- isState: true -> core constraints that must stay stable and should NEVER be auto-deleted
|
||||||
? true = ????????????/??/??/???
|
(identity, location, life/death, ownership, relationship status, binding rules)
|
||||||
? false = ??????????????
|
- isState: false -> non-core facts / soft memories that may be pruned by capacity limits later
|
||||||
?? trend: ?????????/??/??/??/??/??/???
|
4) Relationship facts:
|
||||||
?? retracted: true ???????
|
- Use predicate format: "对X的看法" (X is the target person)
|
||||||
|
- trend is required for relationship facts, one of:
|
||||||
|
破裂 | 厌恶 | 反感 | 陌生 | 投缘 | 亲密 | 交融
|
||||||
|
5) Retraction (deletion):
|
||||||
|
- To delete a fact, output: {s, p, retracted: true}
|
||||||
|
6) Predicate normalization:
|
||||||
|
- Reuse existing predicates whenever possible, avoid inventing synonyms.
|
||||||
|
|
||||||
Ready to process incremental summary requests with strict deduplication.`,
|
Ready to process incremental summary requests with strict deduplication.`,
|
||||||
|
|
||||||
|
|||||||
@@ -360,20 +360,20 @@ function initVectorUI() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
$('btn-clear-vectors').onclick = () => {
|
$('btn-clear-vectors').onclick = () => {
|
||||||
if (confirm('?????????')) postMsg('VECTOR_CLEAR');
|
if (confirm('确定清空所有向量数据?')) postMsg('VECTOR_CLEAR');
|
||||||
};
|
};
|
||||||
|
|
||||||
$('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE');
|
$('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE');
|
||||||
|
|
||||||
$('btn-export-vectors').onclick = () => {
|
$('btn-export-vectors').onclick = () => {
|
||||||
$('btn-export-vectors').disabled = true;
|
$('btn-export-vectors').disabled = true;
|
||||||
$('vector-io-status').textContent = '???...';
|
$('vector-io-status').textContent = '导出中...';
|
||||||
postMsg('VECTOR_EXPORT');
|
postMsg('VECTOR_EXPORT');
|
||||||
};
|
};
|
||||||
|
|
||||||
$('btn-import-vectors').onclick = () => {
|
$('btn-import-vectors').onclick = () => {
|
||||||
$('btn-import-vectors').disabled = true;
|
$('btn-import-vectors').disabled = true;
|
||||||
$('vector-io-status').textContent = '???...';
|
$('vector-io-status').textContent = '导入中...';
|
||||||
postMsg('VECTOR_IMPORT_PICK');
|
postMsg('VECTOR_IMPORT_PICK');
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,13 @@ export function isBatchCancelled() {
|
|||||||
|
|
||||||
const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话(用户发言+角色回复)中提取4-8个关键锚点。
|
const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话(用户发言+角色回复)中提取4-8个关键锚点。
|
||||||
|
|
||||||
只输出JSON:
|
输入格式:
|
||||||
|
<round>
|
||||||
|
<user>...</user>
|
||||||
|
<assistant>...</assistant>
|
||||||
|
</round>
|
||||||
|
|
||||||
|
只输出严格JSON(不要解释,不要前后多余文字):
|
||||||
{"atoms":[{"t":"类型","s":"主体","v":"值","f":"来源"}]}
|
{"atoms":[{"t":"类型","s":"主体","v":"值","f":"来源"}]}
|
||||||
|
|
||||||
类型(t):
|
类型(t):
|
||||||
@@ -72,13 +78,13 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
|||||||
|
|
||||||
if (userMessage?.mes?.trim()) {
|
if (userMessage?.mes?.trim()) {
|
||||||
const userText = filterText(userMessage.mes);
|
const userText = filterText(userMessage.mes);
|
||||||
parts.push(`【用户:${userName}】\n${userText}`);
|
parts.push(`<user name="${userName}">\n${userText}\n</user>`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const aiText = filterText(aiMessage.mes);
|
const aiText = filterText(aiMessage.mes);
|
||||||
parts.push(`【角色:${aiName}】\n${aiText}`);
|
parts.push(`<assistant name="${aiName}">\n${aiText}\n</assistant>`);
|
||||||
|
|
||||||
const input = parts.join('\n\n---\n\n');
|
const input = `<round>\n${parts.join('\n')}\n</round>`;
|
||||||
|
|
||||||
xbLog.info(MODULE_ID, `floor ${aiFloor} 发送输入 len=${input.length}`);
|
xbLog.info(MODULE_ID, `floor ${aiFloor} 发送输入 len=${input.length}`);
|
||||||
|
|
||||||
@@ -89,43 +95,46 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
|||||||
const response = await callLLM([
|
const response = await callLLM([
|
||||||
{ role: 'system', content: SYSTEM_PROMPT },
|
{ role: 'system', content: SYSTEM_PROMPT },
|
||||||
{ role: 'user', content: input },
|
{ role: 'user', content: input },
|
||||||
|
{ role: 'assistant', content: '收到,开始提取并仅输出 JSON。' },
|
||||||
], {
|
], {
|
||||||
temperature: 0.2,
|
temperature: 0.2,
|
||||||
max_tokens: 500,
|
max_tokens: 500,
|
||||||
timeout,
|
timeout,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!response || !String(response).trim()) {
|
const rawText = String(response || '');
|
||||||
|
if (!rawText.trim()) {
|
||||||
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:响应为空`);
|
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:响应为空`);
|
||||||
if (attempt < RETRY_COUNT) {
|
if (attempt < RETRY_COUNT) {
|
||||||
await sleep(RETRY_DELAY);
|
await sleep(RETRY_DELAY);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return [];
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
let parsed;
|
let parsed;
|
||||||
try {
|
try {
|
||||||
parsed = parseJson(response);
|
parsed = parseJson(rawText);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:JSON 异常`);
|
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:JSON 异常`);
|
||||||
if (attempt < RETRY_COUNT) {
|
if (attempt < RETRY_COUNT) {
|
||||||
await sleep(RETRY_DELAY);
|
await sleep(RETRY_DELAY);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return [];
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!parsed?.atoms || !Array.isArray(parsed.atoms)) {
|
if (!parsed?.atoms || !Array.isArray(parsed.atoms)) {
|
||||||
|
xbLog.warn(MODULE_ID, `floor ${aiFloor} atoms 缺失,raw="${rawText.slice(0, 300)}"`);
|
||||||
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:atoms 缺失`);
|
xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:atoms 缺失`);
|
||||||
if (attempt < RETRY_COUNT) {
|
if (attempt < RETRY_COUNT) {
|
||||||
await sleep(RETRY_DELAY);
|
await sleep(RETRY_DELAY);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return [];
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return parsed.atoms
|
const filtered = parsed.atoms
|
||||||
.filter(a => a?.t && a?.v)
|
.filter(a => a?.t && a?.v)
|
||||||
.map((a, idx) => ({
|
.map((a, idx) => ({
|
||||||
atomId: `atom-${aiFloor}-${idx}`,
|
atomId: `atom-${aiFloor}-${idx}`,
|
||||||
@@ -136,9 +145,13 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
|||||||
source: a.f === 'u' ? 'user' : 'ai',
|
source: a.f === 'u' ? 'user' : 'ai',
|
||||||
semantic: buildSemantic(a, userName, aiName),
|
semantic: buildSemantic(a, userName, aiName),
|
||||||
}));
|
}));
|
||||||
|
if (!filtered.length) {
|
||||||
|
xbLog.warn(MODULE_ID, `floor ${aiFloor} atoms 为空,raw="${rawText.slice(0, 300)}"`);
|
||||||
|
}
|
||||||
|
return filtered;
|
||||||
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (batchCancelled) return [];
|
if (batchCancelled) return null;
|
||||||
|
|
||||||
if (attempt < RETRY_COUNT) {
|
if (attempt < RETRY_COUNT) {
|
||||||
xbLog.warn(MODULE_ID, `floor ${aiFloor} 第${attempt + 1}次失败,重试...`, e?.message);
|
xbLog.warn(MODULE_ID, `floor ${aiFloor} 第${attempt + 1}次失败,重试...`, e?.message);
|
||||||
@@ -146,11 +159,11 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
|
xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
|
||||||
return [];
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [];
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// vector/llm/llm-service.js
|
// vector/llm/llm-service.js
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { xbLog } from '../../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
|
import { getVectorConfig } from '../../data/config.js';
|
||||||
|
|
||||||
const MODULE_ID = 'vector-llm-service';
|
const MODULE_ID = 'vector-llm-service';
|
||||||
|
const SILICONFLOW_API_URL = 'https://api.siliconflow.cn';
|
||||||
|
const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';
|
||||||
|
|
||||||
// 唯一 ID 计数器
|
// 唯一 ID 计数器
|
||||||
let callCounter = 0;
|
let callCounter = 0;
|
||||||
@@ -36,11 +38,17 @@ export async function callLLM(messages, options = {}) {
|
|||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const mod = getStreamingModule();
|
const mod = getStreamingModule();
|
||||||
if (!mod) throw new Error('生成模块未加载');
|
if (!mod) throw new Error('Streaming module not ready');
|
||||||
|
|
||||||
|
const cfg = getVectorConfig();
|
||||||
|
const apiKey = cfg?.online?.key || '';
|
||||||
|
if (!apiKey) {
|
||||||
|
throw new Error('L0 requires siliconflow API key');
|
||||||
|
}
|
||||||
|
|
||||||
const top64 = b64UrlEncode(JSON.stringify(messages));
|
const top64 = b64UrlEncode(JSON.stringify(messages));
|
||||||
|
|
||||||
// ★ 每次调用用唯一 ID,避免 session 冲突
|
// 每次调用用唯一 ID,避免 session 冲突
|
||||||
const uniqueId = generateUniqueId('l0');
|
const uniqueId = generateUniqueId('l0');
|
||||||
|
|
||||||
const args = {
|
const args = {
|
||||||
@@ -50,6 +58,10 @@ export async function callLLM(messages, options = {}) {
|
|||||||
id: uniqueId,
|
id: uniqueId,
|
||||||
temperature: String(temperature),
|
temperature: String(temperature),
|
||||||
max_tokens: String(max_tokens),
|
max_tokens: String(max_tokens),
|
||||||
|
api: 'openai',
|
||||||
|
apiurl: SILICONFLOW_API_URL,
|
||||||
|
apipassword: apiKey,
|
||||||
|
model: DEFAULT_L0_MODEL,
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -142,7 +142,11 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) {
|
|||||||
try {
|
try {
|
||||||
const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
|
const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
|
||||||
|
|
||||||
if (!atoms?.length) {
|
if (atoms == null) {
|
||||||
|
throw new Error('llm_failed');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!atoms.length) {
|
||||||
setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
|
setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
|
||||||
} else {
|
} else {
|
||||||
atoms.forEach(a => a.chatId = chatId);
|
atoms.forEach(a => a.chatId = chatId);
|
||||||
|
|||||||
@@ -1627,7 +1627,7 @@ function rollbackToPreviousOf(messageId) {
|
|||||||
const prevId = id - 1;
|
const prevId = id - 1;
|
||||||
if (prevId < 0) return;
|
if (prevId < 0) return;
|
||||||
|
|
||||||
// ???? 1.0 ???????
|
// 1.0: restore from snapshot if available
|
||||||
const snap = getSnapshot(prevId);
|
const snap = getSnapshot(prevId);
|
||||||
if (snap) {
|
if (snap) {
|
||||||
const normalized = normalizeSnapshotRecord(snap);
|
const normalized = normalizeSnapshotRecord(snap);
|
||||||
@@ -1645,7 +1645,7 @@ async function rollbackToPreviousOfAsync(messageId) {
|
|||||||
const id = Number(messageId);
|
const id = Number(messageId);
|
||||||
if (Number.isNaN(id)) return;
|
if (Number.isNaN(id)) return;
|
||||||
|
|
||||||
// ???????? floor>=id ? L0
|
// Notify L0 rollback hook for floor >= id
|
||||||
if (typeof globalThis.LWB_StateRollbackHook === 'function') {
|
if (typeof globalThis.LWB_StateRollbackHook === 'function') {
|
||||||
try {
|
try {
|
||||||
await globalThis.LWB_StateRollbackHook(id);
|
await globalThis.LWB_StateRollbackHook(id);
|
||||||
@@ -1660,7 +1660,7 @@ async function rollbackToPreviousOfAsync(messageId) {
|
|||||||
if (mode === '2.0') {
|
if (mode === '2.0') {
|
||||||
try {
|
try {
|
||||||
const mod = await import('./state2/index.js');
|
const mod = await import('./state2/index.js');
|
||||||
await mod.restoreStateV2ToFloor(prevId); // prevId<0 ???
|
await mod.restoreStateV2ToFloor(prevId); // prevId < 0 handled by implementation
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('[variablesCore][2.0] restoreStateV2ToFloor failed:', e);
|
console.error('[variablesCore][2.0] restoreStateV2ToFloor failed:', e);
|
||||||
}
|
}
|
||||||
@@ -1682,7 +1682,7 @@ async function rebuildVariablesFromScratch() {
|
|||||||
await mod.restoreStateV2ToFloor(lastId);
|
await mod.restoreStateV2ToFloor(lastId);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// 1.0 旧逻辑
|
// 1.0 legacy logic
|
||||||
setVarDict({});
|
setVarDict({});
|
||||||
const chat = getContext()?.chat || [];
|
const chat = getContext()?.chat || [];
|
||||||
for (let i = 0; i < chat.length; i++) {
|
for (let i = 0; i < chat.length; i++) {
|
||||||
@@ -1876,7 +1876,7 @@ async function applyVariablesForMessage(messageId) {
|
|||||||
} catch (e) {
|
} catch (e) {
|
||||||
parseErrors++;
|
parseErrors++;
|
||||||
if (debugOn) {
|
if (debugOn) {
|
||||||
try { xbLog.error(MODULE_ID, `plot-log 解析失败:楼<EFBFBD>?${messageId} <EFBFBD>?${idx + 1} 预览=${preview(b)}`, e); } catch {}
|
try { xbLog.error(MODULE_ID, `plot-log 解析失败:楼层${messageId} 块${idx + 1} 预览=${preview(b)}`, e); } catch {}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1907,7 +1907,7 @@ async function applyVariablesForMessage(messageId) {
|
|||||||
try {
|
try {
|
||||||
xbLog.warn(
|
xbLog.warn(
|
||||||
MODULE_ID,
|
MODULE_ID,
|
||||||
`plot-log 未产生可执行指令:楼<EFBFBD>?${messageId} 块数=${blocks.length} 解析条目=${parsedPartsTotal} 解析失败=${parseErrors} 预览=${preview(blocks[0])}`
|
`plot-log 未产生可执行指令:楼层${messageId} 块数=${blocks.length} 解析条目=${parsedPartsTotal} 解析失败=${parseErrors} 预览=${preview(blocks[0])}`
|
||||||
);
|
);
|
||||||
} catch {}
|
} catch {}
|
||||||
}
|
}
|
||||||
@@ -2183,7 +2183,7 @@ async function applyVariablesForMessage(messageId) {
|
|||||||
const denied = guardDenied ? `,被规则拦截=${guardDenied}` : '';
|
const denied = guardDenied ? `,被规则拦截=${guardDenied}` : '';
|
||||||
xbLog.warn(
|
xbLog.warn(
|
||||||
MODULE_ID,
|
MODULE_ID,
|
||||||
`plot-log 指令执行后无变化:楼<EFBFBD>?${messageId} 指令<EFBFBD>?${ops.length}${denied} 示例=${preview(JSON.stringify(guardDeniedSamples))}`
|
`plot-log 指令执行后无变化:楼层${messageId} 指令数${ops.length}${denied} 示例=${preview(JSON.stringify(guardDeniedSamples))}`
|
||||||
);
|
);
|
||||||
} catch {}
|
} catch {}
|
||||||
}
|
}
|
||||||
@@ -2321,7 +2321,7 @@ function bindEvents() {
|
|||||||
|
|
||||||
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
|
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
|
||||||
|
|
||||||
// ? ?? await????? apply ????????????
|
// Roll back first so re-apply uses the edited message
|
||||||
await rollbackToPreviousOfAsync(id);
|
await rollbackToPreviousOfAsync(id);
|
||||||
|
|
||||||
setTimeout(async () => {
|
setTimeout(async () => {
|
||||||
@@ -2358,7 +2358,7 @@ function bindEvents() {
|
|||||||
lastSwipedId = id;
|
lastSwipedId = id;
|
||||||
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
|
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
|
||||||
|
|
||||||
// ? ?? await???????????????
|
// Roll back first so swipe applies cleanly
|
||||||
await rollbackToPreviousOfAsync(id);
|
await rollbackToPreviousOfAsync(id);
|
||||||
|
|
||||||
const tId = setTimeout(async () => {
|
const tId = setTimeout(async () => {
|
||||||
@@ -2377,10 +2377,10 @@ function bindEvents() {
|
|||||||
const id = getMsgIdStrict(data);
|
const id = getMsgIdStrict(data);
|
||||||
if (typeof id !== 'number') return;
|
if (typeof id !== 'number') return;
|
||||||
|
|
||||||
// ? ????????await ???????
|
// Roll back first before delete handling
|
||||||
await rollbackToPreviousOfAsync(id);
|
await rollbackToPreviousOfAsync(id);
|
||||||
|
|
||||||
// ✅ 2.0:物理删除消息 => 同步清理 WAL/ckpt,避免膨胀
|
// 2.0: physical delete -> trim WAL/ckpt to avoid bloat
|
||||||
if (getVariablesMode() === '2.0') {
|
if (getVariablesMode() === '2.0') {
|
||||||
try {
|
try {
|
||||||
const mod = await import('./state2/index.js');
|
const mod = await import('./state2/index.js');
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"lint": "eslint \"**/*.js\"",
|
"lint": "node scripts/check-garbled.js && eslint \"**/*.js\"",
|
||||||
"lint:fix": "eslint \"**/*.js\" --fix"
|
"lint:fix": "eslint \"**/*.js\" --fix"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
80
scripts/check-garbled.js
Normal file
80
scripts/check-garbled.js
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
/* eslint-env node */
|
||||||
|
import fs from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
|
||||||
|
// Scan root: the working directory the script is launched from.
const root = process.cwd();
// Only files with one of these extensions are scanned.
const includeExts = new Set(['.js', '.html', '.css']);
// Directory names that are never descended into.
const ignoreDirs = new Set(['node_modules', '.git']);

// Signatures of text that was damaged by a bad encoding round-trip.
// The regexes are intentionally not global: they are only used with `.test()`,
// and a /g regex keeps `lastIndex` between calls, which makes consecutive
// tests on different lines depend on each other.
const patterns = [
    // A run of three question marks, typical of characters lost in conversion.
    { name: 'question-marks', regex: /\?\?\?/ },
    // The Unicode replacement character produced by a failed decode.
    { name: 'replacement-char', regex: /\uFFFD/ },
];
|
||||||
|
|
||||||
|
/**
 * Tell whether a directory should be excluded from the scan.
 *
 * @param {string} dirName - Bare directory name (not a full path).
 * @returns {boolean} True when the name is listed in `ignoreDirs`.
 */
function isIgnoredDir(dirName) {
    const skip = ignoreDirs.has(dirName);
    return skip;
}
|
||||||
|
|
||||||
|
/**
 * Recursively collect the files under `dir` that should be scanned.
 *
 * Directories rejected by `isIgnoredDir` are not entered. A regular file is
 * kept only when its extension is in `includeExts`. Entries that are neither
 * a directory nor a regular file are skipped.
 *
 * @param {string} dir - Directory to list.
 * @param {string[]} [files=[]] - Accumulator that matches are appended to.
 * @returns {string[]} The same `files` array, holding the joined paths.
 */
function walk(dir, files = []) {
    for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
        const fullPath = path.join(dir, dirent.name);

        if (dirent.isDirectory()) {
            if (!isIgnoredDir(dirent.name)) {
                walk(fullPath, files);
            }
            continue;
        }

        if (dirent.isFile() && includeExts.has(path.extname(dirent.name))) {
            files.push(fullPath);
        }
    }
    return files;
}
|
||||||
|
|
||||||
|
/**
 * Scan one file for garbled-text signatures.
 *
 * The file is read as UTF-8, split into lines (LF or CRLF), and every line is
 * tested against each entry of the module-level `patterns` list.
 *
 * @param {string} filePath - Path of the file to scan.
 * @returns {{line: number, name: string, preview: string}[]} One entry per
 *   (line, pattern) match. `line` is 1-based; `preview` is the line with tabs
 *   shown as `\t`, cut to 200 characters. Empty when nothing matches or when
 *   the file could not be read.
 */
function scanFile(filePath) {
    let content;
    try {
        content = fs.readFileSync(filePath, 'utf8');
    } catch (err) {
        // Skipping stays best-effort, but it must be visible: a silently
        // skipped file would make the check look cleaner than it really is.
        console.warn(`Garbled text check: skipped unreadable file ${filePath}: ${err.message}`);
        return [];
    }

    const hits = [];
    content.split(/\r?\n/).forEach((line, index) => {
        for (const { name, regex } of patterns) {
            // `.test()` on a global/sticky regex resumes from `lastIndex`;
            // rewind so every line is matched from its first character.
            regex.lastIndex = 0;
            if (!regex.test(line)) continue;

            const preview = line.replace(/\t/g, '\\t').slice(0, 200);
            hits.push({ line: index + 1, name, preview });
        }
    });

    return hits;
}
|
||||||
|
|
||||||
|
// Entry point: scan every matching file under `root` and report the outcome.
const files = walk(root);
const issues = [];

for (const file of files) {
    const hits = scanFile(file);
    if (hits.length) {
        issues.push({ file, hits });
    }
}

if (issues.length) {
    console.error('Garbled text check failed:');
    for (const { file, hits } of issues) {
        // Report paths relative to the scan root to keep the output short.
        const rel = path.relative(root, file);
        for (const hit of hits) {
            console.error(`- ${rel}:${hit.line} [${hit.name}] ${hit.preview}`);
        }
    }
    // Set the exit code instead of calling process.exit(): exiting right away
    // can cut off stderr output that is still being written asynchronously.
    // Nothing else is pending here, so the process ends on its own with a
    // non-zero status.
    process.exitCode = 1;
} else {
    console.log('Garbled text check passed.');
}
|
||||||
Reference in New Issue
Block a user