2.0变量 , 向量总结正式推送

This commit is contained in:
RT15548
2026-02-16 00:30:59 +08:00
parent 17b1fe9091
commit cd9fe53f84
75 changed files with 48287 additions and 12186 deletions

8
.editorconfig Normal file
View File

@@ -0,0 +1,8 @@
root = true
[*]
charset = utf-8
indent_style = space
indent_size = 4
# end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

3
.eslintignore Normal file
View File

@@ -0,0 +1,3 @@
libs/**
**/libs/**
**/*.min.js

68
.eslintrc.cjs Normal file
View File

@@ -0,0 +1,68 @@
// ESLint configuration (CommonJS) for the browser-side extension code.
// Dynamic-code and DOM-injection hazards are reported as warnings rather
// than errors so existing code still lints while being cleaned up.
module.exports = {
root: true,
extends: [
'eslint:recommended',
'plugin:jsdoc/recommended',
],
plugins: [
'jsdoc',
'security',
'no-unsanitized',
],
env: {
browser: true,
jquery: true,
es6: true,
},
// Globals injected by the SillyTavern host page and bundled libraries.
globals: {
toastr: 'readonly',
Fuse: 'readonly',
globalThis: 'readonly',
SillyTavern: 'readonly',
ePub: 'readonly',
pdfjsLib: 'readonly',
echarts: 'readonly',
},
parserOptions: {
ecmaVersion: 'latest',
sourceType: 'module',
},
rules: {
'no-eval': 'warn',
'no-implied-eval': 'warn',
'no-new-func': 'warn',
'no-script-url': 'warn',
'no-unsanitized/method': 'warn',
'no-unsanitized/property': 'warn',
'security/detect-object-injection': 'off',
'security/detect-non-literal-regexp': 'off',
'security/detect-unsafe-regex': 'off',
// Custom AST selectors flagging unsafe cross-frame messaging patterns.
'no-restricted-syntax': [
'warn',
{
selector: 'CallExpression[callee.property.name="postMessage"][arguments.1.value="*"]',
message: 'Avoid postMessage(..., "*"); use a trusted origin or the shared iframe messaging helper.',
},
{
selector: 'CallExpression[callee.property.name="addEventListener"][arguments.0.value="message"]',
message: 'All message listeners must validate origin/source (use isTrustedMessage).',
},
],
'no-undef': 'error',
'no-unused-vars': ['warn', { args: 'none' }],
'eqeqeq': 'off',
'no-empty': ['error', { allowEmptyCatch: true }],
'no-inner-declarations': 'off',
'no-constant-condition': ['error', { checkLoops: false }],
'no-useless-catch': 'off',
'no-control-regex': 'off',
'no-mixed-spaces-and-tabs': 'off',
// JSDoc blocks are linted for well-formedness but never required.
'jsdoc/require-jsdoc': 'off',
'jsdoc/require-param': 'off',
'jsdoc/require-returns': 'off',
'jsdoc/require-param-description': 'off',
'jsdoc/require-returns-description': 'off',
'jsdoc/check-types': 'off',
'jsdoc/tag-lines': 'off',
},
};

1
.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
* text=auto eol=lf

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
node_modules/

121
README.md
View File

@@ -1,120 +1,7 @@
# LittleWhiteBox
# LittleWhiteBox
## 📁 目录结构
一个面向 SillyTavern 的多功能扩展,包含剧情总结/记忆系统、变量系统、任务与多种面板能力。集成了画图、流式生成、模板编辑、调试面板等组件,适合用于复杂玩法与长期剧情记录。
```
LittleWhiteBox/
├── index.js # 入口:初始化/注册所有模块
├── manifest.json # 插件清单:版本/依赖/入口
├── settings.html # 主设置页:模块开关/UI
├── style.css # 全局样式
├── README.md # 说明文档
├── .eslintrc.cjs # ESLint 规则
├── .eslintignore # ESLint 忽略
├── .gitignore # Git 忽略
├── package.json # 开发依赖/脚本
├── package-lock.json # 依赖锁定
├── jsconfig.json # 编辑器提示
├── core/ # 核心基础设施(不直接做功能UI)
│ ├── constants.js # 常量/路径
│ ├── event-manager.js # 统一事件管理
│ ├── debug-core.js # 日志/缓存注册
│ ├── slash-command.js # 斜杠命令封装
│ ├── variable-path.js # 变量路径解析
│ ├── server-storage.js # 服务器存储(防抖/重试)
│ ├── wrapper-inline.js # iframe 内联脚本
│ └── iframe-messaging.js # postMessage 封装与 origin 校验
├── widgets/ # 通用UI组件(跨功能复用)
│ ├── message-toolbar.js # 消息区工具条注册/管理
│ └── button-collapse.js # 消息区按钮收纳
├── modules/ # 功能模块(每个功能自带UI)
│ ├── control-audio.js # 音频权限控制
│ ├── iframe-renderer.js # iframe 渲染
│ ├── immersive-mode.js # 沉浸模式
│ ├── message-preview.js # 消息预览/拦截
│ ├── streaming-generation.js # 生成相关功能xbgenraw
│ │
│ ├── debug-panel/ # 调试面板
│ │ ├── debug-panel.js # 悬浮窗控制
│ │ └── debug-panel.html # UI
│ │
│ ├── fourth-wall/ # 四次元壁
│ │ ├── fourth-wall.js # 逻辑
│ │ ├── fourth-wall.html # UI
│ │ ├── fw-image.js # 图像交互
│ │ ├── fw-message-enhancer.js # 消息增强
│ │ ├── fw-prompt.js # 提示词编辑
│ │ └── fw-voice.js # 语音展示
│ │
│ ├── novel-draw/ # 画图
│ │ ├── novel-draw.js # 主逻辑
│ │ ├── novel-draw.html # UI
│ │ ├── llm-service.js # LLM 分析
│ │ ├── floating-panel.js # 悬浮面板
│ │ ├── gallery-cache.js # 缓存
│ │ ├── image-live-effect.js # Live 动效
│ │ ├── cloud-presets.js # 云预设
│ │ └── TAG编写指南.md # 文档
│ │
│ ├── tts/ # TTS
│ │ ├── tts.js # 主逻辑
│ │ ├── tts-auth-provider.js # 鉴权
│ │ ├── tts-free-provider.js # 试用
│ │ ├── tts-api.js # API
│ │ ├── tts-text.js # 文本处理
│ │ ├── tts-player.js # 播放器
│ │ ├── tts-panel.js # 气泡UI
│ │ ├── tts-cache.js # 缓存
│ │ ├── tts-overlay.html # 设置UI
│ │ ├── tts-voices.js # 音色数据
│ │ ├── 开通管理.png # 说明图
│ │ ├── 获取ID和KEY.png # 说明图
│ │ └── 声音复刻.png # 说明图
│ │
│ ├── scheduled-tasks/ # 定时任务
│ │ ├── scheduled-tasks.js # 调度
│ │ ├── scheduled-tasks.html # UI
│ │ └── embedded-tasks.html # 嵌入UI
│ │
│ ├── template-editor/ # 模板编辑器
│ │ ├── template-editor.js # 逻辑
│ │ └── template-editor.html # UI
│ │
│ ├── story-outline/ # 故事大纲
│ │ ├── story-outline.js # 逻辑
│ │ ├── story-outline.html # UI
│ │ └── story-outline-prompt.js # 提示词
│ │
│ ├── story-summary/ # 剧情总结
│ │ ├── story-summary.js # 逻辑
│ │ ├── story-summary.html # UI
│ │ └── llm-service.js # LLM 服务
│ │
│ └── variables/ # 变量系统
│ ├── var-commands.js # 命令
│ ├── varevent-editor.js # 编辑器
│ ├── variables-core.js # 核心
│ └── variables-panel.js # 面板
├── bridges/ # 外部服务桥接
│ ├── call-generate-service.js # ST 生成服务
│ ├── worldbook-bridge.js # 世界书桥接
│ └── wrapper-iframe.js # iframe 客户端脚本
├── libs/ # 第三方库
│ └── pixi.min.js # PixiJS
└── docs/ # 许可/声明
├── COPYRIGHT
├── LICENSE.md
└── NOTICE
## 许可证
node_modules/ # 本地依赖(不提交)
```
## 📄 许可证
详见 `docs/LICENSE.md`
详见 `docs/LICENSE.md`

View File

@@ -1,4 +1,4 @@
import { EventCenter } from "./event-manager.js";
import { EventCenter } from "./event-manager.js";
const DEFAULT_MAX_LOGS = 200;
@@ -110,14 +110,14 @@ class LoggerCore {
});
}
info(moduleId, message) {
this._log("info", moduleId, message, null);
info(moduleId, ...args) {
const msg = args.map(a => (typeof a === 'string' ? a : safeStringify(a))).join(' ');
this._log('info', moduleId, msg, null);
}
warn(moduleId, message) {
this._log("warn", moduleId, message, null);
warn(moduleId, ...args) {
const msg = args.map(a => (typeof a === 'string' ? a : safeStringify(a))).join(' ');
this._log('warn', moduleId, msg, null);
}
error(moduleId, message, err) {
this._log("error", moduleId, message, err || null);
}

View File

@@ -183,3 +183,4 @@ export const StoryOutlineStorage = new StorageFile('LittleWhiteBox_StoryOutline.
export const NovelDrawStorage = new StorageFile('LittleWhiteBox_NovelDraw.json', { debounceMs: 800 });
export const TtsStorage = new StorageFile('LittleWhiteBox_TTS.json', { debounceMs: 800 });
export const CommonSettingStorage = new StorageFile('LittleWhiteBox_CommonSettings.json', { debounceMs: 1000 });
export const VectorStorage = new StorageFile('LittleWhiteBox_Vectors.json', { debounceMs: 3000 });

View File

@@ -93,3 +93,7 @@ For complete license terms, see LICENSE.md
For attribution requirements, see COPYRIGHT
Last updated: 2025-01-14
TinySegmenter 0.2
Copyright (c) 2008 Taku Kudo
MIT License
http://www.chasen.org/~taku/software/TinySegmenter/

View File

@@ -40,6 +40,7 @@ extension_settings[EXT_ID] = extension_settings[EXT_ID] || {
audio: { enabled: true },
variablesPanel: { enabled: false },
variablesCore: { enabled: true },
variablesMode: '1.0',
storySummary: { enabled: true },
storyOutline: { enabled: false },
novelDraw: { enabled: false },
@@ -273,7 +274,7 @@ function toggleSettingsControls(enabled) {
'scheduled_tasks_enabled', 'xiaobaix_template_enabled',
'xiaobaix_immersive_enabled', 'xiaobaix_fourth_wall_enabled',
'xiaobaix_audio_enabled', 'xiaobaix_variables_panel_enabled',
'xiaobaix_use_blob', 'xiaobaix_variables_core_enabled', 'Wrapperiframe', 'xiaobaix_render_enabled',
'xiaobaix_use_blob', 'xiaobaix_variables_core_enabled', 'xiaobaix_variables_mode', 'Wrapperiframe', 'xiaobaix_render_enabled',
'xiaobaix_max_rendered', 'xiaobaix_story_outline_enabled', 'xiaobaix_story_summary_enabled',
'xiaobaix_novel_draw_enabled', 'xiaobaix_novel_draw_open_settings',
'xiaobaix_tts_enabled', 'xiaobaix_tts_open_settings'
@@ -430,6 +431,15 @@ async function setupSettings() {
});
});
// variables mode selector
$("#xiaobaix_variables_mode")
.val(settings.variablesMode || "1.0")
.on("change", function () {
settings.variablesMode = String($(this).val() || "1.0");
saveSettingsDebounced();
toastr.info(`变量系统已切换为 ${settings.variablesMode}`);
});
$("#xiaobaix_novel_draw_open_settings").on("click", function () {
if (!isXiaobaixEnabled) return;
if (settings.novelDraw?.enabled && window.xiaobaixNovelDraw?.openSettings) {

5912
libs/dexie.mjs Normal file

File diff suppressed because it is too large Load Diff

2665
libs/fflate.mjs Normal file

File diff suppressed because it is too large Load Diff

25
libs/jieba-wasm/LICENSE Normal file
View File

@@ -0,0 +1,25 @@
Copyright (c) 2018 fengkx <liangkx8237@gmail.com>
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

134
libs/jieba-wasm/README.md Normal file
View File

@@ -0,0 +1,134 @@
# jieba-wasm
> [jieba-rs](https://github.com/messense/jieba-rs) 的 wasm binding
_编译成 WASM 摆脱编译 Node Addon 的烦恼_
# Usage
## Node.js
```js
const {
cut,
cut_all,
cut_for_search,
tokenize,
add_word,
} = require("jieba-wasm");
cut("中华人民共和国武汉市长江大桥", true);
// [ '中华人民共和国', '武汉市', '长江大桥' ]
cut_all("中华人民共和国武汉市长江大桥", true);
/*
[
'中', '中华',
'中华人民', '中华人民共和国',
'华', '华人',
'人', '人民',
'人民共和国', '民',
'共', '共和',
'共和国', '和',
'国', '武',
'武汉', '武汉市',
'汉', '市',
'市长', '长',
'长江', '长江大桥',
'江', '大',
'大桥', '桥'
]
*/
cut_for_search("中华人民共和国武汉市长江大桥", true);
/*
[
'中华', '华人',
'人民', '共和',
'共和国', '中华人民共和国',
'武汉', '武汉市',
'长江', '大桥',
'长江大桥'
]
*/
tokenize("中华人民共和国武汉市长江大桥", "default", true);
/*
[
{ word: '中华人民共和国', start: 0, end: 7 },
{ word: '武汉市', start: 7, end: 10 },
{ word: '长江大桥', start: 10, end: 14 }
]
*/
tokenize("中华人民共和国武汉市长江大桥", "search", true);
/*
[
{ word: '中华', start: 0, end: 2 },
{ word: '华人', start: 1, end: 3 },
{ word: '人民', start: 2, end: 4 },
{ word: '共和', start: 4, end: 6 },
{ word: '共和国', start: 4, end: 7 },
{ word: '中华人民共和国', start: 0, end: 7 },
{ word: '武汉', start: 7, end: 9 },
{ word: '武汉市', start: 7, end: 10 },
{ word: '长江', start: 10, end: 12 },
{ word: '大桥', start: 12, end: 14 },
{ word: '长江大桥', start: 10, end: 14 }
]
*/
cut("桥大江长市汉武的省北湖国和共民人华中");
/*
[
'桥', '大江', '长',
'市', '汉', '武',
'的', '省', '北湖',
'国', '和', '共',
'民', '人', '华中'
]
*/
["桥大江长", "市汉武", "省北湖", "国和共民人华中"].map((word) => {
add_word(word);
});
cut("桥大江长市汉武的省北湖国和共民人华中");
// ["桥大江长", "市汉武", "的", "省北湖", "国和共民人华中"];
with_dict("自动借书机 1 n"); // 导入自定义字典,词条格式:词语 词频 词性(可选),以换行符分隔
cut("你好我是一个自动借书机");
// ["你好", "我", "是", "一个", "自动借书机"];
```
## Browser
```ts
import init, { cut } from 'jieba-wasm';
// 重要:使用前必须初始化
await init();
cut("中华人民共和国武汉市长江大桥", true);
// [ '中华人民共和国', '武汉市', '长江大桥' ]
```
# 示例 Demo
## 安装依赖
安装 wasm-bindgen 和 wasm-opt
```bash
cargo install wasm-bindgen-cli --locked
cargo install wasm-opt --locked
```
## 前期准备
首先保证存在 rust 环境,然后运行以下命令
```bash
npm run build:cargo
npm run build
```
## 运行浏览器端示例
```bash
cd demo/web
npm install
npm run dev
```
# Prior Art
https://github.com/messense/jieba-rs

73
libs/jieba-wasm/jieba_rs_wasm.d.ts vendored Normal file
View File

@@ -0,0 +1,73 @@
/* tslint:disable */
/* eslint-disable */
export function cut(text: string, hmm?: boolean | null): string[];
export function cut_all(text: string): string[];
export function cut_for_search(text: string, hmm?: boolean | null): string[];
export function tokenize(text: string, mode: string, hmm?: boolean | null): Token[];
export function add_word(word: string, freq?: number | null, tag?: string | null): number;
export function tag(sentence: string, hmm?: boolean | null): Tag[];
export function with_dict(dict: string): void;
/** Represents a single token with its word and position. */
export interface Token {
word: string;
start: number;
end: number;
}
/** Represents a single word and its part-of-speech tag. */
export interface Tag {
word: string;
tag: string;
}
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
export interface InitOutput {
readonly memory: WebAssembly.Memory;
readonly cut: (a: number, b: number, c: number) => [number, number];
readonly cut_all: (a: number, b: number) => [number, number];
readonly cut_for_search: (a: number, b: number, c: number) => [number, number];
readonly tokenize: (a: number, b: number, c: number, d: number, e: number) => [number, number, number, number];
readonly add_word: (a: number, b: number, c: number, d: number, e: number) => number;
readonly tag: (a: number, b: number, c: number) => [number, number];
readonly with_dict: (a: number, b: number) => [number, number];
readonly rust_zstd_wasm_shim_qsort: (a: number, b: number, c: number, d: number) => void;
readonly rust_zstd_wasm_shim_malloc: (a: number) => number;
readonly rust_zstd_wasm_shim_memcmp: (a: number, b: number, c: number) => number;
readonly rust_zstd_wasm_shim_calloc: (a: number, b: number) => number;
readonly rust_zstd_wasm_shim_free: (a: number) => void;
readonly rust_zstd_wasm_shim_memcpy: (a: number, b: number, c: number) => number;
readonly rust_zstd_wasm_shim_memmove: (a: number, b: number, c: number) => number;
readonly rust_zstd_wasm_shim_memset: (a: number, b: number, c: number) => number;
readonly __wbindgen_malloc: (a: number, b: number) => number;
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
readonly __wbindgen_export_2: WebAssembly.Table;
readonly __externref_drop_slice: (a: number, b: number) => void;
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
readonly __externref_table_dealloc: (a: number) => void;
readonly __wbindgen_start: () => void;
}
export type SyncInitInput = BufferSource | WebAssembly.Module;
/**
* Instantiates the given `module`, which can either be bytes or
* a precompiled `WebAssembly.Module`.
*
* @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
*
* @returns {InitOutput}
*/
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
/**
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and
* for everything else, calls `WebAssembly.instantiate` directly.
*
* @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
*
* @returns {Promise<InitOutput>}
*/
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;

View File

@@ -0,0 +1,438 @@
// ---- wasm-bindgen glue: linear-memory views and UTF-8 decoding ----
// Exports object of the instantiated wasm module; set in __wbg_finalize_init.
let wasm;
let cachedUint8ArrayMemory0 = null;
// Returns a Uint8Array view over wasm linear memory, recreating it after the
// backing buffer grows (a detached buffer reports byteLength === 0).
function getUint8ArrayMemory0() {
if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
}
return cachedUint8ArrayMemory0;
}
let cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
// Warm the decoder up front with an empty decode.
if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
// Recreate the decoder before ~2 GiB of cumulative decoded bytes; the name
// suggests this works around a Safari limitation — TODO confirm exact bug.
const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
// Decodes `len` bytes starting at `ptr` in wasm memory as UTF-8 text.
function decodeText(ptr, len) {
numBytesDecoded += len;
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
cachedTextDecoder.decode();
numBytesDecoded = len;
}
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
}
// Reads a JS string out of wasm memory; `ptr` is coerced to an unsigned index.
function getStringFromWasm0(ptr, len) {
ptr = ptr >>> 0;
return decodeText(ptr, len);
}
/**
 * Renders an arbitrary JS value as a human-readable debug string, in the
 * format wasm-bindgen's `__wbindgen_debug_string` shim expects.
 * @param {*} val - any value
 * @returns {string} description such as `42`, `"text"`, `[1, 2]`, `Object({...})`
 */
function debugString(val) {
    const kind = typeof val;
    // Numbers, booleans, null and undefined all stringify directly.
    if (kind == 'number' || kind == 'boolean' || val == null) {
        return `${val}`;
    }
    if (kind == 'string') {
        return `"${val}"`;
    }
    if (kind == 'symbol') {
        const desc = val.description;
        return desc == null ? 'Symbol' : `Symbol(${desc})`;
    }
    if (kind == 'function') {
        const fnName = val.name;
        const hasName = typeof fnName == 'string' && fnName.length > 0;
        return hasName ? `Function(${fnName})` : 'Function';
    }
    // Arrays: recursively describe each element, comma-separated.
    if (Array.isArray(val)) {
        const parts = [];
        for (let idx = 0; idx < val.length; idx++) {
            parts.push(debugString(val[idx]));
        }
        return `[${parts.join(', ')}]`;
    }
    // Everything else: inspect the "[object ClassName]" tag.
    const tagString = toString.call(val);
    const tagMatch = /\[object ([^\]]+)\]/.exec(tagString);
    if (!(tagMatch && tagMatch.length > 1)) {
        // Tag did not match the standard shape; return it verbatim.
        return tagString;
    }
    const className = tagMatch[1];
    if (className == 'Object') {
        // Plain object (or user-defined class): JSON.stringify is the easiest
        // way to show contents; cycles make it throw, so fall back to 'Object'.
        try {
            return 'Object(' + JSON.stringify(val) + ')';
        } catch (_) {
            return 'Object';
        }
    }
    // Errors include name, message and stack.
    if (val instanceof Error) {
        return `${val.name}: ${val.message}\n${val.stack}`;
    }
    // TODO we could test for more things here, like `Set`s and `Map`s.
    return className;
}
// Byte length of the most recent string copied into wasm by
// passStringToWasm0; callers read it immediately after the call.
let WASM_VECTOR_LEN = 0;
const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
// Prefer zero-copy encodeInto when the engine supports it; otherwise encode
// to a temporary buffer and copy into the destination view.
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
? function (arg, view) {
return cachedTextEncoder.encodeInto(arg, view);
}
: function (arg, view) {
const buf = cachedTextEncoder.encode(arg);
view.set(buf);
return {
read: arg.length,
written: buf.length
};
});
// Copies the JS string `arg` into wasm memory and returns a pointer to it.
// The encoded byte length is reported out-of-band via WASM_VECTOR_LEN.
// Fast path: ASCII characters are copied byte-by-byte; on the first
// non-ASCII character the buffer is grown to the worst case (3 bytes per
// UTF-16 unit) and the rest is encoded with encodeString.
function passStringToWasm0(arg, malloc, realloc) {
if (realloc === undefined) {
// No realloc available: encode fully up front, allocate exact size.
const buf = cachedTextEncoder.encode(arg);
const ptr = malloc(buf.length, 1) >>> 0;
getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
WASM_VECTOR_LEN = buf.length;
return ptr;
}
let len = arg.length;
let ptr = malloc(len, 1) >>> 0;
const mem = getUint8ArrayMemory0();
let offset = 0;
for (; offset < len; offset++) {
const code = arg.charCodeAt(offset);
if (code > 0x7F) break;
mem[ptr + offset] = code;
}
if (offset !== len) {
if (offset !== 0) {
arg = arg.slice(offset);
}
// Grow allocation to the worst-case UTF-8 size for the remainder.
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
const ret = encodeString(arg, view);
offset += ret.written;
// Shrink back down to the bytes actually written.
ptr = realloc(ptr, len, offset, 1) >>> 0;
}
WASM_VECTOR_LEN = offset;
return ptr;
}
let cachedDataViewMemory0 = null;
// Returns a DataView over wasm memory, recreating it when the backing
// ArrayBuffer is detached (or, on engines without `detached`, replaced).
function getDataViewMemory0() {
if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
}
return cachedDataViewMemory0;
}
/**
 * True when `x` is null or undefined (wasm-bindgen's "none" check).
 * @param {*} x
 * @returns {boolean}
 */
function isLikeNone(x) {
    // Loose equality against null matches exactly null and undefined.
    return x == null;
}
// Converts a wasm-returned vector of externref handles (4 bytes per handle)
// into a JS array of the referenced values, then frees the handle slice.
function getArrayJsValueFromWasm0(ptr, len) {
ptr = ptr >>> 0;
const mem = getDataViewMemory0();
const result = [];
for (let i = ptr; i < ptr + 4 * len; i += 4) {
result.push(wasm.__wbindgen_export_2.get(mem.getUint32(i, true)));
}
wasm.__externref_drop_slice(ptr, len);
return result;
}
/**
 * Segments `text` into words (jieba precise mode).
 * @param {string} text
 * @param {boolean | null} [hmm] - use the HMM for out-of-vocabulary words;
 *   an omitted/null value is encoded as the sentinel 0xFFFFFF.
 * @returns {string[]}
 */
export function cut(text, hmm) {
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
const ret = wasm.cut(ptr0, len0, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
// Free the handle vector returned by wasm (4 bytes per handle, align 4).
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
return v2;
}
/**
 * Segments `text` into all possible words (jieba full mode).
 * @param {string} text
 * @returns {string[]}
 */
export function cut_all(text) {
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
const ret = wasm.cut_all(ptr0, len0);
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
return v2;
}
/**
 * Segments `text` for search-engine indexing (finer-grained pieces).
 * @param {string} text
 * @param {boolean | null} [hmm]
 * @returns {string[]}
 */
export function cut_for_search(text, hmm) {
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
const ret = wasm.cut_for_search(ptr0, len0, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
return v2;
}
// Takes ownership of the externref at `idx`: reads the value, then
// releases the table slot on the wasm side.
function takeFromExternrefTable0(idx) {
const value = wasm.__wbindgen_export_2.get(idx);
wasm.__externref_table_dealloc(idx);
return value;
}
/**
 * Tokenizes `text`, returning each word with start/end character offsets.
 * @param {string} text
 * @param {string} mode - tokenize mode string ("default" or "search" per the README)
 * @param {boolean | null} [hmm]
 * @returns {Token[]}
 * @throws the error value produced by wasm when tokenization fails
 */
export function tokenize(text, mode, hmm) {
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
const ptr1 = passStringToWasm0(mode, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len1 = WASM_VECTOR_LEN;
const ret = wasm.tokenize(ptr0, len0, ptr1, len1, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
if (ret[3]) {
// ret[3] is the error flag; ret[2] holds the thrown value's handle.
throw takeFromExternrefTable0(ret[2]);
}
var v3 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
return v3;
}
/**
 * Adds `word` to the in-memory dictionary.
 * @param {string} word
 * @param {number | null} [freq] - omitted is encoded as the sentinel 0x100000001
 * @param {string | null} [tag] - optional part-of-speech tag
 * @returns {number} unsigned result from wasm (presumably the word's
 *   frequency — confirm against jieba-rs docs)
 */
export function add_word(word, freq, tag) {
const ptr0 = passStringToWasm0(word, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
var ptr1 = isLikeNone(tag) ? 0 : passStringToWasm0(tag, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
var len1 = WASM_VECTOR_LEN;
const ret = wasm.add_word(ptr0, len0, isLikeNone(freq) ? 0x100000001 : (freq) >>> 0, ptr1, len1);
return ret >>> 0;
}
/**
 * Part-of-speech tags `sentence`.
 * @param {string} sentence
 * @param {boolean | null} [hmm]
 * @returns {Tag[]}
 */
export function tag(sentence, hmm) {
const ptr0 = passStringToWasm0(sentence, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
const ret = wasm.tag(ptr0, len0, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
return v2;
}
/**
 * Loads a user dictionary from `dict` text.
 * @param {string} dict - dictionary content
 * @throws the error value produced by wasm when the content is rejected
 */
export function with_dict(dict) {
const ptr0 = passStringToWasm0(dict, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len0 = WASM_VECTOR_LEN;
const ret = wasm.with_dict(ptr0, len0);
if (ret[1]) {
throw takeFromExternrefTable0(ret[0]);
}
}
const EXPECTED_RESPONSE_TYPES = new Set(['basic', 'cors', 'default']);
// Instantiates the wasm module from a fetch Response (streaming where
// supported) or from raw bytes / a precompiled WebAssembly.Module.
async function __wbg_load(module, imports) {
if (typeof Response === 'function' && module instanceof Response) {
if (typeof WebAssembly.instantiateStreaming === 'function') {
try {
return await WebAssembly.instantiateStreaming(module, imports);
} catch (e) {
// A wrong MIME type on an otherwise valid response is recoverable:
// fall through to the ArrayBuffer path. Anything else is fatal.
const validResponse = module.ok && EXPECTED_RESPONSE_TYPES.has(module.type);
if (validResponse && module.headers.get('Content-Type') !== 'application/wasm') {
console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
} else {
throw e;
}
}
}
const bytes = await module.arrayBuffer();
return await WebAssembly.instantiate(bytes, imports);
} else {
const instance = await WebAssembly.instantiate(module, imports);
if (instance instanceof WebAssembly.Instance) {
// Instantiating a precompiled Module yields a bare Instance;
// normalize to the { instance, module } shape.
return { instance, module };
} else {
return instance;
}
}
}
// Builds the import object the wasm module expects (wasm-bindgen shims
// that let wasm construct and manipulate JS values).
function __wbg_get_imports() {
const imports = {};
imports.wbg = {};
// Builds a JS Error from a string stored in wasm memory.
imports.wbg.__wbg_Error_0497d5bdba9362e5 = function(arg0, arg1) {
const ret = Error(getStringFromWasm0(arg0, arg1));
return ret;
};
imports.wbg.__wbg_new_07b483f72211fd66 = function() {
const ret = new Object();
return ret;
};
// Property assignment: arg0[arg1] = arg2.
imports.wbg.__wbg_set_3f1d0b984ed272ed = function(arg0, arg1, arg2) {
arg0[arg1] = arg2;
};
imports.wbg.__wbindgen_bigint_from_u64 = function(arg0) {
const ret = BigInt.asUintN(64, arg0);
return ret;
};
// Writes debugString(value) back into wasm memory as a (ptr, len) pair.
imports.wbg.__wbindgen_debug_string = function(arg0, arg1) {
const ret = debugString(arg1);
const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
const len1 = WASM_VECTOR_LEN;
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
};
// Seeds the externref table with the constant JS values wasm references.
imports.wbg.__wbindgen_init_externref_table = function() {
const table = wasm.__wbindgen_export_2;
const offset = table.grow(4);
table.set(0, undefined);
table.set(offset + 0, undefined);
table.set(offset + 1, null);
table.set(offset + 2, true);
table.set(offset + 3, false);
;
};
imports.wbg.__wbindgen_number_new = function(arg0) {
const ret = arg0;
return ret;
};
imports.wbg.__wbindgen_string_new = function(arg0, arg1) {
const ret = getStringFromWasm0(arg0, arg1);
return ret;
};
// Propagates a wasm panic/throw as a JS exception.
imports.wbg.__wbindgen_throw = function(arg0, arg1) {
throw new Error(getStringFromWasm0(arg0, arg1));
};
return imports;
}
// Hook for memory-related setup; intentionally a no-op for this build target.
function __wbg_init_memory(imports, memory) {
}
// Stores the instance exports, invalidates cached memory views, and runs
// the module's start function before returning the exports.
function __wbg_finalize_init(instance, module) {
wasm = instance.exports;
__wbg_init.__wbindgen_wasm_module = module;
cachedDataViewMemory0 = null;
cachedUint8ArrayMemory0 = null;
wasm.__wbindgen_start();
return wasm;
}
/**
 * Synchronously instantiates the wasm module from bytes or a precompiled
 * WebAssembly.Module. Idempotent: returns existing exports if already
 * initialized. Accepts either `{ module }` or the deprecated bare argument.
 */
function initSync(module) {
if (wasm !== undefined) return wasm;
if (typeof module !== 'undefined') {
if (Object.getPrototypeOf(module) === Object.prototype) {
// New-style call: destructure the options object.
({module} = module)
} else {
console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
}
}
const imports = __wbg_get_imports();
__wbg_init_memory(imports);
if (!(module instanceof WebAssembly.Module)) {
module = new WebAssembly.Module(module);
}
const instance = new WebAssembly.Instance(module, imports);
return __wbg_finalize_init(instance, module);
}
/**
 * Asynchronously initializes the wasm module. With no argument, fetches
 * `jieba_rs_wasm_bg.wasm` from next to this script. Idempotent. Accepts
 * either `{ module_or_path }` or the deprecated bare argument, which may
 * be a URL/string/Request (fetched), a Response, bytes, or a Module.
 */
async function __wbg_init(module_or_path) {
if (wasm !== undefined) return wasm;
if (typeof module_or_path !== 'undefined') {
if (Object.getPrototypeOf(module_or_path) === Object.prototype) {
({module_or_path} = module_or_path)
} else {
console.warn('using deprecated parameters for the initialization function; pass a single object instead')
}
}
if (typeof module_or_path === 'undefined') {
module_or_path = new URL('jieba_rs_wasm_bg.wasm', import.meta.url);
}
const imports = __wbg_get_imports();
if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) {
module_or_path = fetch(module_or_path);
}
__wbg_init_memory(imports);
const { instance, module } = await __wbg_load(await module_or_path, imports);
return __wbg_finalize_init(instance, module);
}
export { initSync };
export default __wbg_init;

Binary file not shown.

View File

@@ -0,0 +1,25 @@
/* tslint:disable */
/* eslint-disable */
export const memory: WebAssembly.Memory;
export const cut: (a: number, b: number, c: number) => [number, number];
export const cut_all: (a: number, b: number) => [number, number];
export const cut_for_search: (a: number, b: number, c: number) => [number, number];
export const tokenize: (a: number, b: number, c: number, d: number, e: number) => [number, number, number, number];
export const add_word: (a: number, b: number, c: number, d: number, e: number) => number;
export const tag: (a: number, b: number, c: number) => [number, number];
export const with_dict: (a: number, b: number) => [number, number];
export const rust_zstd_wasm_shim_qsort: (a: number, b: number, c: number, d: number) => void;
export const rust_zstd_wasm_shim_malloc: (a: number) => number;
export const rust_zstd_wasm_shim_memcmp: (a: number, b: number, c: number) => number;
export const rust_zstd_wasm_shim_calloc: (a: number, b: number) => number;
export const rust_zstd_wasm_shim_free: (a: number) => void;
export const rust_zstd_wasm_shim_memcpy: (a: number, b: number, c: number) => number;
export const rust_zstd_wasm_shim_memmove: (a: number, b: number, c: number) => number;
export const rust_zstd_wasm_shim_memset: (a: number, b: number, c: number) => number;
export const __wbindgen_malloc: (a: number, b: number) => number;
export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
export const __wbindgen_export_2: WebAssembly.Table;
export const __externref_drop_slice: (a: number, b: number) => void;
export const __wbindgen_free: (a: number, b: number, c: number) => void;
export const __externref_table_dealloc: (a: number) => void;
export const __wbindgen_start: () => void;

View File

@@ -0,0 +1,129 @@
{
"name": "jieba-wasm",
"version": "2.4.0",
"description": "WASM binding to jieba-rs",
"main": "./pkg/nodejs/jieba_rs_wasm.js",
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
"exports": {
".": {
"node": {
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
"default": "./pkg/nodejs/jieba_rs_wasm.js"
},
"deno": {
"types": "./pkg/deno/jieba_rs_wasm.d.ts",
"default": "./pkg/deno/jieba_rs_wasm.js"
},
"browser": {
"types": "./pkg/web/jieba_rs_wasm.d.ts",
"default": "./pkg/web/jieba_rs_wasm.js"
},
"import": {
"types": "./pkg/web/jieba_rs_wasm.d.ts",
"default": "./pkg/web/jieba_rs_wasm.js"
},
"require": {
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
"default": "./pkg/nodejs/jieba_rs_wasm.js"
}
},
"./web": {
"types": "./pkg/web/jieba_rs_wasm.d.ts",
"default": "./pkg/web/jieba_rs_wasm.js"
},
"./node": {
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
"default": "./pkg/nodejs/jieba_rs_wasm.js"
},
"./deno": {
"types": "./pkg/deno/jieba_rs_wasm.d.ts",
"default": "./pkg/deno/jieba_rs_wasm.js"
}
},
"directories": {
"test": "tests"
},
"scripts": {
"build": "wireit",
"build:cargo": "wireit",
"build:bundler": "wireit",
"build:nodejs": "wireit",
"build:deno": "wireit",
"build:web": "wireit",
"build:opt": "wireit",
"test": "echo \"Error: no test specified\" && exit 1"
},
"wireit": {
"build:cargo": {
"command": "cargo build --release --target wasm32-unknown-unknown"
},
"build:bundler": {
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/bundler --target bundler",
"dependencies": [
"build:cargo"
]
},
"build:nodejs": {
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/nodejs --target nodejs",
"dependencies": [
"build:cargo"
]
},
"build:deno": {
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/deno --target deno",
"dependencies": [
"build:cargo"
]
},
"build:web": {
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/web --target web",
"dependencies": [
"build:cargo"
]
},
"build": {
"dependencies": [
"build:cargo",
"build:bundler",
"build:nodejs",
"build:deno",
"build:web",
"build:opt"
]
},
"build:opt": {
"command": "node scripts/opt.js",
"dependencies": [
"build:cargo",
"build:bundler",
"build:nodejs",
"build:deno",
"build:web"
]
}
},
"files": [
"pkg/**/*"
],
"repository": {
"type": "git",
"url": "git+https://github.com/fengkx/jieba-wasm.git"
},
"keywords": [
"wasm",
"jieba",
"chinese",
"segment",
"中文分词"
],
"author": "fengkx",
"license": "MIT",
"bugs": {
"url": "https://github.com/fengkx/jieba-wasm/issues"
},
"homepage": "https://github.com/fengkx/jieba-wasm#readme",
"devDependencies": {
"@jsdevtools/ez-spawn": "^3.0.4",
"wireit": "^0.14.4"
}
}

3851
libs/js-yaml.mjs Normal file

File diff suppressed because it is too large Load Diff

2036
libs/minisearch.mjs Normal file

File diff suppressed because it is too large Load Diff

177
libs/tiny-segmenter.js Normal file
View File

@@ -0,0 +1,177 @@
// TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
// (c) 2008 Taku Kudo <taku@chasen.org>
// TinySegmenter is freely distributable under the terms of a new BSD licence.
// For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt
/**
 * TinySegmenter model constructor.
 *
 * Builds the character-type classifiers and installs the pre-trained
 * feature-weight tables on the instance. segment() later sums weights
 * from these tables to decide, for each character, whether a word
 * boundary precedes it.
 */
function TinySegmenter() {
// Character-class patterns -> single-letter character-type tags:
// M = kanji numerals, H = other kanji, I = hiragana, K = katakana,
// A = Latin letters, N = digits. ctype_() tries them in this order,
// first match wins.
// NOTE(review): the K/A/N classes look mangled compared to upstream
// TinySegmenter (full-width/half-width ranges appear lost in a
// re-encoding, e.g. "[a-zA-Z--]" vs the original "[a-zA-Zａ-ｚＡ-Ｚ]")
// -- verify against the original distribution before relying on them.
var patterns = {
"[一二三四五六七八九十百千万億兆]":"M",
"[一-龠々〆ヵヶ]":"H",
"[ぁ-ん]":"I",
"[ァ-ヴーア-ン゙ー]":"K",
"[a-zA-Z--]":"A",
"[0-9-]":"N"
}
// Compile each pattern once and keep [regexp, tag] pairs in order.
// NOTE(review): RegExp.prototype.compile is deprecated; `new RegExp(i)`
// would be the modern equivalent with the same effect here.
this.chartype_ = [];
for (var i in patterns) {
var regexp = new RegExp;
regexp.compile(i)
this.chartype_.push([regexp, patterns[i]]);
}
// Pre-trained feature weights (original TinySegmenter model data).
// Naming scheme, as consumed by segment(): leading U/B/T = uni/bi/
// trigram feature; W = surface characters, C = character types,
// P = previously predicted boundary tags, Q = tag + character-type
// combinations; trailing digit = position in the sliding window.
// BIAS__ is the baseline score (negative => default "no boundary").
// NOTE(review): several keys below are visibly corrupted (empty-string
// keys, stray "" entries) -- presumably full-width characters lost in
// a re-encoding; compare with the upstream table data to confirm.
this.BIAS__ = -332
this.BC1__ = {"HH":6,"II":2461,"KH":406,"OH":-1378};
this.BC2__ = {"AA":-3267,"AI":2744,"AN":-878,"HH":-4070,"HM":-1711,"HN":4012,"HO":3761,"IA":1327,"IH":-1184,"II":-1332,"IK":1721,"IO":5492,"KI":3831,"KK":-8741,"MH":-3132,"MK":3334,"OO":-2920};
this.BC3__ = {"HH":996,"HI":626,"HK":-721,"HN":-1307,"HO":-836,"IH":-301,"KK":2762,"MK":1079,"MM":4034,"OA":-1652,"OH":266};
this.BP1__ = {"BB":295,"OB":304,"OO":-125,"UB":352};
this.BP2__ = {"BO":60,"OO":-1762};
this.BQ1__ = {"BHH":1150,"BHM":1521,"BII":-1158,"BIM":886,"BMH":1208,"BNH":449,"BOH":-91,"BOO":-2597,"OHI":451,"OIH":-296,"OKA":1851,"OKH":-1020,"OKK":904,"OOO":2965};
this.BQ2__ = {"BHH":118,"BHI":-1159,"BHM":466,"BIH":-919,"BKK":-1720,"BKO":864,"OHH":-1139,"OHM":-181,"OIH":153,"UHI":-1146};
this.BQ3__ = {"BHH":-792,"BHI":2664,"BII":-299,"BKI":419,"BMH":937,"BMM":8335,"BNN":998,"BOH":775,"OHH":2174,"OHM":439,"OII":280,"OKH":1798,"OKI":-793,"OKO":-2242,"OMH":-2402,"OOO":11699};
this.BQ4__ = {"BHH":-3895,"BIH":3761,"BII":-4654,"BIK":1348,"BKK":-1806,"BMI":-3385,"BOO":-12396,"OAH":926,"OHH":266,"OHK":-2036,"ONN":-973};
this.BW1__ = {",と":660,",同":727,"B1あ":1404,"B1同":542,"、と":660,"、同":727,"」と":1682,"あっ":1505,"いう":1743,"いっ":-2055,"いる":672,"うし":-4817,"うん":665,"から":3472,"がら":600,"こう":-790,"こと":2083,"こん":-1262,"さら":-4143,"さん":4573,"した":2641,"して":1104,"すで":-3399,"そこ":1977,"それ":-871,"たち":1122,"ため":601,"った":3463,"つい":-802,"てい":805,"てき":1249,"でき":1127,"です":3445,"では":844,"とい":-4915,"とみ":1922,"どこ":3887,"ない":5713,"なっ":3015,"など":7379,"なん":-1113,"にし":2468,"には":1498,"にも":1671,"に対":-912,"の一":-501,"の中":741,"ませ":2448,"まで":1711,"まま":2600,"まる":-2155,"やむ":-1947,"よっ":-2565,"れた":2369,"れで":-913,"をし":1860,"を見":731,"亡く":-1886,"京都":2558,"取り":-2784,"大き":-2604,"大阪":1497,"平方":-2314,"引き":-1336,"日本":-195,"本当":-2423,"毎日":-2113,"目指":-724,"B1あ":1404,"B1同":542,"」と":1682};
this.BW2__ = {"..":-11822,"11":-669,"――":-5730,"":-13175,"いう":-1609,"うか":2490,"かし":-1350,"かも":-602,"から":-7194,"かれ":4612,"がい":853,"がら":-3198,"きた":1941,"くな":-1597,"こと":-8392,"この":-4193,"させ":4533,"され":13168,"さん":-3977,"しい":-1819,"しか":-545,"した":5078,"して":972,"しな":939,"その":-3744,"たい":-1253,"たた":-662,"ただ":-3857,"たち":-786,"たと":1224,"たは":-939,"った":4589,"って":1647,"っと":-2094,"てい":6144,"てき":3640,"てく":2551,"ては":-3110,"ても":-3065,"でい":2666,"でき":-1528,"でし":-3828,"です":-4761,"でも":-4203,"とい":1890,"とこ":-1746,"とと":-2279,"との":720,"とみ":5168,"とも":-3941,"ない":-2488,"なが":-1313,"など":-6509,"なの":2614,"なん":3099,"にお":-1615,"にし":2748,"にな":2454,"によ":-7236,"に対":-14943,"に従":-4688,"に関":-11388,"のか":2093,"ので":-7059,"のに":-6041,"のの":-6125,"はい":1073,"はが":-1033,"はず":-2532,"ばれ":1813,"まし":-1316,"まで":-6621,"まれ":5409,"めて":-3153,"もい":2230,"もの":-10713,"らか":-944,"らし":-1611,"らに":-1897,"りし":651,"りま":1620,"れた":4270,"れて":849,"れば":4114,"ろう":6067,"われ":7901,"を通":-11877,"んだ":728,"んな":-4115,"一人":602,"一方":-1375,"一日":970,"一部":-1051,"上が":-4479,"会社":-1116,"出て":2163,"分の":-7758,"同党":970,"同日":-913,"大阪":-2471,"委員":-1250,"少な":-1050,"年度":-8669,"年間":-1626,"府県":-2363,"手権":-1982,"新聞":-4066,"日新":-722,"日本":-7068,"日米":3372,"曜日":-601,"朝鮮":-2355,"本人":-2697,"東京":-1543,"然と":-1384,"社会":-1276,"立て":-990,"第に":-1612,"米国":-4268,"":-669};
this.BW3__ = {"あた":-2194,"あり":719,"ある":3846,"い.":-1185,"い。":-1185,"いい":5308,"いえ":2079,"いく":3029,"いた":2056,"いっ":1883,"いる":5600,"いわ":1527,"うち":1117,"うと":4798,"えと":1454,"か.":2857,"か。":2857,"かけ":-743,"かっ":-4098,"かに":-669,"から":6520,"かり":-2670,"が,":1816,"が、":1816,"がき":-4855,"がけ":-1127,"がっ":-913,"がら":-4977,"がり":-2064,"きた":1645,"けど":1374,"こと":7397,"この":1542,"ころ":-2757,"さい":-714,"さを":976,"し,":1557,"し、":1557,"しい":-3714,"した":3562,"して":1449,"しな":2608,"しま":1200,"す.":-1310,"す。":-1310,"する":6521,"ず,":3426,"ず、":3426,"ずに":841,"そう":428,"た.":8875,"た。":8875,"たい":-594,"たの":812,"たり":-1183,"たる":-853,"だ.":4098,"だ。":4098,"だっ":1004,"った":-4748,"って":300,"てい":6240,"てお":855,"ても":302,"です":1437,"でに":-1482,"では":2295,"とう":-1387,"とし":2266,"との":541,"とも":-3543,"どう":4664,"ない":1796,"なく":-903,"など":2135,"に,":-1021,"に、":-1021,"にし":1771,"にな":1906,"には":2644,"の,":-724,"の、":-724,"の子":-1000,"は,":1337,"は、":1337,"べき":2181,"まし":1113,"ます":6943,"まっ":-1549,"まで":6154,"まれ":-793,"らし":1479,"られ":6820,"るる":3818,"れ,":854,"れ、":854,"れた":1850,"れて":1375,"れば":-3246,"れる":1091,"われ":-605,"んだ":606,"んで":798,"カ月":990,"会議":860,"入り":1232,"大会":2217,"始め":1681,"市":965,"新聞":-5055,"日,":974,"日、":974,"社会":2024,"カ月":990};
this.TC1__ = {"AAA":1093,"HHH":1029,"HHM":580,"HII":998,"HOH":-390,"HOM":-331,"IHI":1169,"IOH":-142,"IOI":-1015,"IOM":467,"MMH":187,"OOI":-1832};
this.TC2__ = {"HHO":2088,"HII":-1023,"HMM":-1154,"IHI":-1965,"KKH":703,"OII":-2649};
this.TC3__ = {"AAA":-294,"HHH":346,"HHI":-341,"HII":-1088,"HIK":731,"HOH":-1486,"IHH":128,"IHI":-3041,"IHO":-1935,"IIH":-825,"IIM":-1035,"IOI":-542,"KHH":-1216,"KKA":491,"KKH":-1217,"KOK":-1009,"MHH":-2694,"MHM":-457,"MHO":123,"MMH":-471,"NNH":-1689,"NNO":662,"OHO":-3393};
this.TC4__ = {"HHH":-203,"HHI":1344,"HHK":365,"HHM":-122,"HHN":182,"HHO":669,"HIH":804,"HII":679,"HOH":446,"IHH":695,"IHO":-2324,"IIH":321,"III":1497,"IIO":656,"IOO":54,"KAK":4845,"KKA":3386,"KKK":3065,"MHH":-405,"MHI":201,"MMH":-241,"MMM":661,"MOM":841};
this.TQ1__ = {"BHHH":-227,"BHHI":316,"BHIH":-132,"BIHH":60,"BIII":1595,"BNHH":-744,"BOHH":225,"BOOO":-908,"OAKK":482,"OHHH":281,"OHIH":249,"OIHI":200,"OIIH":-68};
this.TQ2__ = {"BIHH":-1401,"BIII":-1033,"BKAK":-543,"BOOO":-5591};
this.TQ3__ = {"BHHH":478,"BHHM":-1073,"BHIH":222,"BHII":-504,"BIIH":-116,"BIII":-105,"BMHI":-863,"BMHM":-464,"BOMH":620,"OHHH":346,"OHHI":1729,"OHII":997,"OHMH":481,"OIHH":623,"OIIH":1344,"OKAK":2792,"OKHH":587,"OKKA":679,"OOHH":110,"OOII":-685};
this.TQ4__ = {"BHHH":-721,"BHHM":-3604,"BHII":-966,"BIIH":-607,"BIII":-2181,"OAAA":-2763,"OAKK":180,"OHHH":-294,"OHHI":2446,"OHHO":480,"OHIH":-1573,"OIHH":1935,"OIHI":-493,"OIIH":626,"OIII":-4007,"OKAK":-8156};
this.TW1__ = {"につい":-4681,"東京都":2026};
this.TW2__ = {"ある程":-2049,"いった":-1256,"ころが":-2434,"しょう":3873,"その後":-4430,"だって":-1049,"ていた":1833,"として":-4657,"ともに":-4517,"もので":1882,"一気に":-792,"初めて":-1512,"同時に":-8097,"大きな":-1255,"対して":-2721,"社会党":-3216};
this.TW3__ = {"いただ":-1734,"してい":1314,"として":-4314,"につい":-5483,"にとっ":-5989,"に当た":-6247,"ので,":-727,"ので、":-727,"のもの":-600,"れから":-3752,"十二月":-2287};
this.TW4__ = {"いう.":8576,"いう。":8576,"からな":-2348,"してい":2958,"たが,":1516,"たが、":1516,"ている":1538,"という":1349,"ました":5543,"ません":1097,"ようと":-4258,"よると":5865};
this.UC1__ = {"A":484,"K":93,"M":645,"O":-505};
this.UC2__ = {"A":819,"H":1059,"I":409,"M":3987,"N":5775,"O":646};
this.UC3__ = {"A":-1370,"I":2311};
this.UC4__ = {"A":-2643,"H":1809,"I":-1032,"K":-3450,"M":3565,"N":3876,"O":6646};
this.UC5__ = {"H":313,"I":-1238,"K":-799,"M":539,"O":-831};
this.UC6__ = {"H":-506,"I":-253,"K":87,"M":247,"O":-387};
this.UP1__ = {"O":-214};
this.UP2__ = {"B":69,"O":935};
this.UP3__ = {"B":189};
this.UQ1__ = {"BH":21,"BI":-12,"BK":-99,"BN":142,"BO":-56,"OH":-95,"OI":477,"OK":410,"OO":-2422};
this.UQ2__ = {"BH":216,"BI":113,"OK":1759};
this.UQ3__ = {"BA":-479,"BH":42,"BI":1913,"BK":-7198,"BM":3160,"BN":6427,"BO":14761,"OI":-827,"ON":-3212};
this.UW1__ = {",":156,"、":156,"「":-463,"あ":-941,"う":-127,"が":-553,"き":121,"こ":505,"で":-201,"と":-547,"ど":-123,"に":-789,"の":-185,"は":-847,"も":-466,"や":-470,"よ":182,"ら":-292,"り":208,"れ":169,"を":-446,"ん":-137,"・":-135,"主":-402,"京":-268,"区":-912,"午":871,"国":-460,"大":561,"委":729,"市":-411,"日":-141,"理":361,"生":-408,"県":-386,"都":-718,"「":-463,"・":-135};
this.UW2__ = {",":-829,"、":-829,"":892,"「":-645,"」":3145,"あ":-538,"い":505,"う":134,"お":-502,"か":1454,"が":-856,"く":-412,"こ":1141,"さ":878,"ざ":540,"し":1529,"す":-675,"せ":300,"そ":-1011,"た":188,"だ":1837,"つ":-949,"て":-291,"で":-268,"と":-981,"ど":1273,"な":1063,"に":-1764,"の":130,"は":-409,"ひ":-1273,"べ":1261,"ま":600,"も":-1263,"や":-402,"よ":1639,"り":-579,"る":-694,"れ":571,"を":-2516,"ん":2095,"ア":-587,"カ":306,"キ":568,"ッ":831,"三":-758,"不":-2150,"世":-302,"中":-968,"主":-861,"事":492,"人":-123,"会":978,"保":362,"入":548,"初":-3025,"副":-1566,"北":-3414,"区":-422,"大":-1769,"天":-865,"太":-483,"子":-1519,"学":760,"実":1023,"小":-2009,"市":-813,"年":-1060,"強":1067,"手":-1519,"揺":-1033,"政":1522,"文":-1355,"新":-1682,"日":-1815,"明":-1462,"最":-630,"朝":-1843,"本":-1650,"東":-931,"果":-665,"次":-2378,"民":-180,"気":-1740,"理":752,"発":529,"目":-1584,"相":-242,"県":-1165,"立":-763,"第":810,"米":509,"自":-1353,"行":838,"西":-744,"見":-3874,"調":1010,"議":1198,"込":3041,"開":1758,"間":-1257,"「":-645,"」":3145,"ッ":831,"ア":-587,"カ":306,"キ":568};
this.UW3__ = {",":4889,"1":-800,"":-1723,"、":4889,"々":-2311,"":5827,"」":2670,"〓":-3573,"あ":-2696,"い":1006,"う":2342,"え":1983,"お":-4864,"か":-1163,"が":3271,"く":1004,"け":388,"げ":401,"こ":-3552,"ご":-3116,"さ":-1058,"し":-395,"す":584,"せ":3685,"そ":-5228,"た":842,"ち":-521,"っ":-1444,"つ":-1081,"て":6167,"で":2318,"と":1691,"ど":-899,"な":-2788,"に":2745,"の":4056,"は":4555,"ひ":-2171,"ふ":-1798,"へ":1199,"ほ":-5516,"ま":-4384,"み":-120,"め":1205,"も":2323,"や":-788,"よ":-202,"ら":727,"り":649,"る":5905,"れ":2773,"わ":-1207,"を":6620,"ん":-518,"ア":551,"グ":1319,"ス":874,"ッ":-1350,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278,"・":-3794,"一":-1619,"下":-1759,"世":-2087,"両":3815,"中":653,"主":-758,"予":-1193,"二":974,"人":2742,"今":792,"他":1889,"以":-1368,"低":811,"何":4265,"作":-361,"保":-2439,"元":4858,"党":3593,"全":1574,"公":-3030,"六":755,"共":-1880,"円":5807,"再":3095,"分":457,"初":2475,"別":1129,"前":2286,"副":4437,"力":365,"動":-949,"務":-1872,"化":1327,"北":-1038,"区":4646,"千":-2309,"午":-783,"協":-1006,"口":483,"右":1233,"各":3588,"合":-241,"同":3906,"和":-837,"員":4513,"国":642,"型":1389,"場":1219,"外":-241,"妻":2016,"学":-1356,"安":-423,"実":-1008,"家":1078,"小":-513,"少":-3102,"州":1155,"市":3197,"平":-1804,"年":2416,"広":-1030,"府":1605,"度":1452,"建":-2352,"当":-3885,"得":1905,"思":-1291,"性":1822,"戸":-488,"指":-3973,"政":-2013,"教":-1479,"数":3222,"文":-1489,"新":1764,"日":2099,"旧":5792,"昨":-661,"時":-1248,"曜":-951,"最":-937,"月":4125,"期":360,"李":3094,"村":364,"東":-805,"核":5156,"森":2438,"業":484,"氏":2613,"民":-1694,"決":-1073,"法":1868,"海":-495,"無":979,"物":461,"特":-3850,"生":-273,"用":914,"町":1215,"的":7313,"直":-1835,"省":792,"県":6293,"知":-1528,"私":4231,"税":401,"立":-960,"第":1201,"米":7767,"系":3066,"約":3663,"級":1384,"統":-4229,"総":1163,"線":1255,"者":6457,"能":725,"自":-2869,"英":785,"見":1044,"調":-562,"財":-733,"費":1777,"車":1835,"軍":1375,"込":-1504,"通":-1136,"選":-681,"郎":1026,"郡":4404,"部":1200,"金":2163,"長":421,"開":-1432,"間":1302,"関":-1282,"雨":2009,"電":-1045,"非":2066,"駅":1620,"":-800,"」":2670,"・":-3794,"ッ":-1350,"ア":551,"グ":1319,"ス":874,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278};
this.UW4__ = {",":3930,".":3508,"―":-4841,"、":3930,"。":3508,"":4999,"「":1895,"」":3798,"〓":-5156,"あ":4752,"い":-3435,"う":-640,"え":-2514,"お":2405,"か":530,"が":6006,"き":-4482,"ぎ":-3821,"く":-3788,"け":-4376,"げ":-4734,"こ":2255,"ご":1979,"さ":2864,"し":-843,"じ":-2506,"す":-731,"ず":1251,"せ":181,"そ":4091,"た":5034,"だ":5408,"ち":-3654,"っ":-5882,"つ":-1659,"て":3994,"で":7410,"と":4547,"な":5433,"に":6499,"ぬ":1853,"ね":1413,"の":7396,"は":8578,"ば":1940,"ひ":4249,"び":-4134,"ふ":1345,"へ":6665,"べ":-744,"ほ":1464,"ま":1051,"み":-2082,"む":-882,"め":-5046,"も":4169,"ゃ":-2666,"や":2795,"ょ":-1544,"よ":3351,"ら":-2922,"り":-9726,"る":-14896,"れ":-2613,"ろ":-4570,"わ":-1783,"を":13150,"ん":-2352,"カ":2145,"コ":1789,"セ":1287,"ッ":-724,"ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637,"・":-4371,"ー":-11870,"一":-2069,"中":2210,"予":782,"事":-190,"井":-1768,"人":1036,"以":544,"会":950,"体":-1286,"作":530,"側":4292,"先":601,"党":-2006,"共":-1212,"内":584,"円":788,"初":1347,"前":1623,"副":3879,"力":-302,"動":-740,"務":-2715,"化":776,"区":4517,"協":1013,"参":1555,"合":-1834,"和":-681,"員":-910,"器":-851,"回":1500,"国":-619,"園":-1200,"地":866,"場":-1410,"塁":-2094,"士":-1413,"多":1067,"大":571,"子":-4802,"学":-1397,"定":-1057,"寺":-809,"小":1910,"屋":-1328,"山":-1500,"島":-2056,"川":-2667,"市":2771,"年":374,"庁":-4556,"後":456,"性":553,"感":916,"所":-1566,"支":856,"改":787,"政":2182,"教":704,"文":522,"方":-856,"日":1798,"時":1829,"最":845,"月":-9066,"木":-485,"来":-442,"校":-360,"業":-1043,"氏":5388,"民":-2716,"気":-910,"沢":-939,"済":-543,"物":-735,"率":672,"球":-1267,"生":-1286,"産":-1101,"田":-2900,"町":1826,"的":2586,"目":922,"省":-3485,"県":2997,"空":-867,"立":-2112,"第":788,"米":2937,"系":786,"約":2171,"経":1146,"統":-1169,"総":940,"線":-994,"署":749,"者":2145,"能":-730,"般":-852,"行":-792,"規":792,"警":-1184,"議":-244,"谷":-1000,"賞":730,"車":-1481,"軍":1158,"輪":-1433,"込":-3370,"近":929,"道":-1291,"選":2596,"郎":-4866,"都":1192,"野":-1100,"銀":-2213,"長":357,"間":-2344,"院":-2297,"際":-2604,"電":-878,"領":-1659,"題":-792,"館":-1984,"首":1749,"高":2120,"「":1895,"」":3798,"・":-4371,"ッ":-724,"ー":-11870,"カ":2145,"コ":1789,"セ":1287,"ト":-403,"メ":
-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637};
this.UW5__ = {",":465,".":-299,"1":-514,"E2":-32768,"]":-2762,"、":465,"。":-299,"「":363,"あ":1655,"い":331,"う":-503,"え":1199,"お":527,"か":647,"が":-421,"き":1624,"ぎ":1971,"く":312,"げ":-983,"さ":-1537,"し":-1371,"す":-852,"だ":-1186,"ち":1093,"っ":52,"つ":921,"て":-18,"で":-850,"と":-127,"ど":1682,"な":-787,"に":-1224,"の":-635,"は":-578,"べ":1001,"み":502,"め":865,"ゃ":3350,"ょ":854,"り":-208,"る":429,"れ":504,"わ":419,"を":-1264,"ん":327,"イ":241,"ル":451,"ン":-343,"中":-871,"京":722,"会":-1153,"党":-654,"務":3519,"区":-901,"告":848,"員":2104,"大":-1296,"学":-548,"定":1785,"嵐":-1304,"市":-2991,"席":921,"年":1763,"思":872,"所":-814,"挙":1618,"新":-1682,"日":218,"月":-4353,"査":932,"格":1356,"機":-1508,"氏":-1347,"田":240,"町":-3912,"的":-3149,"相":1319,"省":-1052,"県":-4003,"研":-997,"社":-278,"空":-813,"統":1955,"者":-2233,"表":663,"語":-1073,"議":1219,"選":-1018,"郎":-368,"長":786,"間":1191,"題":2368,"館":-689,"":-514,"":-32768,"「":363,"イ":241,"ル":451,"ン":-343};
this.UW6__ = {",":227,".":808,"1":-270,"E1":306,"、":227,"。":808,"あ":-307,"う":189,"か":241,"が":-73,"く":-121,"こ":-200,"じ":1782,"す":383,"た":-428,"っ":573,"て":-1014,"で":101,"と":-105,"な":-253,"に":-149,"の":-417,"は":-236,"も":-206,"り":187,"る":-135,"を":195,"ル":-673,"ン":-496,"一":-277,"中":201,"件":-800,"会":624,"前":302,"区":1792,"員":-1212,"委":798,"学":-960,"市":887,"広":-695,"後":535,"業":-697,"相":753,"社":-507,"福":974,"空":-822,"者":1811,"連":463,"郎":1082,"":-270,"":306,"ル":-673,"ン":-496};
// (The return value is the instance itself.)
return this;
}
/**
 * Classify a single character into a character-type tag.
 *
 * Tries the compiled [regexp, tag] pairs in registration order and
 * returns the tag of the first pattern that matches.
 *
 * @param {string} str - A single character (or short string) to classify.
 * @returns {string} The matching type tag, or "O" (other) when no
 *   pattern matches.
 */
TinySegmenter.prototype.ctype_ = function(str) {
    for (var idx = 0; idx < this.chartype_.length; ++idx) {
        var pair = this.chartype_[idx];
        if (str.match(pair[0])) {
            return pair[1];
        }
    }
    return "O";
}
/**
 * Weight-table lookup helper: coerce a missing (undefined) or otherwise
 * falsy table entry to 0 so scores can be summed unconditionally.
 *
 * @param {number|undefined} v - Raw value read from a weight table.
 * @returns {number} The value itself when truthy, otherwise 0.
 */
TinySegmenter.prototype.ts_ = function(v) {
    return v || 0;
}
/**
 * Segment a Japanese string into an array of word tokens.
 *
 * Implements TinySegmenter's pointwise boundary prediction: for each
 * character position a score is accumulated from the pre-trained weight
 * tables (character/word uni-, bi- and trigram features plus the three
 * previously predicted boundary tags); a positive score means a word
 * boundary precedes the current character.
 *
 * @param {string} input - Text to tokenize; null/undefined/"" yield [].
 * @returns {string[]} The segments, concatenating back to the input.
 */
TinySegmenter.prototype.segment = function(input) {
    // Loose equality is intentional: `== null` also catches undefined
    // (the original's separate `== undefined` test was redundant), and
    // `== ""` keeps the original's loose empty check.
    if (input == null || input == "") {
        return [];
    }
    var result = [];
    // Pad with begin ("B1".."B3") / end ("E1".."E3") sentinels so the
    // n-gram feature windows are defined at the string boundaries.
    var seg = ["B3", "B2", "B1"];
    var ctype = ["O", "O", "O"];
    var o = input.split("");
    // Declare the loop index up front: the original assigned `i` before
    // its `var` declaration and only worked via hoisting.
    var i;
    for (i = 0; i < o.length; ++i) {
        seg.push(o[i]);
        ctype.push(this.ctype_(o[i]));
    }
    seg.push("E1");
    seg.push("E2");
    seg.push("E3");
    ctype.push("O");
    ctype.push("O");
    ctype.push("O");
    var word = seg[3];
    // p1..p3: boundary tags of the three previous positions
    // ("U" unknown, "B" boundary, "O" no boundary), fed back as features.
    var p1 = "U";
    var p2 = "U";
    var p3 = "U";
    for (i = 4; i < seg.length - 3; ++i) {
        var score = this.BIAS__;
        // Sliding window of six characters / character types centred on
        // position i (w4/c4 is the current character).
        var w1 = seg[i - 3];
        var w2 = seg[i - 2];
        var w3 = seg[i - 1];
        var w4 = seg[i];
        var w5 = seg[i + 1];
        var w6 = seg[i + 2];
        var c1 = ctype[i - 3];
        var c2 = ctype[i - 2];
        var c3 = ctype[i - 1];
        var c4 = ctype[i];
        var c5 = ctype[i + 1];
        var c6 = ctype[i + 2];
        // Previous-tag features.
        score += this.ts_(this.UP1__[p1]);
        score += this.ts_(this.UP2__[p2]);
        score += this.ts_(this.UP3__[p3]);
        score += this.ts_(this.BP1__[p1 + p2]);
        score += this.ts_(this.BP2__[p2 + p3]);
        // Surface-character n-gram features.
        score += this.ts_(this.UW1__[w1]);
        score += this.ts_(this.UW2__[w2]);
        score += this.ts_(this.UW3__[w3]);
        score += this.ts_(this.UW4__[w4]);
        score += this.ts_(this.UW5__[w5]);
        score += this.ts_(this.UW6__[w6]);
        score += this.ts_(this.BW1__[w2 + w3]);
        score += this.ts_(this.BW2__[w3 + w4]);
        score += this.ts_(this.BW3__[w4 + w5]);
        score += this.ts_(this.TW1__[w1 + w2 + w3]);
        score += this.ts_(this.TW2__[w2 + w3 + w4]);
        score += this.ts_(this.TW3__[w3 + w4 + w5]);
        score += this.ts_(this.TW4__[w4 + w5 + w6]);
        // Character-type n-gram features.
        score += this.ts_(this.UC1__[c1]);
        score += this.ts_(this.UC2__[c2]);
        score += this.ts_(this.UC3__[c3]);
        score += this.ts_(this.UC4__[c4]);
        score += this.ts_(this.UC5__[c5]);
        score += this.ts_(this.UC6__[c6]);
        score += this.ts_(this.BC1__[c2 + c3]);
        score += this.ts_(this.BC2__[c3 + c4]);
        score += this.ts_(this.BC3__[c4 + c5]);
        score += this.ts_(this.TC1__[c1 + c2 + c3]);
        score += this.ts_(this.TC2__[c2 + c3 + c4]);
        score += this.ts_(this.TC3__[c3 + c4 + c5]);
        score += this.ts_(this.TC4__[c4 + c5 + c6]);
        // score += this.ts_(this.TC5__[c4 + c5 + c6]); // TC5 table absent from this model
        // Combined tag + character-type features.
        score += this.ts_(this.UQ1__[p1 + c1]);
        score += this.ts_(this.UQ2__[p2 + c2]);
        score += this.ts_(this.UQ3__[p3 + c3]);
        score += this.ts_(this.BQ1__[p2 + c2 + c3]);
        score += this.ts_(this.BQ2__[p2 + c3 + c4]);
        score += this.ts_(this.BQ3__[p3 + c2 + c3]);
        score += this.ts_(this.BQ4__[p3 + c3 + c4]);
        score += this.ts_(this.TQ1__[p2 + c1 + c2 + c3]);
        score += this.ts_(this.TQ2__[p2 + c2 + c3 + c4]);
        score += this.ts_(this.TQ3__[p3 + c1 + c2 + c3]);
        score += this.ts_(this.TQ4__[p3 + c2 + c3 + c4]);
        var p = "O";
        if (score > 0) {
            // Positive score => a boundary precedes seg[i]: flush the
            // accumulated word and tag this position "B".
            result.push(word);
            word = "";
            p = "B";
        }
        // Shift the predicted-tag history window.
        p1 = p2;
        p2 = p3;
        p3 = p;
        word += seg[i];
    }
    // Flush the final word (the loop only flushes at boundaries).
    result.push(word);
    return result;
}
export { TinySegmenter };

View File

@@ -1,14 +1,12 @@
{
"display_name": "LittleWhiteBox",
"loading_order": 10,
"requires": [],
"optional": [],
"js": "index.js",
"css": "style.css",
"author": "biex",
"version": "2.4.0",
"homePage": "https://github.com/RT15548/LittleWhiteBox",
"generate_interceptor": "xiaobaixGenerateInterceptor"
}
{
"display_name": "LittleWhiteBox",
"loading_order": 10,
"requires": [],
"optional": [],
"js": "index.js",
"css": "style.css",
"author": "biex",
"version": "2.4.0",
"homePage": "https://github.com/RT15548/LittleWhiteBox",
"generate_interceptor": "xiaobaixGenerateInterceptor"
}

View File

@@ -561,7 +561,7 @@ html, body {
配置
══════════════════════════════════════════════════════════════════════════════ */
const TTS_WORKER_URL = 'https://hstts.velure.top';
const TTS_WORKER_URL = 'https://hstts.velure.codes';
const VALID_EMOTIONS = ['happy', 'sad', 'angry', 'surprise', 'scare', 'hate'];
const EMOTION_ICONS = {

View File

@@ -2,7 +2,7 @@
// 语音模块 - TTS 合成服务
// ════════════════════════════════════════════════════════════════════════════
export const TTS_WORKER_URL = 'https://hstts.velure.top';
export const TTS_WORKER_URL = 'https://hstts.velure.codes';
export const DEFAULT_VOICE = 'female_1';
export const DEFAULT_SPEED = 1.0;

View File

@@ -199,13 +199,6 @@ const DEFAULT_JSON_TEMPLATES = {
]
}
}
}`,
worldNewsRefresh: `{
"world": {
"news": [
{ "title": "新闻标题", "time": "时间(可选)", "content": "新闻内容" }
]
}
}`,
localMapGen: `{
"review": {
@@ -268,7 +261,7 @@ const DEFAULT_PROMPTS = {
stranger: {
u1: v => `你是TRPG数据整理助手。从剧情文本中提取{{user}}遇到的陌生人/NPC整理为JSON数组。`,
a1: () => `明白。请提供【世界观】和【剧情经历】我将提取角色并以JSON数组输出。`,
u2: v => `### 上下文\n\n**1. 世界观:**\n${worldInfo}\n\n**2. {{user}}经历:**\n${history(v.historyCount)}${v.storyOutline ? `\n\n**剧情大纲:**\n${wrap('story_outline', v.storyOutline)}` : ''}${nameList(v.existingContacts, v.existingStrangers)}\n\n### 输出要求\n\n1. 返回一个合法 JSON 数组,使用标准 JSON 语法(键名和字符串都用半角双引号 "\n2. 只提取有具体称呼的角色\n3. 每个角色只需 name / location / info 三个字段\n4. 文本内容中如需使用引号,请使用单引号或中文引号「」或"",不要使用半角双引号 "\n5. 无新角色返回 []\n\n\n模板:${JSON_TEMPLATES.stranger}`,
u2: v => `### 上下文\n\n**1. 世界观:**\n${worldInfo}\n\n**2. {{user}}经历:**\n${history(v.historyCount)}${v.storyOutline ? `\n\n**剧情大纲:**\n${wrap('story_outline', v.storyOutline)}` : ''}${nameList(v.existingContacts, v.existingStrangers)}\n\n### 输出要求\n\n1. 返回一个合法 JSON 数组,使用标准 JSON 语法(键名和字符串都用半角双引号 "\n2. 只提取有具体称呼的角色\n3. 每个角色只需 name / location / info 三个字段\n4. 文本内容中如需使用引号,请使用单引号或中文引号「」或"",不要使用半角双引号 "\n5. 无新角色返回 []\n\n\n模板:${JSON_TEMPLATES.npc}`,
a2: () => `了解开始生成JSON:`
},
worldGenStep1: {
@@ -380,12 +373,6 @@ const DEFAULT_PROMPTS = {
u2: v => `【世界观设定】:\n${worldInfo}\n\n【{{user}}历史】:\n${history(v.historyCount)}\n\n【当前世界状态JSON】可能包含 meta/world/maps 等字段):\n${v.currentWorldData || '{}'}\n\n【JSON模板辅助模式\n${JSON_TEMPLATES.worldSimAssist}`,
a2: () => `开始按 worldSimAssist 模板输出JSON:`
},
worldNewsRefresh: {
u1: v => `你是世界新闻编辑。基于世界观设定与{{user}}近期经历,为世界生成「最新资讯」。\n\n要求:\n1) 只输出 world.news不要输出 maps/meta/其他字段)。\n2) news 至少 ${randomRange(2, 4)} 条;语气轻松、中性,夹带少量日常生活细节;可以包含与主剧情相关的跟进报道。\n3) 只输出符合模板的 JSON禁止解释文字。\n\n- 使用标准 JSON 语法:所有键名与字符串都使用半角双引号\n- 文本内容如需使用引号,请使用单引号或中文引号「」/“”,不要使用半角双引号`,
a1: () => `明白。我将只更新 world.news不改动世界其它字段。请提供当前世界数据。`,
u2: v => `【世界观设定】:\n${worldInfo}\n\n【{{user}}历史】:\n${history(v.historyCount)}\n\n【当前世界状态JSON】可能包含 meta/world/maps 等字段):\n${v.currentWorldData || '{}'}\n\n【JSON模板】\n${JSON_TEMPLATES.worldNewsRefresh}`,
a2: () => `OK, worldNewsRefresh JSON generate start:`
},
localMapGen: {
u1: v => `你是TRPG局部场景生成器。你的任务是根据聊天历史推断{{user}}当前或将要前往的位置(视经历的最后一条消息而定),并为该位置生成详细的局部地图/室内场景。
@@ -602,7 +589,6 @@ export const buildExtractStrangersMessages = v => build('stranger', v);
export const buildWorldGenStep1Messages = v => build('worldGenStep1', v);
export const buildWorldGenStep2Messages = v => build('worldGenStep2', v);
export const buildWorldSimMessages = v => build(v?.mode === 'assist' ? 'worldSimAssist' : 'worldSim', v);
export const buildWorldNewsRefreshMessages = v => build('worldNewsRefresh', v);
export const buildSceneSwitchMessages = v => build('sceneSwitch', v);
export const buildLocalMapGenMessages = v => build('localMapGen', v);
export const buildLocalMapRefreshMessages = v => build('localMapRefresh', v);

File diff suppressed because one or more lines are too long

View File

@@ -21,7 +21,7 @@
*/
// ==================== 1. 导入与常量 ====================
import { extension_settings, saveMetadataDebounced, writeExtensionField } from "../../../../../extensions.js";
import { extension_settings, saveMetadataDebounced } from "../../../../../extensions.js";
import { chat_metadata, name1, processCommands, eventSource, event_types as st_event_types } from "../../../../../../script.js";
import { loadWorldInfo, saveWorldInfo, world_names, world_info } from "../../../../../world-info.js";
import { getContext } from "../../../../../st-context.js";
@@ -33,7 +33,7 @@ import { promptManager } from "../../../../../openai.js";
import {
buildSmsMessages, buildSummaryMessages, buildSmsHistoryContent, buildExistingSummaryContent,
buildNpcGenerationMessages, formatNpcToWorldbookContent, buildExtractStrangersMessages,
buildWorldGenStep1Messages, buildWorldGenStep2Messages, buildWorldSimMessages, buildWorldNewsRefreshMessages, buildSceneSwitchMessages,
buildWorldGenStep1Messages, buildWorldGenStep2Messages, buildWorldSimMessages, buildSceneSwitchMessages,
buildInviteMessages, buildLocalMapGenMessages, buildLocalMapRefreshMessages, buildLocalSceneGenMessages,
buildOverlayHtml, MOBILE_LAYOUT_STYLE, DESKTOP_LAYOUT_STYLE, getPromptConfigPayload, setPromptConfig
} from "./story-outline-prompt.js";
@@ -48,86 +48,6 @@ const DEBUG_KEY = 'LittleWhiteBox_StoryOutline_Debug';
let overlayCreated = false, frameReady = false, pendingMsgs = [], presetCleanup = null, step1Cache = null;
// ==================== PromptConfig (global + character card) ====================
// Global: server storage key `promptConfig` (old behavior)
// Character: character-card extension field (see scheduled-tasks implementation)
const PROMPTS_MODULE_NAME = 'xiaobaix-story-outline-prompts';
let promptConfigGlobal = { jsonTemplates: {}, promptSources: {} };
let promptConfigCharacter = { jsonTemplates: {}, promptSources: {} };
let promptConfigCharacterId = null;
function normalizePromptConfig(cfg) {
const src = (cfg && typeof cfg === 'object') ? cfg : {};
const out = {
jsonTemplates: { ...(src.jsonTemplates || {}) },
promptSources: {},
};
const ps = src.promptSources || src.prompts || {};
Object.entries(ps).forEach(([k, v]) => {
if (!v || typeof v !== 'object' || Array.isArray(v)) return;
out.promptSources[k] = { ...v };
});
return out;
}
function mergePromptConfig(globalCfg, charCfg) {
const g = normalizePromptConfig(globalCfg);
const c = normalizePromptConfig(charCfg);
const mergedPromptSources = { ...(g.promptSources || {}) };
Object.entries(c.promptSources || {}).forEach(([key, parts]) => {
const base = mergedPromptSources[key];
mergedPromptSources[key] = (base && typeof base === 'object' && !Array.isArray(base))
? { ...base, ...(parts || {}) }
: { ...(parts || {}) };
});
return {
jsonTemplates: { ...(g.jsonTemplates || {}), ...(c.jsonTemplates || {}) },
promptSources: mergedPromptSources,
};
}
function getCharacterPromptConfig() {
const ctx = getContext?.();
const charId = ctx?.characterId ?? null;
const char = (charId != null) ? ctx?.characters?.[charId] : null;
const cfg = char?.data?.extensions?.[PROMPTS_MODULE_NAME]?.promptConfig || null;
return normalizePromptConfig(cfg);
}
async function saveCharacterPromptConfig(cfg) {
const ctx = getContext?.();
const charId = ctx?.characterId ?? null;
if (charId == null) return;
const payload = { promptConfig: normalizePromptConfig(cfg) };
await writeExtensionField(Number(charId), PROMPTS_MODULE_NAME, payload);
// Keep in-memory character extension in sync (same pattern as scheduled-tasks).
try {
const char = ctx?.characters?.[charId];
if (char) {
if (!char.data) char.data = {};
if (!char.data.extensions) char.data.extensions = {};
char.data.extensions[PROMPTS_MODULE_NAME] = payload;
}
} catch { }
}
function getPromptConfigPayloadWithStores() {
const base = getPromptConfigPayload?.() || {};
return {
...base,
stores: {
global: normalizePromptConfig(promptConfigGlobal),
character: normalizePromptConfig(promptConfigCharacter),
},
};
}
// ==================== 2. 通用工具 ====================
/** 移动端检测 */
@@ -691,40 +611,17 @@ function postFrame(payload) {
const flushPending = () => { if (!frameReady) return; const f = document.getElementById("xiaobaix-story-outline-iframe"); pendingMsgs.forEach(p => { if (f) postToIframe(f, p, "LittleWhiteBox"); }); pendingMsgs = []; };
async function syncPromptConfigForCurrentCharacter() {
const ctx = getContext?.();
const charId = ctx?.characterId ?? null;
if (promptConfigCharacterId !== charId) {
promptConfigCharacterId = charId;
promptConfigCharacter = getCharacterPromptConfig();
}
try {
const cfg = await StoryOutlineStorage?.get?.('promptConfig', null);
promptConfigGlobal = normalizePromptConfig(cfg);
} catch {
promptConfigGlobal = { jsonTemplates: {}, promptSources: {} };
}
const merged = mergePromptConfig(promptConfigGlobal, promptConfigCharacter);
setPromptConfig?.(merged, false);
postFrame({ type: "PROMPT_CONFIG_UPDATED", promptConfig: getPromptConfigPayloadWithStores() });
}
/** 发送设置到iframe */
function sendSettings() {
const store = getOutlineStore(), { name: charName, desc: charDesc } = getCharInfo();
syncPromptConfigForCurrentCharacter().catch(() => { });
postFrame({
type: "LOAD_SETTINGS", globalSettings: getGlobalSettings(), commSettings: getCommSettings(),
stage: store?.stage ?? 0, deviationScore: store?.deviationScore ?? 0,
simulationTarget: store?.simulationTarget ?? 5, playerLocation: store?.playerLocation ?? '家',
dataChecked: store?.dataChecked || {}, outlineData: store?.outlineData || {}, promptConfig: getPromptConfigPayloadWithStores(),
dataChecked: store?.dataChecked || {}, outlineData: store?.outlineData || {}, promptConfig: getPromptConfigPayload?.(),
characterCardName: charName, characterCardDescription: charDesc,
characterContactSmsHistory: getCharSmsHistory()
});
}
const loadAndSend = () => { const s = getOutlineStore(); if (s?.mapData) postFrame({ type: "LOAD_MAP_DATA", mapData: s.mapData }); sendSettings(); };
@@ -814,7 +711,6 @@ const V = {
wg1: d => !!d && typeof d === 'object', // 只要是对象就行,后续会 normalize
wg2: d => !!((d?.world && (d?.maps || d?.world?.maps)?.outdoor) || (d?.outdoor && d?.inside)),
wga: d => !!((d?.world && d?.maps?.outdoor) || d?.outdoor), ws: d => !!d, w: o => !!o && typeof o === 'object',
wn: d => Array.isArray(d?.world?.news),
lm: o => !!o?.inside?.name && !!o?.inside?.description
};
@@ -1198,34 +1094,6 @@ async function handleSimWorld({ requestId, currentData, isAuto }) {
} catch (e) { replyErr('SIMULATE_WORLD_RESULT', requestId, `推演失败: ${e.message}`); }
}
async function handleRefreshWorldNews({ requestId }) {
try {
const store = getOutlineStore();
const od = store?.outlineData;
if (!od) return replyErr('REFRESH_WORLD_NEWS_RESULT', requestId, '未找到世界数据,请先生成世界');
// Store may persist maps either under `maps` or as `outdoor/indoor` (iframe SAVE_ALL_DATA format).
const maps = od?.maps || { outdoor: od?.outdoor || null, indoor: od?.indoor || null };
const snapshot = {
meta: od?.meta || {},
world: od?.world || {},
maps,
...(od?.timeline ? { timeline: od.timeline } : {}),
};
const msgs = buildWorldNewsRefreshMessages(getCommonPromptVars({
currentWorldData: JSON.stringify(snapshot, null, 2),
}));
const data = await callLLMJson({ messages: msgs, validate: V.wn });
if (!Array.isArray(data?.world?.news)) return replyErr('REFRESH_WORLD_NEWS_RESULT', requestId, '世界新闻刷新失败:无法解析 JSON 数据');
reply('REFRESH_WORLD_NEWS_RESULT', requestId, { success: true, news: data.world.news });
} catch (e) {
replyErr('REFRESH_WORLD_NEWS_RESULT', requestId, `世界新闻刷新失败: ${e.message}`);
}
}
function handleSaveSettings(d) {
if (d.globalSettings) saveGlobalSettings(d.globalSettings);
if (d.commSettings) saveCommSettings(d.commSettings);
@@ -1247,56 +1115,39 @@ function handleSaveSettings(d) {
}
async function handleSavePrompts(d) {
const scope = d?.scope === 'character' ? 'character' : 'global';
// Back-compat: full payload (old iframe) -> treat as global save (old server storage behavior).
// Back-compat: full payload (old iframe)
if (d?.promptConfig) {
promptConfigGlobal = normalizePromptConfig(d.promptConfig);
try { await StoryOutlineStorage?.set?.('promptConfig', promptConfigGlobal); } catch { }
// Re-read current character config (if any) and apply merged.
promptConfigCharacter = getCharacterPromptConfig();
const merged = mergePromptConfig(promptConfigGlobal, promptConfigCharacter);
setPromptConfig?.(merged, false);
postFrame({ type: "PROMPT_CONFIG_UPDATED", promptConfig: getPromptConfigPayloadWithStores() });
const payload = setPromptConfig?.(d.promptConfig, false) || d.promptConfig;
try { await StoryOutlineStorage?.set?.('promptConfig', payload); } catch { }
postFrame({ type: "PROMPT_CONFIG_UPDATED", promptConfig: getPromptConfigPayload?.() });
return;
}
// New: incremental update by key
const key = d?.key;
if (!key) return;
// Always merge against the latest global config from server storage.
try { promptConfigGlobal = normalizePromptConfig(await StoryOutlineStorage?.get?.('promptConfig', null)); } catch { }
// Always merge against the current character-card config.
promptConfigCharacterId = getContext?.()?.characterId ?? null;
promptConfigCharacter = getCharacterPromptConfig();
let current = null;
try { current = await StoryOutlineStorage?.get?.('promptConfig', null); } catch { }
const next = (current && typeof current === 'object') ? {
jsonTemplates: { ...(current.jsonTemplates || {}) },
promptSources: { ...(current.promptSources || {}) },
} : { jsonTemplates: {}, promptSources: {} };
const applyDelta = (cfg) => {
const next = normalizePromptConfig(cfg);
if (d?.reset) {
delete next.promptSources[key];
delete next.jsonTemplates[key];
return next;
}
if (d?.reset) {
delete next.promptSources[key];
delete next.jsonTemplates[key];
} else {
if (d?.prompt && typeof d.prompt === 'object') next.promptSources[key] = d.prompt;
if ('jsonTemplate' in (d || {})) {
if (d.jsonTemplate == null) delete next.jsonTemplates[key];
else next.jsonTemplates[key] = String(d.jsonTemplate ?? '');
}
return next;
};
if (scope === 'character') {
promptConfigCharacter = applyDelta(promptConfigCharacter);
await saveCharacterPromptConfig(promptConfigCharacter);
} else {
promptConfigGlobal = applyDelta(promptConfigGlobal);
try { await StoryOutlineStorage?.set?.('promptConfig', promptConfigGlobal); } catch { }
}
const merged = mergePromptConfig(promptConfigGlobal, promptConfigCharacter);
setPromptConfig?.(merged, false);
postFrame({ type: "PROMPT_CONFIG_UPDATED", promptConfig: getPromptConfigPayloadWithStores() });
const payload = setPromptConfig?.(next, false) || next;
try { await StoryOutlineStorage?.set?.('promptConfig', payload); } catch { }
postFrame({ type: "PROMPT_CONFIG_UPDATED", promptConfig: getPromptConfigPayload?.() });
}
function handleSaveContacts(d) {
@@ -1357,7 +1208,6 @@ const handlers = {
GENERATE_WORLD: handleGenWorld,
RETRY_WORLD_GEN_STEP2: handleRetryStep2,
SIMULATE_WORLD: handleSimWorld,
REFRESH_WORLD_NEWS: handleRefreshWorldNews,
GENERATE_LOCAL_MAP: handleGenLocalMap,
REFRESH_LOCAL_MAP: handleRefreshLocalMap,
GENERATE_LOCAL_SCENE: handleGenLocalScene
@@ -1520,7 +1370,10 @@ document.addEventListener('xiaobaixEnabledChanged', e => {
/**
 * Pulls the persisted prompt config from server storage at startup and,
 * when one exists, applies it and notifies the embedded frame.
 * Best-effort: sync/storage failures are deliberately swallowed.
 */
async function initPromptConfigFromServer() {
    try {
        await syncPromptConfigForCurrentCharacter();
        const stored = await StoryOutlineStorage?.get?.('promptConfig', null);
        if (stored) {
            setPromptConfig?.(stored, false);
            postFrame({ type: "PROMPT_CONFIG_UPDATED", promptConfig: getPromptConfigPayload?.() });
        }
    } catch { }
}

View File

@@ -0,0 +1,141 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Config (v2 简化版)
// ═══════════════════════════════════════════════════════════════════════════
import { extension_settings } from "../../../../../../extensions.js";
import { EXT_ID } from "../../../core/constants.js";
import { xbLog } from "../../../core/debug-core.js";
import { CommonSettingStorage } from "../../../core/server-storage.js";
const MODULE_ID = 'summaryConfig';
const SUMMARY_CONFIG_KEY = 'storySummaryPanelConfig';
/**
 * Returns this extension's settings bucket from the shared
 * extension_settings object, lazily creating the `storySummary`
 * sub-object (enabled by default) on first access.
 * @returns {object} mutable settings object (shared, not a copy)
 */
export function getSettings() {
    if (!extension_settings[EXT_ID]) extension_settings[EXT_ID] = {};
    const ext = extension_settings[EXT_ID];
    if (!ext.storySummary) ext.storySummary = { enabled: true };
    return ext;
}
const DEFAULT_FILTER_RULES = [
{ start: '<think>', end: '</think>' },
{ start: '<thinking>', end: '</thinking>' },
];
/**
 * Loads the summary panel config from localStorage, layering saved
 * values over built-in defaults. Returns the defaults when storage is
 * empty or unreadable.
 * Normalizations: the legacy 'manual' timing disables auto-trigger, and
 * a missing useStream flag defaults to streaming on.
 * @returns {{api: object, gen: object, trigger: object, vector: object|null}}
 */
export function getSummaryPanelConfig() {
    const defaults = {
        api: { provider: 'st', url: '', key: '', model: '', modelCache: [] },
        gen: { temperature: null, top_p: null, top_k: null, presence_penalty: null, frequency_penalty: null },
        trigger: {
            enabled: false,
            interval: 20,
            timing: 'before_user',
            role: 'system',
            useStream: true,
            maxPerRun: 100,
            wrapperHead: '',
            wrapperTail: '',
            forceInsertAtEnd: false,
        },
        vector: null,
    };
    try {
        const raw = localStorage.getItem('summary_panel_config');
        if (!raw) return defaults;
        const parsed = JSON.parse(raw);
        const result = {
            api: { ...defaults.api, ...(parsed.api || {}) },
            gen: { ...defaults.gen, ...(parsed.gen || {}) },
            trigger: { ...defaults.trigger, ...(parsed.trigger || {}) },
            // Fix: the saved vector section used to be dropped here even
            // though defaults declare it; pass it through for consistency.
            vector: parsed.vector ?? defaults.vector,
        };
        if (result.trigger.timing === 'manual') result.trigger.enabled = false;
        if (result.trigger.useStream === undefined) result.trigger.useStream = true;
        return result;
    } catch {
        return defaults;
    }
}
/**
 * Persists the panel config both locally (localStorage) and to server
 * storage. Failures are logged, never thrown.
 * @param {object} config - full panel config object
 */
export function saveSummaryPanelConfig(config) {
    try {
        const serialized = JSON.stringify(config);
        localStorage.setItem('summary_panel_config', serialized);
        CommonSettingStorage.set(SUMMARY_CONFIG_KEY, config);
    } catch (e) {
        xbLog.error(MODULE_ID, '保存面板配置失败', e);
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 向量配置(简化版 - 只需要 key
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Reads the vector (embedding) sub-config from localStorage.
 * Forces the simplified engine choice: SiliconFlow online provider with
 * the fixed BAAI/bge-m3 model; default text-filter rules are injected
 * when none are stored.
 * @returns {object|null} vector config, or null when absent/unreadable
 */
export function getVectorConfig() {
    try {
        const raw = localStorage.getItem('summary_panel_config');
        if (!raw) return null;
        const cfg = JSON.parse(raw).vector || null;
        if (!cfg) return null;
        if (!cfg.textFilterRules) cfg.textFilterRules = [...DEFAULT_FILTER_RULES];
        // Simplified: always route through the SiliconFlow online engine.
        cfg.engine = 'online';
        cfg.online = cfg.online || {};
        cfg.online.provider = 'siliconflow';
        cfg.online.model = 'BAAI/bge-m3';
        return cfg;
    } catch {
        return null;
    }
}
/**
 * Returns the active text-filter rules, falling back to the defaults
 * (think-tag stripping) when the vector config does not customize them.
 */
export function getTextFilterRules() {
    const rules = getVectorConfig()?.textFilterRules;
    return rules || DEFAULT_FILTER_RULES;
}
/**
 * Merges the given vector settings into the stored panel config and
 * persists it locally plus to server storage. Only the simplified shape
 * is kept (SiliconFlow online + fixed model); other fields are discarded.
 * @param {object} vectorCfg - partial vector config ({enabled, online: {key}, textFilterRules})
 */
export function saveVectorConfig(vectorCfg) {
    try {
        const parsed = JSON.parse(localStorage.getItem('summary_panel_config') || '{}');
        // Simplified persisted shape.
        parsed.vector = {
            enabled: vectorCfg?.enabled || false,
            engine: 'online',
            online: {
                provider: 'siliconflow',
                key: vectorCfg?.online?.key || '',
                model: 'BAAI/bge-m3',
            },
            textFilterRules: vectorCfg?.textFilterRules || DEFAULT_FILTER_RULES,
        };
        localStorage.setItem('summary_panel_config', JSON.stringify(parsed));
        CommonSettingStorage.set(SUMMARY_CONFIG_KEY, parsed);
    } catch (e) {
        xbLog.error(MODULE_ID, '保存向量配置失败', e);
    }
}
/**
 * Fetches the panel config from server storage and mirrors it into
 * localStorage so synchronous readers see the latest copy.
 * @returns {Promise<object|null>} the server config, or null when missing or on failure
 */
export async function loadConfigFromServer() {
    try {
        const remote = await CommonSettingStorage.get(SUMMARY_CONFIG_KEY, null);
        if (!remote) return null;
        localStorage.setItem('summary_panel_config', JSON.stringify(remote));
        xbLog.info(MODULE_ID, '已从服务器加载面板配置');
        return remote;
    } catch (e) {
        xbLog.warn(MODULE_ID, '加载面板配置失败', e);
    }
    return null;
}

View File

@@ -0,0 +1,26 @@
// Memory Database (Dexie schema)
import Dexie from '../../../libs/dexie.mjs';
const DB_NAME = 'LittleWhiteBox_Memory';
const DB_VERSION = 3; // schema version (bumped when the L0 state-vector table was added)
// Chunk parameters
export const CHUNK_MAX_TOKENS = 200;
const db = new Dexie(DB_NAME);
// Dexie stores() syntax: the first entry is the primary key, the rest are
// indexes. Compound [chatId+...] keys scope every row to its chat.
db.version(DB_VERSION).stores({
    meta: 'chatId',
    chunks: '[chatId+chunkId], chatId, [chatId+floor]',
    chunkVectors: '[chatId+chunkId], chatId',
    eventVectors: '[chatId+eventId], chatId',
    stateVectors: '[chatId+atomId], chatId, [chatId+floor]', // L0 state-vector table
});
export { db };
// Convenience handles for the individual tables.
export const metaTable = db.meta;
export const chunksTable = db.chunks;
export const chunkVectorsTable = db.chunkVectors;
export const eventVectorsTable = db.eventVectors;
export const stateVectorsTable = db.stateVectors;

View File

@@ -0,0 +1,442 @@
// Story Summary - Store
// L2 (events/characters/arcs) + L3 (facts) 统一存储
import { getContext, saveMetadataDebounced } from "../../../../../../extensions.js";
import { chat_metadata } from "../../../../../../../script.js";
import { EXT_ID } from "../../../core/constants.js";
import { xbLog } from "../../../core/debug-core.js";
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/storage/chunk-store.js";
const MODULE_ID = 'summaryStore';
const FACTS_LIMIT_PER_SUBJECT = 10;
// ═══════════════════════════════════════════════════════════════════════════
// 基础存取
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Returns the per-chat summary store kept inside chat metadata, creating
 * the nested containers on first access. Returns null when no chat is open.
 * Side effect: on first read of a pre-facts store it migrates legacy
 * `world` / `characters.relationships` data into the unified `facts`
 * list, deletes the legacy fields, and persists the result.
 * @returns {object|null} mutable store object (shared, not a copy)
 */
export function getSummaryStore() {
    const { chatId } = getContext();
    if (!chatId) return null;
    chat_metadata.extensions ||= {};
    chat_metadata.extensions[EXT_ID] ||= {};
    chat_metadata.extensions[EXT_ID].storySummary ||= {};
    const store = chat_metadata.extensions[EXT_ID].storySummary;
    // Auto-migrate legacy data (one-shot: only runs while facts are absent).
    if (store.json && !store.json.facts) {
        const hasOldData = store.json.world?.length || store.json.characters?.relationships?.length;
        if (hasOldData) {
            store.json.facts = migrateToFacts(store.json);
            // Drop the legacy fields once migrated.
            delete store.json.world;
            if (store.json.characters) {
                delete store.json.characters.relationships;
            }
            store.updatedAt = Date.now();
            saveSummaryStore();
            xbLog.info(MODULE_ID, `自动迁移完成: ${store.json.facts.length} 条 facts`);
        }
    }
    return store;
}
/** Persists the summary store by flushing chat metadata (debounced). */
export function saveSummaryStore() {
    saveMetadataDebounced?.();
}
/**
 * Number of most-recent summarized floors to keep visible in chat;
 * defaults to 3 when the store has no explicit setting.
 * @returns {number}
 */
export function getKeepVisibleCount() {
    return getSummaryStore()?.keepVisibleCount ?? 3;
}
/**
 * Computes the inclusive message range to hide given the last-summarized
 * boundary, keeping the configured tail of recent floors visible.
 * @param {number|null} boundary - index of the last summarized message
 * @returns {{start: number, end: number}|null} range to hide, or null when nothing should be hidden
 */
export function calcHideRange(boundary) {
    if (boundary == null || boundary < 0) return null;
    const end = boundary - getKeepVisibleCount();
    return end < 0 ? null : { start: 0, end };
}
/**
 * Appends a snapshot marker recording that a summary run ended at
 * `endMesId`; used later to pick rollback targets.
 * @param {object} store - summary store to mutate
 * @param {number} endMesId - last message index covered by the run
 */
export function addSummarySnapshot(store, endMesId) {
    if (!store.summaryHistory) store.summaryHistory = [];
    store.summaryHistory.push({ endMesId });
}
// ═══════════════════════════════════════════════════════════════════════════
// Fact 工具函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * A fact is a relationship fact when its predicate follows the
 * "对X的…" pattern (e.g. "对小明的看法").
 * @param {{p: string}} f - fact triple
 * @returns {boolean}
 */
export function isRelationFact(f) {
    const relationPattern = /^对.+的/;
    return relationPattern.test(f.p);
}
// ═══════════════════════════════════════════════════════════════════════════
// 从 facts 提取关系(供关系图 UI 使用)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Derives directed relationship edges from relation-type facts for the
 * relationship-graph UI. Retracted facts are ignored; facts whose
 * predicate does not yield a target name are skipped.
 * @param {Array|null} facts - fact triples
 * @returns {Array<{from: string, to: string, label: string, trend: string}>}
 */
export function extractRelationshipsFromFacts(facts) {
    const edges = [];
    for (const f of facts || []) {
        if (f.retracted || !isRelationFact(f)) continue;
        const m = f.p.match(/^对(.+)的/);
        const target = m ? m[1] : '';
        if (!target) continue;
        edges.push({
            from: f.s,
            to: target,
            label: f.o,
            trend: f.trend || '陌生',
        });
    }
    return edges;
}
/**
 * Canonical key for a fact: subject and predicate joined as "s::p".
 * Facts sharing this key overwrite one another (KV model).
 */
function factKey(f) {
    return f.s + '::' + f.p;
}
/**
 * Computes the next numeric fact id by scanning existing "f-<n>" ids.
 * @param {Array|null} existingFacts
 * @returns {number} max parsed id + 1 (1 when none parse)
 */
function getNextFactId(existingFacts) {
    let highest = 0;
    for (const fact of existingFacts || []) {
        const m = /^f-(\d+)$/.exec(fact.id ?? '');
        if (m) highest = Math.max(highest, Number.parseInt(m[1], 10));
    }
    return highest + 1;
}
// ═══════════════════════════════════════════════════════════════════════════
// Facts 合并KV 覆盖模型)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Merges incoming fact updates into the existing fact list using a
 * key-value overwrite model keyed on (subject + predicate):
 * - already-retracted existing facts are dropped up front
 * - {retracted: true} updates delete the matching key
 * - other updates overwrite, preserving the original id/_addedAt/_isState
 * - relationship facts (per isRelationFact) may carry a trend
 * - finally, non-state facts are capped at FACTS_LIMIT_PER_SUBJECT per
 *   subject, pruning the oldest by _addedAt
 * @param {Array|null} existingFacts - current fact list
 * @param {Array|null} updates - LLM-provided fact deltas
 * @param {number} floor - chat floor stamped as `since` on changed facts
 * @returns {Array} merged fact list
 */
export function mergeFacts(existingFacts, updates, floor) {
    const map = new Map();
    for (const f of existingFacts || []) {
        if (!f.retracted) {
            map.set(factKey(f), f);
        }
    }
    let nextId = getNextFactId(existingFacts);
    for (const u of updates || []) {
        if (!u.s || !u.p) continue;
        const key = factKey(u);
        if (u.retracted === true) {
            map.delete(key);
            continue;
        }
        if (!u.o || !String(u.o).trim()) continue;
        const existing = map.get(key);
        const newFact = {
            id: existing?.id || `f-${nextId++}`,
            s: u.s.trim(),
            p: u.p.trim(),
            o: String(u.o).trim(),
            since: floor,
            // Once a key is marked as a state fact it stays one.
            _isState: existing?._isState ?? !!u.isState,
        };
        if (isRelationFact(newFact) && u.trend) {
            newFact.trend = u.trend;
        }
        // Keep the original insertion floor when overwriting an existing key.
        if (existing?._addedAt != null) {
            newFact._addedAt = existing._addedAt;
        } else {
            newFact._addedAt = floor;
        }
        map.set(key, newFact);
    }
    // Capacity pruning: group non-state facts per subject.
    const factsBySubject = new Map();
    for (const f of map.values()) {
        if (f._isState) continue;
        const arr = factsBySubject.get(f.s) || [];
        arr.push(f);
        factsBySubject.set(f.s, arr);
    }
    const toRemove = new Set();
    for (const arr of factsBySubject.values()) {
        if (arr.length > FACTS_LIMIT_PER_SUBJECT) {
            // Oldest first; everything beyond the cap gets removed.
            arr.sort((a, b) => (a._addedAt || 0) - (b._addedAt || 0));
            for (let i = 0; i < arr.length - FACTS_LIMIT_PER_SUBJECT; i++) {
                toRemove.add(factKey(arr[i]));
            }
        }
    }
    return Array.from(map.values()).filter(f => !toRemove.has(factKey(f)));
}
// ═══════════════════════════════════════════════════════════════════════════
// 旧数据迁移
// ═══════════════════════════════════════════════════════════════════════════
/**
 * One-shot migration of legacy summary data (`world` entries and
 * `characters.relationships`) into the unified SPO fact list.
 * No-op (returns existing facts) when `json.facts` is already populated.
 * @param {object|null} json - legacy summary JSON
 * @returns {Array} migrated facts (empty when nothing to migrate)
 */
export function migrateToFacts(json) {
    if (!json) return [];
    // Skip migration when facts already exist.
    if (json.facts?.length) return json.facts;
    const facts = [];
    let nextId = 1;
    // Migrate `world` entries (persisted worldUpdate results).
    for (const w of json.world || []) {
        if (!w.category || !w.topic || !w.content) continue;
        let s, p;
        // Topic format: status/knowledge/relation entries use "s::p";
        // inventory/rule entries use the topic as subject, category as predicate.
        if (w.topic.includes('::')) {
            [s, p] = w.topic.split('::').map(x => x.trim());
        } else {
            s = w.topic.trim();
            p = w.category;
        }
        if (!s || !p) continue;
        facts.push({
            id: `f-${nextId++}`,
            s,
            p,
            o: w.content.trim(),
            since: w.floor ?? w._addedAt ?? 0,
            _addedAt: w._addedAt ?? w.floor ?? 0,
        });
    }
    // Migrate relationships. Fix: the predicate must use the canonical
    // "对X的看法" form so isRelationFact() / extractRelationshipsFromFacts()
    // recognize migrated entries (previously "X的看法" dropped the 对 prefix
    // and migrated relations were no longer treated as relationship facts).
    for (const r of json.characters?.relationships || []) {
        if (!r.from || !r.to) continue;
        facts.push({
            id: `f-${nextId++}`,
            s: r.from,
            p: `对${r.to}的看法`,
            o: r.label || '未知',
            trend: r.trend,
            since: r._addedAt ?? 0,
            _addedAt: r._addedAt ?? 0,
        });
    }
    return facts;
}
// ═══════════════════════════════════════════════════════════════════════════
// 数据合并L2 + L3
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Merges one parsed LLM summary increment into the accumulated summary
 * JSON (L2: keywords/events/characters/arcs, L3: facts). The old JSON is
 * deep-cloned first, so the input is not mutated. New entries are stamped
 * with `_addedAt = endMesId` so rollback can trim them later.
 * @param {object|null} oldJson - accumulated summary JSON
 * @param {object} parsed - sanitized LLM output for this increment
 * @param {number} endMesId - last message index covered by this run
 * @returns {object} newly merged JSON
 */
export function mergeNewData(oldJson, parsed, endMesId) {
    const merged = structuredClone(oldJson || {});
    // L2 containers
    merged.keywords ||= [];
    merged.events ||= [];
    merged.characters ||= {};
    merged.characters.main ||= [];
    merged.arcs ||= [];
    // L3 container (legacy migration is handled by getSummaryStore, not here)
    merged.facts ||= [];
    // L2 merge: keywords are replaced wholesale; events are appended.
    if (parsed.keywords?.length) {
        merged.keywords = parsed.keywords.map(k => ({ ...k, _addedAt: endMesId }));
    }
    (parsed.events || []).forEach(e => {
        e._addedAt = endMesId;
        merged.events.push(e);
    });
    // newCharacters: add only names not already tracked (legacy entries may be bare strings).
    const existingMain = new Set(
        (merged.characters.main || []).map(m => typeof m === 'string' ? m : m.name)
    );
    (parsed.newCharacters || []).forEach(name => {
        if (!existingMain.has(name)) {
            merged.characters.main.push({ name, _addedAt: endMesId });
        }
    });
    // arcUpdates: update an existing arc in place or create a new one.
    const arcMap = new Map((merged.arcs || []).map(a => [a.name, a]));
    (parsed.arcUpdates || []).forEach(update => {
        const existing = arcMap.get(update.name);
        if (existing) {
            existing.trajectory = update.trajectory;
            existing.progress = update.progress;
            if (update.newMoment) {
                existing.moments = existing.moments || [];
                existing.moments.push({ text: update.newMoment, _addedAt: endMesId });
            }
        } else {
            arcMap.set(update.name, {
                name: update.name,
                trajectory: update.trajectory,
                progress: update.progress,
                moments: update.newMoment ? [{ text: update.newMoment, _addedAt: endMesId }] : [],
                _addedAt: endMesId,
            });
        }
    });
    merged.arcs = Array.from(arcMap.values());
    // L3 merge: factUpdates via the KV overwrite model.
    merged.facts = mergeFacts(merged.facts, parsed.factUpdates || [], endMesId);
    return merged;
}
// ═══════════════════════════════════════════════════════════════════════════
// 回滚
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Detects that messages already covered by a summary were deleted (the
 * chat shrank to or below the summarized boundary) and, when at least
 * two floors are gone, rolls the summary back to the newest snapshot
 * that still fits inside the current chat.
 * @returns {Promise<boolean>} true when a rollback was performed
 */
export async function rollbackSummaryIfNeeded() {
    const { chat, chatId } = getContext();
    const currentLength = Array.isArray(chat) ? chat.length : 0;
    const store = getSummaryStore();
    if (!store || store.lastSummarizedMesId == null || store.lastSummarizedMesId < 0) {
        return false;
    }
    const lastSummarized = store.lastSummarizedMesId;
    if (currentLength <= lastSummarized) {
        const deletedCount = lastSummarized + 1 - currentLength;
        // A single missing floor is tolerated: no rollback.
        if (deletedCount < 2) {
            return false;
        }
        xbLog.warn(MODULE_ID, `删除已总结楼层 ${deletedCount} 条,触发回滚`);
        const history = store.summaryHistory || [];
        let targetEndMesId = -1;
        // Pick the newest snapshot whose boundary still exists in the chat.
        for (let i = history.length - 1; i >= 0; i--) {
            if (history[i].endMesId < currentLength) {
                targetEndMesId = history[i].endMesId;
                break;
            }
        }
        await executeRollback(chatId, store, targetEndMesId, currentLength);
        return true;
    }
    return false;
}
/**
 * Rolls the summary store back so it only covers messages up to
 * `targetEndMesId`. A negative target wipes the store entirely (plus all
 * event vectors); otherwise entries stamped after the target are filtered
 * out and their event vectors deleted.
 * @param {string} chatId
 * @param {object} store - summary store (mutated in place)
 * @param {number} targetEndMesId - snapshot boundary to roll back to (-1 = full reset)
 * @param {number} currentLength - current chat length (not read in this body — kept for the caller's signature; TODO confirm it can be dropped)
 */
export async function executeRollback(chatId, store, targetEndMesId, currentLength) {
    const oldEvents = store.json?.events || [];
    if (targetEndMesId < 0) {
        // Full reset: no snapshot fits inside the remaining chat.
        store.lastSummarizedMesId = -1;
        store.json = null;
        store.summaryHistory = [];
        store.hideSummarizedHistory = false;
        await clearEventVectors(chatId);
    } else {
        const deletedEventIds = oldEvents
            .filter(e => (e._addedAt ?? 0) > targetEndMesId)
            .map(e => e.id);
        const json = store.json || {};
        // L2 rollback: drop everything stamped after the target floor.
        json.events = (json.events || []).filter(e => (e._addedAt ?? 0) <= targetEndMesId);
        json.keywords = (json.keywords || []).filter(k => (k._addedAt ?? 0) <= targetEndMesId);
        json.arcs = (json.arcs || []).filter(a => (a._addedAt ?? 0) <= targetEndMesId);
        json.arcs.forEach(a => {
            // Legacy moments may be bare strings without an _addedAt stamp.
            a.moments = (a.moments || []).filter(m =>
                typeof m === 'string' || (m._addedAt ?? 0) <= targetEndMesId
            );
        });
        if (json.characters) {
            json.characters.main = (json.characters.main || []).filter(m =>
                typeof m === 'string' || (m._addedAt ?? 0) <= targetEndMesId
            );
        }
        // L3 facts rollback.
        json.facts = (json.facts || []).filter(f => (f._addedAt ?? 0) <= targetEndMesId);
        store.json = json;
        store.lastSummarizedMesId = targetEndMesId;
        store.summaryHistory = (store.summaryHistory || []).filter(h => h.endMesId <= targetEndMesId);
        if (deletedEventIds.length > 0) {
            await deleteEventVectorsByIds(chatId, deletedEventIds);
            xbLog.info(MODULE_ID, `回滚删除 ${deletedEventIds.length} 个事件向量`);
        }
    }
    store.updatedAt = Date.now();
    saveSummaryStore();
    xbLog.info(MODULE_ID, `回滚完成,目标楼层: ${targetEndMesId}`);
}
/**
 * Wipes the per-chat summary state (JSON payload + summarized boundary)
 * and, when a chatId is provided, the associated event vectors.
 * @param {string|null|undefined} chatId
 */
export async function clearSummaryData(chatId) {
    const store = getSummaryStore();
    if (store) {
        delete store.json;
        store.lastSummarizedMesId = -1;
        store.updatedAt = Date.now();
        saveSummaryStore();
    }
    if (chatId) {
        await clearEventVectors(chatId);
    }
    xbLog.info(MODULE_ID, '总结数据已清空');
}
// ═══════════════════════════════════════════════════════════════════════════
// L3 数据读取(供 prompt.js / recall.js 使用)
// ═══════════════════════════════════════════════════════════════════════════
/** Active (non-retracted) L3 facts for the current chat. */
export function getFacts() {
    const facts = getSummaryStore()?.json?.facts || [];
    return facts.filter(f => !f.retracted);
}
/** Names of tracked main characters (normalizes legacy string entries). */
export function getNewCharacters() {
    const main = getSummaryStore()?.json?.characters?.main || [];
    return main.map(entry => (typeof entry === 'string' ? entry : entry.name));
}

View File

@@ -0,0 +1,269 @@
// Story Summary - Generator
// 调用 LLM 生成总结
import { getContext } from "../../../../../../extensions.js";
import { xbLog } from "../../../core/debug-core.js";
import { getSummaryStore, saveSummaryStore, addSummarySnapshot, mergeNewData, getFacts } from "../data/store.js";
import { generateSummary, parseSummaryJson } from "./llm.js";
const MODULE_ID = 'summaryGenerator';
const SUMMARY_SESSION_ID = 'xb9';
const MAX_CAUSED_BY = 2;
// ═══════════════════════════════════════════════════════════════════════════
// factUpdates 清洗
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Accepts only the two canonical relationship predicate shapes
 * ("对X的看法" / "与X的关系"); anything else yields null.
 * @param {string} p - predicate candidate
 * @returns {string|null} the predicate unchanged, or null when not a relation predicate
 */
function normalizeRelationPredicate(p) {
    const canonicalShapes = [/^对.+的看法$/, /^与.+的关系$/];
    return canonicalShapes.some(re => re.test(p)) ? p : null;
}
/**
 * Validates and normalizes `parsed.factUpdates` in place:
 * - subject/predicate are trimmed; items missing either are dropped
 * - retractions are kept as {s, p, retracted: true}
 * - non-retraction items must have a non-empty object value
 * - trend is only kept on canonical relationship predicates and only
 *   when it is one of the valid trend labels
 * @param {object|null} parsed - LLM output (mutated in place)
 */
function sanitizeFacts(parsed) {
    if (!parsed) return;
    // Hoisted loop-invariant (was re-created as an array on every item);
    // Set.has gives O(1) membership checks.
    const VALID_TRENDS = new Set(['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融']);
    const updates = Array.isArray(parsed.factUpdates) ? parsed.factUpdates : [];
    const ok = [];
    for (const item of updates) {
        const s = String(item?.s || '').trim();
        const pRaw = String(item?.p || '').trim();
        if (!s || !pRaw) continue;
        if (item.retracted === true) {
            ok.push({ s, p: pRaw, retracted: true });
            continue;
        }
        const o = String(item?.o || '').trim();
        if (!o) continue;
        const relP = normalizeRelationPredicate(pRaw);
        const isRel = !!relP;
        const fact = {
            s,
            p: isRel ? relP : pRaw,
            o,
            isState: !!item.isState,
        };
        if (isRel && item.trend && VALID_TRENDS.has(item.trend)) {
            fact.trend = item.trend;
        }
        ok.push(fact);
    }
    parsed.factUpdates = ok;
}
// ═══════════════════════════════════════════════════════════════════════════
// causedBy 清洗(事件因果边)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Cleans `causedBy` edges on newly parsed events, in place:
 * - ids must match /^evt-\d+$/ (events with malformed ids lose all edges)
 * - edges may only point at known events (existing ids or this batch)
 * - no self-references, no duplicates, at most MAX_CAUSED_BY per event
 * @param {object|null} parsed - LLM output containing an `events` array
 * @param {Iterable<string>} existingEventIds - ids already present in the store
 */
function sanitizeEventsCausality(parsed, existingEventIds) {
    if (!parsed) return;
    const events = Array.isArray(parsed.events) ? parsed.events : [];
    if (!events.length) return;
    const idRe = /^evt-\d+$/;
    // Known targets = stored ids plus well-formed ids from this batch.
    const knownIds = new Set(existingEventIds || []);
    for (const e of events) {
        const id = String(e?.id || '').trim();
        if (idRe.test(id)) knownIds.add(id);
    }
    for (const e of events) {
        const selfId = String(e?.id || '').trim();
        if (!idRe.test(selfId)) {
            e.causedBy = [];
            continue;
        }
        const cleaned = [];
        const candidates = Array.isArray(e.causedBy) ? e.causedBy : [];
        for (const candidate of candidates) {
            if (cleaned.length >= MAX_CAUSED_BY) break;
            const cid = String(candidate || '').trim();
            if (!idRe.test(cid) || cid === selfId) continue;
            if (!knownIds.has(cid) || cleaned.includes(cid)) continue;
            cleaned.push(cid);
        }
        e.causedBy = cleaned;
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 辅助函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Renders the already-stored summary (events, characters, arcs,
 * keywords) as plain text for the LLM prompt. Returns a placeholder
 * string when nothing has been summarized yet.
 * @param {object|null} store - summary store ({ json })
 * @returns {string}
 */
export function formatExistingSummaryForAI(store) {
    const data = store?.json;
    if (!data) return "(空白,这是首次总结)";
    const sections = [];
    if (data.events?.length) {
        sections.push("【已记录事件】");
        data.events.forEach((ev, i) => {
            sections.push(`${i + 1}. [${ev.timeLabel}] ${ev.title}:${ev.summary}`);
        });
    }
    if (data.characters?.main?.length) {
        const names = data.characters.main.map(m => typeof m === 'string' ? m : m.name);
        sections.push(`\n【主要角色】${names.join("、")}`);
    }
    if (data.arcs?.length) {
        sections.push("【角色弧光】");
        data.arcs.forEach(a => {
            sections.push(`- ${a.name}:${a.trajectory}(进度${Math.round(a.progress * 100)}%)`);
        });
    }
    if (data.keywords?.length) {
        sections.push(`\n【关键词】${data.keywords.map(k => k.text).join("、")}`);
    }
    return sections.join("\n") || "(空白,这是首次总结)";
}
/**
 * Next sequential event id number, derived from the largest existing
 * "evt-<n>" id in the store (1 when there are no events yet).
 * @param {object|null} store - summary store ({ json: { events } })
 * @returns {number}
 */
export function getNextEventId(store) {
    const events = store?.json?.events || [];
    if (!events.length) return 1;
    const maxId = Math.max(...events.map(e => {
        const match = e.id?.match(/evt-(\d+)/);
        // Fix: always pass a radix to parseInt.
        return match ? parseInt(match[1], 10) : 0;
    }));
    return maxId + 1;
}
/**
 * Collects the not-yet-summarized chat slice ending at `targetMesId`,
 * capped at `maxPerRun` messages, formatted as "#floor:speaker" blocks.
 * Floors in `range` and message headers are 1-based for display.
 * @param {number} targetMesId - last message index to include
 * @param {number|null} lastSummarizedMesId - boundary of the previous run (-1/null = none)
 * @param {number} [maxPerRun=100] - max messages per run
 * @returns {{text: string, count: number, range: string, endMesId: number}}
 */
export function buildIncrementalSlice(targetMesId, lastSummarizedMesId, maxPerRun = 100) {
    const { chat, name1, name2 } = getContext();
    const start = Math.max(0, (lastSummarizedMesId ?? -1) + 1);
    const end = Math.min(targetMesId, chat.length - 1, start + maxPerRun - 1);
    if (start > end) return { text: "", count: 0, range: "", endMesId: -1 };
    const userLabel = name1 || '用户';
    const charLabel = name2 || '角色';
    const blocks = [];
    for (let floor = start; floor <= end; floor++) {
        const msg = chat[floor];
        const speaker = msg.name || (msg.is_user ? userLabel : charLabel);
        blocks.push(`#${floor + 1}:${speaker}\n${msg.mes}`);
    }
    return {
        text: blocks.join('\n\n'),
        count: end - start + 1,
        range: `${start + 1}-${end + 1}`,
        endMesId: end,
    };
}
// ═══════════════════════════════════════════════════════════════════════════
// 主生成函数
// ═══════════════════════════════════════════════════════════════════════════
export async function runSummaryGeneration(mesId, config, callbacks = {}) {
const { onStatus, onError, onComplete } = callbacks;
const store = getSummaryStore();
const lastSummarized = store?.lastSummarizedMesId ?? -1;
const maxPerRun = config.trigger?.maxPerRun || 100;
const slice = buildIncrementalSlice(mesId, lastSummarized, maxPerRun);
if (slice.count === 0) {
onStatus?.("没有新的对话需要总结");
return { success: true, noContent: true };
}
onStatus?.(`正在总结 ${slice.range}${slice.count}楼新内容)...`);
const existingSummary = formatExistingSummaryForAI(store);
const existingFacts = getFacts();
const nextEventId = getNextEventId(store);
const existingEventCount = store?.json?.events?.length || 0;
const useStream = config.trigger?.useStream !== false;
let raw;
try {
raw = await generateSummary({
existingSummary,
existingFacts,
newHistoryText: slice.text,
historyRange: slice.range,
nextEventId,
existingEventCount,
llmApi: {
provider: config.api?.provider,
url: config.api?.url,
key: config.api?.key,
model: config.api?.model,
},
genParams: config.gen || {},
useStream,
timeout: 120000,
sessionId: SUMMARY_SESSION_ID,
});
} catch (err) {
xbLog.error(MODULE_ID, '生成失败', err);
onError?.(err?.message || "生成失败");
return { success: false, error: err };
}
if (!raw?.trim()) {
xbLog.error(MODULE_ID, 'AI返回为空');
onError?.("AI返回为空");
return { success: false, error: "empty" };
}
const parsed = parseSummaryJson(raw);
if (!parsed) {
xbLog.error(MODULE_ID, 'JSON解析失败');
onError?.("AI未返回有效JSON");
return { success: false, error: "parse" };
}
sanitizeFacts(parsed);
const existingEventIds = new Set((store?.json?.events || []).map(e => e?.id).filter(Boolean));
sanitizeEventsCausality(parsed, existingEventIds);
const merged = mergeNewData(store?.json || {}, parsed, slice.endMesId);
store.lastSummarizedMesId = slice.endMesId;
store.json = merged;
store.updatedAt = Date.now();
addSummarySnapshot(store, slice.endMesId);
saveSummaryStore();
xbLog.info(MODULE_ID, `总结完成,已更新至 ${slice.endMesId + 1}`);
if (parsed.factUpdates?.length) {
xbLog.info(MODULE_ID, `Facts 更新: ${parsed.factUpdates.length}`);
}
const newEventIds = (parsed.events || []).map(e => e.id);
onComplete?.({
merged,
endMesId: slice.endMesId,
newEventIds,
factStats: { updated: parsed.factUpdates?.length || 0 },
});
return { success: true, merged, endMesId: slice.endMesId, newEventIds };
}

View File

@@ -0,0 +1,438 @@
// LLM Service
/** Maps UI provider aliases to backend provider identifiers. */
const PROVIDER_MAP = {
    openai: "openai",
    google: "gemini",
    gemini: "gemini",
    claude: "claude",
    anthropic: "claude",
    deepseek: "deepseek",
    cohere: "cohere",
    custom: "custom",
};
/** Assistant prefill nudging the model to regenerate the full JSON. */
const JSON_PREFILL = '下面重新生成完整JSON。';
const LLM_PROMPT_CONFIG = {
topSystem: `Story Analyst: This task involves narrative comprehension and structured incremental summarization, representing creative story analysis at the intersection of plot tracking and character development. As a story analyst, you will conduct systematic evaluation of provided dialogue content to generate structured incremental summary data.
[Read the settings for this task]
<task_settings>
Incremental_Summary_Requirements:
- Incremental_Only: 只提取新对话中的新增要素,绝不重复已有总结
- Event_Granularity: 记录有叙事价值的事件,而非剧情梗概
- Memory_Album_Style: 形成有细节、有温度、有记忆点的回忆册
- Event_Classification:
type:
- 相遇: 人物/事物初次接触
- 冲突: 对抗、矛盾激化
- 揭示: 真相、秘密、身份
- 抉择: 关键决定
- 羁绊: 关系加深或破裂
- 转变: 角色/局势改变
- 收束: 问题解决、和解
- 日常: 生活片段
weight:
- 核心: 删掉故事就崩
- 主线: 推动主要剧情
- 转折: 改变某条线走向
- 点睛: 有细节不影响主线
- 氛围: 纯粹氛围片段
- Causal_Chain: 为每个新事件标注直接前因事件IDcausedBy。仅在因果关系明确直接导致/明确动机/承接后果)时填写;不明确时填[]完全正常。0-2个只填 evt-数字,指向已存在或本次新输出事件。
- Character_Dynamics: 识别新角色,追踪关系趋势(破裂/厌恶/反感/陌生/投缘/亲密/交融)
- Arc_Tracking: 更新角色弧光轨迹与成长进度(0.0-1.0)
- Fact_Tracking: 维护 SPO 三元组知识图谱。追踪生死、物品归属、位置、关系等硬性事实。采用 KV 覆盖模型s+p 为键)。
</task_settings>
---
Story Analyst:
[Responsibility Definition]
\`\`\`yaml
analysis_task:
title: Incremental Story Summarization with Knowledge Graph
Story Analyst:
role: Antigravity
task: >-
To analyze provided dialogue content against existing summary state,
extract only NEW plot elements, character developments, relationship
changes, arc progressions, AND fact updates, outputting
structured JSON for incremental summary database updates.
assistant:
role: Summary Specialist
description: Incremental Story Summary & Knowledge Graph Analyst
behavior: >-
To compare new dialogue against existing summary, identify genuinely
new events and character interactions, classify events by narrative
type and weight, track character arc progression with percentage,
maintain facts as SPO triples with clear semantics,
and output structured JSON containing only incremental updates.
Must strictly avoid repeating any existing summary content.
user:
role: Content Provider
description: Supplies existing summary state and new dialogue
behavior: >-
To provide existing summary state (events, characters, arcs, facts)
and new dialogue content for incremental analysis.
interaction_mode:
type: incremental_analysis
output_format: structured_json
deduplication: strict_enforcement
execution_context:
summary_active: true
incremental_only: true
memory_album_style: true
fact_tracking: true
\`\`\`
---
Summary Specialist:
<Chat_History>`,
assistantDoc: `
Summary Specialist:
Acknowledged. Now reviewing the incremental summarization specifications:
[Event Classification System]
├─ Types: 相遇|冲突|揭示|抉择|羁绊|转变|收束|日常
├─ Weights: 核心|主线|转折|点睛|氛围
└─ Each event needs: id, title, timeLabel, summary(含楼层), participants, type, weight
[Relationship Trend Scale]
破裂 ← 厌恶 ← 反感 ← 陌生 → 投缘 → 亲密 → 交融
[Arc Progress Tracking]
├─ trajectory: 当前阶段描述(15字内)
├─ progress: 0.0 to 1.0
└─ newMoment: 仅记录本次新增的关键时刻
[Fact Tracking - SPO / World Facts]
We maintain a small "world state" as SPO triples.
Each update is a JSON object: {s, p, o, isState, trend?, retracted?}
Core rules:
1) Keyed by (s + p). If a new update has the same (s+p), it overwrites the previous value.
2) Only output facts that are NEW or CHANGED in the new dialogue. Do NOT repeat unchanged facts.
3) isState meaning:
- isState: true -> core constraints that must stay stable and should NEVER be auto-deleted
(identity, location, life/death, ownership, relationship status, binding rules)
- isState: false -> non-core facts / soft memories that may be pruned by capacity limits later
4) Relationship facts:
- Use predicate format: "对X的看法" (X is the target person)
- trend is required for relationship facts, one of:
破裂 | 厌恶 | 反感 | 陌生 | 投缘 | 亲密 | 交融
5) Retraction (deletion):
- To delete a fact, output: {s, p, retracted: true}
6) Predicate normalization:
- Reuse existing predicates whenever possible, avoid inventing synonyms.
Ready to process incremental summary requests with strict deduplication.`,
assistantAskSummary: `
Summary Specialist:
Specifications internalized. Please provide the existing summary state so I can:
1. Index all recorded events to avoid duplication
2. Map current character list as baseline
3. Note existing arc progress levels
4. Identify established keywords
5. Review current facts (SPO triples baseline)`,
assistantAskContent: `
Summary Specialist:
Existing summary fully analyzed and indexed. I understand:
├─ Recorded events: Indexed for deduplication
├─ Character list: Baseline mapped
├─ Arc progress: Levels noted
├─ Keywords: Current state acknowledged
└─ Facts: SPO baseline loaded
I will extract only genuinely NEW elements from the upcoming dialogue.
Please provide the new dialogue content requiring incremental analysis.`,
metaProtocolStart: `
Summary Specialist:
ACKNOWLEDGED. Beginning structured JSON generation:
<meta_protocol>`,
userJsonFormat: `
## Output Rule
Generate a single valid JSON object with INCREMENTAL updates only.
## Mindful Approach
Before generating, observe the USER and analyze carefully:
- What is user's writing style and emotional expression?
- What NEW events occurred (not in existing summary)?
- What NEW characters appeared for the first time?
- What relationship CHANGES happened?
- What arc PROGRESS was made?
- What facts changed? (status/position/ownership/relationships)
## factUpdates 规则
- 目的: 纠错 & 世界一致性约束,只记录硬性事实
- s+p 为键,相同键会覆盖旧值
- isState: true=核心约束(位置/身份/生死/关系)false=有容量上限会被清理
- 关系类: p="对X的看法"trend 必填(破裂|厌恶|反感|陌生|投缘|亲密|交融)
- 删除: {s, p, retracted: true},不需要 o 字段
- 更新: {s, p, o, isState, trend?}
- 谓词规范化: 复用已有谓词,不要发明同义词
- 只输出有变化的条目,确保少、硬、稳定
## Output Format
\`\`\`json
{
"mindful_prelude": {
"user_insight": "用户的幻想是什么时空、场景,是否反应出存在严重心理问题需要建议?",
"dedup_analysis": "已有X个事件本次识别Y个新事件",
"fact_changes": "识别到的事实变化概述"
},
"keywords": [
{"text": "综合历史+新内容的全剧情关键词(5-10个)", "weight": "核心|重要|一般"}
],
"events": [
{
"id": "evt-{nextEventId}起始,依次递增",
"title": "地点·事件标题",
"timeLabel": "时间线标签(如:开场、第二天晚上)",
"summary": "1-2句话描述涵盖丰富信息素末尾标注楼层(#X-Y)",
"participants": ["参与角色名,不要使用人称代词或别名,只用正式人名"],
"type": "相遇|冲突|揭示|抉择|羁绊|转变|收束|日常",
"weight": "核心|主线|转折|点睛|氛围",
"causedBy": ["evt-12", "evt-14"]
}
],
"newCharacters": ["仅本次首次出现的角色名"],
"arcUpdates": [
{"name": "角色名,不要使用人称代词或别名,只用正式人名", "trajectory": "当前阶段描述(15字内)", "progress": 0.0-1.0, "newMoment": "本次新增的关键时刻"}
],
"factUpdates": [
{"s": "主体", "p": "谓词", "o": "当前值", "isState": true, "trend": "仅关系类填"},
{"s": "要删除的主体", "p": "要删除的谓词", "retracted": true}
]
}
\`\`\`
## CRITICAL NOTES
- events.id 从 evt-{nextEventId} 开始编号
- 仅输出【增量】内容,已有事件绝不重复
- keywords 是全局关键词,综合已有+新增
- causedBy 仅在因果明确时填写,允许为[]0-2个
- factUpdates 可为空数组
- 合法JSON字符串值内部避免英文双引号
- 用朴实、白描、有烟火气的笔触记录,避免比喻和意象
</meta_protocol>`,
assistantCheck: `Content review initiated...
[Compliance Check Results]
├─ Existing summary loaded: ✓ Fully indexed
├─ New dialogue received: ✓ Content parsed
├─ Deduplication engine: ✓ Active
├─ Event classification: ✓ Ready
├─ Fact tracking: ✓ Enabled
└─ Output format: ✓ JSON specification loaded
[Material Verification]
├─ Existing events: Indexed ({existingEventCount} recorded)
├─ Character baseline: Mapped
├─ Arc progress baseline: Noted
├─ Facts baseline: Loaded
└─ Output specification: ✓ Defined in <meta_protocol>
All checks passed. Beginning incremental extraction...
{
"mindful_prelude":`,
userConfirm: `怎么截断了重新完整生成只输出JSON不要任何其他内容
</Chat_History>`,
assistantPrefill: JSON_PREFILL
};
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Encodes a string as URL-safe base64 with padding stripped,
 * UTF-8 encoding the input first.
 * @param {string} str - input (coerced to string)
 * @returns {string} base64url text
 */
function b64UrlEncode(str) {
    const bytes = new TextEncoder().encode(String(str));
    let binary = '';
    for (const byte of bytes) {
        binary += String.fromCharCode(byte);
    }
    return btoa(binary)
        .replace(/\+/g, '-')
        .replace(/\//g, '_')
        .replace(/=+$/, '');
}
/**
 * Returns the global xiaobaix streaming module when it exposes the
 * raw-generation command, otherwise null.
 * @returns {object|null}
 */
function getStreamingModule() {
    const candidate = window.xiaobaixStreamingGeneration;
    if (candidate && candidate.xbgenrawCommand) return candidate;
    return null;
}
/**
 * Polls the streaming module (every 300 ms) until the session stops
 * streaming, resolving with the accumulated text; rejects once `timeout`
 * milliseconds have elapsed.
 * @param {string} sessionId
 * @param {{getStatus: Function}} streamingMod
 * @param {number} [timeout=120000] - milliseconds before giving up
 * @returns {Promise<string>}
 */
function waitForStreamingComplete(sessionId, streamingMod, timeout = 120000) {
    const startedAt = Date.now();
    return new Promise((resolve, reject) => {
        const check = () => {
            const status = streamingMod.getStatus(sessionId);
            if (!status.isStreaming) {
                resolve(status.text || '');
                return;
            }
            if (Date.now() - startedAt > timeout) {
                reject(new Error('生成超时'));
                return;
            }
            setTimeout(check, 300);
        };
        check();
    });
}
// ═══════════════════════════════════════════════════════════════════════════
// 提示词构建
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Formats the current fact triples for the LLM prompt and collects the
 * distinct predicates so the model can be told to reuse them.
 * @param {Array|null} facts - fact triples ({s, p, o, trend?})
 * @returns {{text: string, predicates: string[]}}
 */
function formatFactsForLLM(facts) {
    if (!facts?.length) {
        return { text: '(空白,尚无事实记录)', predicates: [] };
    }
    const predicateSet = new Set();
    const lines = [];
    for (const f of facts) {
        if (f.p) predicateSet.add(f.p);
        lines.push(f.trend
            ? `- ${f.s} | ${f.p} | ${f.o} [${f.trend}]`
            : `- ${f.s} | ${f.p} | ${f.o}`);
    }
    return {
        text: lines.join('\n') || '(空白,尚无事实记录)',
        predicates: [...predicateSet],
    };
}
/**
 * Assemble the three prompt segments consumed by the streaming command:
 * base64url-encoded top/bottom message arrays plus the assistant prefill.
 * @param {string} existingSummary - Current summary state text.
 * @param {object[]} existingFacts - Fact triples already recorded.
 * @param {string} newHistoryText - New chat content to summarize.
 * @param {string} historyRange - Human-readable floor range label.
 * @param {number} nextEventId - Id the LLM should assign to the next event.
 * @param {number} existingEventCount - Number of events already recorded.
 * @returns {{top64: string, bottom64: string, assistantPrefill: string}}
 */
function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, historyRange, nextEventId, existingEventCount) {
    const { text: factsText, predicates } = formatFactsForLLM(existingFacts);
    // Nudge the model to reuse predicates it has already emitted.
    let predicatesHint = '';
    if (predicates.length > 0) {
        predicatesHint = `\n\n<\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>\n${predicates.join('\u3001')}\n</\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>`;
    }
    const jsonFormat = LLM_PROMPT_CONFIG.userJsonFormat.replace(/\{nextEventId\}/g, String(nextEventId));
    const checkContent = LLM_PROMPT_CONFIG.assistantCheck.replace(/\{existingEventCount\}/g, String(existingEventCount));
    const stateBlock = `<\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n${existingSummary}\n</\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n\n<\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>\n${factsText}\n</\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>${predicatesHint}`;
    const historyBlock = `<\u65b0\u5bf9\u8bdd\u5185\u5bb9>\uff08${historyRange}\uff09\n${newHistoryText}\n</\u65b0\u5bf9\u8bdd\u5185\u5bb9>`;
    const topMessages = [
        { role: 'system', content: LLM_PROMPT_CONFIG.topSystem },
        { role: 'assistant', content: LLM_PROMPT_CONFIG.assistantDoc },
        { role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskSummary },
        { role: 'user', content: stateBlock },
        { role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskContent },
        { role: 'user', content: historyBlock },
    ];
    const bottomMessages = [
        { role: 'user', content: `${LLM_PROMPT_CONFIG.metaProtocolStart}\n${jsonFormat}` },
        { role: 'assistant', content: checkContent },
        { role: 'user', content: LLM_PROMPT_CONFIG.userConfirm },
    ];
    return {
        top64: b64UrlEncode(JSON.stringify(topMessages)),
        bottom64: b64UrlEncode(JSON.stringify(bottomMessages)),
        assistantPrefill: LLM_PROMPT_CONFIG.assistantPrefill,
    };
}
// ═══════════════════════════════════════════════════════════════════════════
// JSON 解析
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Best-effort parse of LLM output into a JSON object.
 * Strips markdown code fences, then tries a direct parse; on failure,
 * extracts the outermost {...} span and retries after removing trailing commas.
 * @param {string} raw - Raw LLM output.
 * @returns {object|null} Parsed object, or null when nothing parseable.
 */
export function parseSummaryJson(raw) {
    if (!raw) return null;
    const cleaned = String(raw)
        .trim()
        .replace(/^```(?:json)?\s*/i, "")
        .replace(/\s*```$/i, "")
        .trim();
    try {
        return JSON.parse(cleaned);
    } catch {
        // fall through to the braces-extraction pass
    }
    const open = cleaned.indexOf('{');
    const close = cleaned.lastIndexOf('}');
    if (open === -1 || close <= open) return null;
    // Drop trailing commas before } or ] — a common LLM formatting slip.
    const candidate = cleaned.slice(open, close + 1).replace(/,(\s*[}\]])/g, '$1');
    try {
        return JSON.parse(candidate);
    } catch {
        return null;
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 主生成函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Generate an incremental summary by sending the assembled prompt through
 * the shared streaming-generation module.
 *
 * Returns JSON_PREFILL + raw model output: the prefill is re-attached so the
 * caller can hand the concatenation straight to parseSummaryJson().
 *
 * @param {object} options
 * @param {string} options.existingSummary - Current summary state text.
 * @param {object[]} options.existingFacts - Fact triples already recorded.
 * @param {string} options.newHistoryText - New chat content to summarize (required).
 * @param {string} options.historyRange - Floor-range label shown to the model.
 * @param {number} options.nextEventId - Id for the next extracted event.
 * @param {number} [options.existingEventCount=0] - Events already recorded.
 * @param {object} [options.llmApi={}] - Optional provider override {provider, url, key, model}.
 * @param {object} [options.genParams={}] - Optional sampling parameters.
 * @param {boolean} [options.useStream=true] - Poll a streaming session vs. one-shot call.
 * @param {number} [options.timeout=120000] - Streaming-poll timeout (ms).
 * @param {string} [options.sessionId='xb_summary'] - Streaming session id.
 * @returns {Promise<string>} JSON_PREFILL + raw LLM output.
 * @throws {Error} When the new content is empty or the generation module is missing.
 */
export async function generateSummary(options) {
    const {
        existingSummary,
        existingFacts,
        newHistoryText,
        historyRange,
        nextEventId,
        existingEventCount = 0,
        llmApi = {},
        genParams = {},
        useStream = true,
        timeout = 120000,
        sessionId = 'xb_summary'
    } = options;
    if (!newHistoryText?.trim()) {
        throw new Error('新对话内容为空');
    }
    const streamingMod = getStreamingModule();
    if (!streamingMod) {
        throw new Error('生成模块未加载');
    }
    const promptData = buildSummaryMessages(
        existingSummary,
        existingFacts,
        newHistoryText,
        historyRange,
        nextEventId,
        existingEventCount
    );
    // 'nonstream' is the backend's flag, i.e. the inverse of useStream.
    const args = {
        as: 'user',
        nonstream: useStream ? 'false' : 'true',
        top64: promptData.top64,
        bottom64: promptData.bottom64,
        bottomassistant: promptData.assistantPrefill,
        id: sessionId,
    };
    // Add provider-override args only when a known non-'st' provider is
    // configured; otherwise no api/url/key/model args are attached.
    if (llmApi.provider && llmApi.provider !== 'st') {
        const mappedApi = PROVIDER_MAP[String(llmApi.provider).toLowerCase()];
        if (mappedApi) {
            args.api = mappedApi;
            if (llmApi.url) args.apiurl = llmApi.url;
            if (llmApi.key) args.apipassword = llmApi.key;
            if (llmApi.model) args.model = llmApi.model;
        }
    }
    // Forward only explicitly-set sampling params (null/undefined = backend default).
    if (genParams.temperature != null) args.temperature = genParams.temperature;
    if (genParams.top_p != null) args.top_p = genParams.top_p;
    if (genParams.top_k != null) args.top_k = genParams.top_k;
    if (genParams.presence_penalty != null) args.presence_penalty = genParams.presence_penalty;
    if (genParams.frequency_penalty != null) args.frequency_penalty = genParams.frequency_penalty;
    let rawOutput;
    if (useStream) {
        // Streaming: the command resolves with a session id, then we poll it.
        const sid = await streamingMod.xbgenrawCommand(args, '');
        rawOutput = await waitForStreamingComplete(sid, streamingMod, timeout);
    } else {
        // Non-streaming: the command resolves with the full text directly.
        rawOutput = await streamingMod.xbgenrawCommand(args, '');
    }
    console.group('%c[Story-Summary] LLM输出', 'color: #7c3aed; font-weight: bold');
    console.log(rawOutput);
    console.groupEnd();
    return JSON_PREFILL + rawOutput;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,376 @@
// ============================================================================
// atom-extraction.js - L0 场景锚点提取v2 - 场景摘要 + 图结构)
//
// 设计依据:
// - BGE-M3 (BAAI, 2024): 自然语言段落检索精度最高 → semantic = 纯自然语言
// - TransE (Bordes, 2013): s/t/r 三元组方向性 → edges 格式
//
// 每楼层 1-2 个场景锚点非碎片原子60-100 字场景摘要
// ============================================================================
import { callLLM, parseJson } from './llm-service.js';
import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from '../utils/text-filter.js';
const MODULE_ID = 'atom-extraction';  // log tag for xbLog
const CONCURRENCY = 10;               // rounds extracted in parallel per batch
const RETRY_COUNT = 2;                // extra attempts after the first failure
const RETRY_DELAY = 500;              // base backoff between attempts (ms)
const DEFAULT_TIMEOUT = 20000;        // per-round LLM call timeout (ms)
const STAGGER_DELAY = 80;             // ramp-up spacing for the first batch (ms)
// Cooperative cancellation flag shared with the batch-extraction loop below.
let batchCancelled = false;

/** Request that any in-flight batch extraction stop at the next checkpoint. */
export function cancelBatchExtraction() {
    batchCancelled = true;
}

/** @returns {boolean} Whether batch extraction has been cancelled. */
export function isBatchCancelled() {
    return batchCancelled;
}
// ============================================================================
// L0 提取 Prompt
// ============================================================================
const SYSTEM_PROMPT = `你是场景摘要器。从一轮对话中提取1-2个场景锚点用于语义检索和关系追踪。
输入格式:
<round>
<user name="用户名">...</user>
<assistant>...</assistant>
</round>
只输出严格JSON
{"anchors":[
{
"scene": "60-100字完整场景描述",
"edges": [{"s":"施事方","t":"受事方","r":"互动行为"}],
"where": "地点"
}
]}
## scene 写法
- 纯自然语言,像旁白或日记,不要任何标签/标记/枚举值
- 必须包含:角色名、动作、情感氛围、关键细节
- 读者只看 scene 就能复原这一幕
- 60-100字信息密集但流畅
## edges关系三元组
- s=施事方 t=受事方 r=互动行为(建议 6-12 字,最多 20 字)
- s/t 必须是参与互动的角色正式名称,不用代词或别称
- 只从正文内容中识别角色名,不要把标签名(如 user、assistant当作角色
- r 使用动作模板短语:“动作+对象/结果”(例:“提出交易条件”、“拒绝对方请求”、“当众揭露秘密”、“安抚对方情绪”)
- r 不要写人名,不要复述整句,不要写心理描写或评价词
- r 正例(合格):提出交易条件、拒绝对方请求、当众揭露秘密、安抚对方情绪、强行打断发言、转移谈话焦点
- r 反例不合格我觉得她现在很害怕、他突然非常生气地大喊起来、user开始说话、assistant解释了很多细节
- 每个锚点 1-3 条
## where
- 场景地点,无明确地点时空字符串
## 数量规则
- 最多2个。1个够时不凑2个
- 明显场景切换(地点/时间/对象变化时才2个
- 同一场景不拆分
- 无角色互动时返回 {"anchors":[]}
## 示例
输入:艾拉在火山口举起圣剑刺穿古龙心脏,龙血溅满她的铠甲,她跪倒在地痛哭
输出:
{"anchors":[{"scene":"火山口上艾拉举起圣剑刺穿古龙的心脏,龙血溅满铠甲,古龙轰然倒地,艾拉跪倒在滚烫的岩石上痛哭,完成了她不得不做的弑杀","edges":[{"s":"艾拉","t":"古龙","r":"以圣剑刺穿心脏"}],"where":"火山口"}]}`;
const JSON_PREFILL = '{"anchors":[';
// ============================================================================
// 睡眠工具
// ============================================================================
/**
 * Promise-based delay helper.
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
// Filler adverbs/connectives stripped from relation phrases so that
// near-identical actions normalize to the same string.
const ACTION_STRIP_WORDS = [
    '突然', '非常', '有些', '有点', '轻轻', '悄悄', '缓缓', '立刻',
    '马上', '然后', '并且', '而且', '开始', '继续', '再次', '正在',
];

/**
 * Clamp a number into [min, max].
 * @param {number} v
 * @param {number} min
 * @param {number} max
 * @returns {number}
 */
function clamp(v, min, max) {
    return Math.max(min, Math.min(max, v));
}
/**
 * Normalize an LLM-produced relation phrase: NFKC-normalize, strip
 * zero-width characters, punctuation and whitespace, remove filler words
 * and trailing aspect particles, then cap the length at 12 characters.
 * NOTE(review): the prompt allows relations up to 20 chars but this caps
 * at 12 — confirm the cap is intentional.
 * @param {string} raw - Raw relation text from the model.
 * @returns {string} Cleaned phrase, or '' when shorter than 2 characters.
 */
function sanitizeActionPhrase(raw) {
    let phrase = String(raw || '')
        .normalize('NFKC')
        .replace(/[\u200B-\u200D\uFEFF]/g, '')
        .trim();
    if (!phrase) return '';
    phrase = phrase
        .replace(/[,。!?、;:,.!?;:"'“”‘’()[\]{}<>《》]/g, '')
        .replace(/\s+/g, '');
    for (const filler of ACTION_STRIP_WORDS) {
        phrase = phrase.replaceAll(filler, '');
    }
    // Drop trailing aspect/structural particles (地/得/了/着/过).
    phrase = phrase.replace(/(地|得|了|着|过)+$/g, '');
    if (phrase.length < 2) return '';
    return phrase.length > 12 ? phrase.slice(0, 12) : phrase;
}
/**
 * Heuristic quality score in [0, 1] for an extracted anchor:
 * 55% scene length (saturates at 80 chars), 35% edge count (saturates at 3),
 * 10% presence of a location.
 * @param {string} scene - Scene summary text.
 * @param {object[]} edges - Relation triples.
 * @param {string} where - Location string ('' when unknown).
 * @returns {number} Score rounded to 3 decimals.
 */
function calcAtomQuality(scene, edges, where) {
    const sceneScore = clamp(String(scene || '').length / 80, 0, 1);
    const edgeScore = clamp((edges?.length || 0) / 3, 0, 1);
    const whereScore = where ? 1 : 0;
    const total = 0.55 * sceneScore + 0.35 * edgeScore + 0.10 * whereScore;
    return Number(total.toFixed(3));
}
// ============================================================================
// 清洗与构建
// ============================================================================
/**
 * Validate and normalize LLM-produced relation triples.
 * Keeps at most 3 edges whose subject, target and relation all survive cleaning.
 * @param {object[]} raw - Candidate edge objects ({s, t, r}).
 * @returns {Array<{s: string, t: string, r: string}>} Clean triples (≤ 3).
 */
function sanitizeEdges(raw) {
    if (!Array.isArray(raw)) return [];
    const clean = [];
    for (const edge of raw) {
        if (clean.length >= 3) break;
        if (!edge || typeof edge !== 'object') continue;
        const s = String(edge.s || '').trim();
        const t = String(edge.t || '').trim();
        const r = sanitizeActionPhrase(edge.r);
        if (s && t && r) clean.push({ s, t, r });
    }
    return clean;
}
/**
 * Convert one LLM anchor into the stored atom shape.
 *
 * `semantic` is the scene text verbatim — it is the single input to the
 * embedding step, while `edges` feeds the graph layer.
 *
 * @param {object} anchor - LLM anchor ({scene, edges, where}).
 * @param {number} aiFloor - Floor index of the AI message.
 * @param {number} idx - Anchor ordinal within the floor (0 or 1).
 * @returns {object|null} Atom record, or null when the scene is missing/too short.
 */
function anchorToAtom(anchor, aiFloor, idx) {
    const scene = String(anchor.scene || '').trim();
    // Scenes under 15 chars (including empty) are treated as extraction noise.
    if (scene.length < 15) return null;
    const edges = sanitizeEdges(anchor.edges);
    const where = String(anchor.where || '').trim();
    return {
        atomId: `atom-${aiFloor}-${idx}`,
        floor: aiFloor,
        source: 'ai',
        semantic: scene,   // retrieval layer: the only embedding input
        edges,             // graph layer: diffusion keys
        where,
        quality: calcAtomQuality(scene, edges, where),
    };
}
// ============================================================================
// 单轮提取(带重试)
// ============================================================================
/**
 * Extract scene anchors from one user/AI message pair, retrying on empty
 * output, JSON failure, or thrown errors.
 *
 * Return contract (callers distinguish these):
 *   - object[] — extracted atoms (may be an empty array from the model)
 *   - []       — skipped: AI message empty, or batch cancelled before the call
 *   - null     — failed after all retries (or cancelled mid-call)
 *
 * @param {object|null} userMessage - Preceding user message ({name, mes}), optional.
 * @param {object} aiMessage - AI message ({mes}) to extract from.
 * @param {number} aiFloor - Floor index of the AI message.
 * @param {object} [options]
 * @param {number} [options.timeout=DEFAULT_TIMEOUT] - LLM call timeout (ms).
 * @returns {Promise<object[]|null>}
 */
async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options = {}) {
    const { timeout = DEFAULT_TIMEOUT } = options;
    if (!aiMessage?.mes?.trim()) return [];
    // Build the <round> prompt: optional <user> part plus the <assistant> part.
    const parts = [];
    const userName = userMessage?.name || '用户';
    if (userMessage?.mes?.trim()) {
        const userText = filterText(userMessage.mes);
        parts.push(`<user name="${userName}">\n${userText}\n</user>`);
    }
    const aiText = filterText(aiMessage.mes);
    parts.push(`<assistant>\n${aiText}\n</assistant>`);
    const input = `<round>\n${parts.join('\n')}\n</round>`;
    for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
        if (batchCancelled) return [];
        try {
            // The trailing assistant message is a JSON prefill (handled by callLLM).
            const response = await callLLM([
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: input },
                { role: 'assistant', content: JSON_PREFILL },
            ], {
                temperature: 0.3,
                max_tokens: 600,
                timeout,
            });
            const rawText = String(response || '');
            if (!rawText.trim()) {
                // Empty completion: retry, then give up with null.
                if (attempt < RETRY_COUNT) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }
            // Re-attach the prefill so the output parses as a full JSON object.
            const fullJson = JSON_PREFILL + rawText;
            let parsed;
            try {
                parsed = parseJson(fullJson);
            } catch (e) {
                xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败 (attempt ${attempt})`);
                if (attempt < RETRY_COUNT) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }
            // Only an `anchors` array is accepted; anything else is a failed attempt.
            const rawAnchors = parsed?.anchors;
            if (!rawAnchors || !Array.isArray(rawAnchors)) {
                if (attempt < RETRY_COUNT) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }
            // Keep at most 2 anchors per floor and convert to stored atom records.
            const atoms = rawAnchors
                .slice(0, 2)
                .map((a, idx) => anchorToAtom(a, aiFloor, idx))
                .filter(Boolean);
            return atoms;
        } catch (e) {
            if (batchCancelled) return null;
            if (attempt < RETRY_COUNT) {
                // Linear backoff: 500 ms, 1000 ms, ...
                await sleep(RETRY_DELAY * (attempt + 1));
                continue;
            }
            xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
            return null;
        }
    }
    return null;
}
/**
 * Public wrapper around the retrying single-round extractor.
 * @see extractAtomsForRoundWithRetry for the return contract.
 * @returns {Promise<object[]|null>} Atoms, [] when skipped, or null on failure.
 */
export async function extractAtomsForRound(userMessage, aiMessage, aiFloor, options = {}) {
    return extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options);
}
// ============================================================================
// 批量提取
// ============================================================================
/**
 * Extract atoms for every user→AI pair in a chat, CONCURRENCY rounds at a time.
 *
 * The first batch staggers its request starts by STAGGER_DELAY to ramp up
 * gently; later batches fire together. Cooperative cancellation via
 * cancelBatchExtraction() stops scheduling at batch boundaries.
 *
 * @param {object[]} chat - Full message list ({is_user, mes, name}).
 * @param {(done: number, total: number, failed: number) => void} [onProgress]
 * @returns {Promise<object[]>} All extracted atoms (within a batch, order is
 *     completion order, not floor order).
 */
export async function batchExtractAtoms(chat, onProgress) {
    if (!chat?.length) return [];
    batchCancelled = false;
    // Pair every AI message with its immediately-preceding user message (if any).
    const pairs = [];
    for (let i = 0; i < chat.length; i++) {
        if (!chat[i].is_user) {
            const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
            pairs.push({ userMsg, aiMsg: chat[i], aiFloor: i });
        }
    }
    if (!pairs.length) return [];
    const allAtoms = [];
    let completed = 0;
    let failed = 0;
    for (let i = 0; i < pairs.length; i += CONCURRENCY) {
        if (batchCancelled) break;
        const batch = pairs.slice(i, i + CONCURRENCY);
        if (i === 0) {
            // First batch: stagger starts to avoid a burst of simultaneous requests.
            const promises = batch.map((pair, idx) => (async () => {
                await sleep(idx * STAGGER_DELAY);
                if (batchCancelled) return;
                try {
                    const atoms = await extractAtomsForRoundWithRetry(
                        pair.userMsg,
                        pair.aiMsg,
                        pair.aiFloor,
                        { timeout: DEFAULT_TIMEOUT }
                    );
                    if (atoms?.length) {
                        allAtoms.push(...atoms);
                    } else if (atoms === null) {
                        // null = hard failure after retries; [] = skipped / no content.
                        failed++;
                    }
                } catch {
                    failed++;
                }
                completed++;
                onProgress?.(completed, pairs.length, failed);
            })());
            await Promise.all(promises);
        } else {
            // Subsequent batches: fire all rounds at once.
            const promises = batch.map(pair =>
                extractAtomsForRoundWithRetry(
                    pair.userMsg,
                    pair.aiMsg,
                    pair.aiFloor,
                    { timeout: DEFAULT_TIMEOUT }
                )
                    .then(atoms => {
                        if (batchCancelled) return;
                        if (atoms?.length) {
                            allAtoms.push(...atoms);
                        } else if (atoms === null) {
                            failed++;
                        }
                        completed++;
                        onProgress?.(completed, pairs.length, failed);
                    })
                    .catch(() => {
                        if (batchCancelled) return;
                        failed++;
                        completed++;
                        onProgress?.(completed, pairs.length, failed);
                    })
            );
            await Promise.all(promises);
        }
        // Short breather between batches.
        if (i + CONCURRENCY < pairs.length && !batchCancelled) {
            await sleep(30);
        }
    }
    xbLog.info(MODULE_ID, `批量提取完成: ${allAtoms.length} atoms, ${failed} 失败`);
    return allAtoms;
}

View File

@@ -0,0 +1,99 @@
// ═══════════════════════════════════════════════════════════════════════════
// vector/llm/llm-service.js - 修复 prefill 传递方式
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../../core/debug-core.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'vector-llm-service';                       // log tag
const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';  // OpenAI-compatible endpoint
const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';                     // model used for L0 extraction
let callCounter = 0;                                          // id counter, wraps at 100000
/**
 * Resolve the global streaming-generation module, if it is usable.
 * @returns {object|null} Module exposing `xbgenrawCommand`, or null.
 */
function getStreamingModule() {
    const mod = window.xiaobaixStreamingGeneration ?? null;
    return mod && mod.xbgenrawCommand ? mod : null;
}
/**
 * Build a collision-resistant id: prefix + wrapping counter + base36 timestamp.
 * @param {string} [prefix='llm'] - Id namespace.
 * @returns {string} e.g. `llm-42-lx3k9f`.
 */
function generateUniqueId(prefix = 'llm') {
    callCounter = (callCounter + 1) % 100000;
    const stamp = Date.now().toString(36);
    return [prefix, callCounter, stamp].join('-');
}
/**
 * Encode text as unpadded base64url (RFC 4648 §5).
 * @param {string} str - Input (coerced to string, UTF-8 encoded).
 * @returns {string} URL-safe base64 without `=` padding.
 */
function b64UrlEncode(str) {
    const bytes = new TextEncoder().encode(String(str));
    const binary = Array.from(bytes, (b) => String.fromCharCode(b)).join('');
    return btoa(binary).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
}
/**
 * Unified non-streaming LLM call routed through the tavern backend.
 *
 * A trailing assistant message in `messages` is treated as a prefill and
 * forwarded via the `bottomassistant` argument instead of the message list.
 *
 * @param {Array<{role: string, content: string}>} messages - Chat messages.
 * @param {object} [options]
 * @param {number} [options.temperature=0.2]
 * @param {number} [options.max_tokens=500]
 * @returns {Promise<string>} Raw model output ('' when the module returns nothing).
 * @throws {Error} When the streaming module or API key is missing, or the call fails.
 */
export async function callLLM(messages, options = {}) {
    const { temperature = 0.2, max_tokens = 500 } = options;
    const mod = getStreamingModule();
    if (!mod) throw new Error('Streaming module not ready');
    const cfg = getVectorConfig();
    const apiKey = cfg?.online?.key || '';
    if (!apiKey) {
        throw new Error('L0 requires siliconflow API key');
    }
    // Split off a trailing assistant message: it becomes the prefill.
    const topMessages = [...messages];
    let assistantPrefill = '';
    const last = topMessages[topMessages.length - 1];
    if (topMessages.length > 0 && last?.role === 'assistant') {
        topMessages.pop();
        assistantPrefill = last.content || '';
    }
    const args = {
        as: 'user',
        nonstream: 'true',
        top64: b64UrlEncode(JSON.stringify(topMessages)),
        id: generateUniqueId('l0'),
        temperature: String(temperature),
        max_tokens: String(max_tokens),
        api: 'openai',
        apiurl: SILICONFLOW_API_URL,
        apipassword: apiKey,
        model: DEFAULT_L0_MODEL,
    };
    // Qwen3 models default to "thinking" output; turn it off for extraction.
    if (String(DEFAULT_L0_MODEL || '').includes('Qwen3')) {
        args.enable_thinking = 'false';
    }
    if (assistantPrefill) {
        args.bottomassistant = assistantPrefill;
    }
    try {
        const result = await mod.xbgenrawCommand(args, '');
        return String(result ?? '');
    } catch (e) {
        xbLog.error(MODULE_ID, 'LLM调用失败', e);
        throw e;
    }
}
/**
 * Lenient JSON parse: strips code fences, then falls back to the outermost
 * `{...}` span when the direct parse fails.
 * @param {string} text - Raw model output.
 * @returns {object|null} Parsed value or null.
 */
export function parseJson(text) {
    if (!text) return null;
    const stripped = text
        .trim()
        .replace(/^```(?:json)?\s*/i, '')
        .replace(/\s*```$/i, '')
        .trim();
    try {
        return JSON.parse(stripped);
    } catch {
        // try the braces-extraction fallback below
    }
    const open = stripped.indexOf('{');
    const close = stripped.lastIndexOf('}');
    if (open !== -1 && close > open) {
        try {
            return JSON.parse(stripped.slice(open, close + 1));
        } catch {
            // unparseable even after extraction
        }
    }
    return null;
}

View File

@@ -0,0 +1,266 @@
// ═══════════════════════════════════════════════════════════════════════════
// Reranker - 硅基 bge-reranker-v2-m3
// 对候选文档进行精排,过滤与 query 不相关的内容
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../../core/debug-core.js';
import { getApiKey } from './siliconflow.js';
const MODULE_ID = 'reranker';                               // log tag
const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank';  // SiliconFlow rerank endpoint
const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3';             // cross-encoder model
const DEFAULT_TIMEOUT = 15000;                              // per-request timeout (ms)
const MAX_DOCUMENTS = 100; // API-imposed per-request document cap
const RERANK_BATCH_SIZE = 20;                               // docs per request in batched mode
const RERANK_MAX_CONCURRENCY = 5;                           // parallel rerank requests
/**
* 对文档列表进行 Rerank 精排
*
* @param {string} query - 查询文本
* @param {Array<string>} documents - 文档文本列表
* @param {object} options - 选项
* @param {number} options.topN - 返回前 N 个结果,默认 40
* @param {number} options.timeout - 超时时间,默认 15000ms
* @param {AbortSignal} options.signal - 取消信号
* @returns {Promise<Array<{index: number, relevance_score: number}>>} 排序后的结果
*/
export async function rerank(query, documents, options = {}) {
const { topN = 40, timeout = DEFAULT_TIMEOUT, signal } = options;
if (!query?.trim()) {
xbLog.warn(MODULE_ID, 'query 为空,跳过 rerank');
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
}
if (!documents?.length) {
return { results: [], failed: false };
}
const key = getApiKey();
if (!key) {
xbLog.warn(MODULE_ID, '未配置 API Key跳过 rerank');
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
}
// 截断超长文档列表
const truncatedDocs = documents.slice(0, MAX_DOCUMENTS);
if (documents.length > MAX_DOCUMENTS) {
xbLog.warn(MODULE_ID, `文档数 ${documents.length} 超过限制 ${MAX_DOCUMENTS},已截断`);
}
// 过滤空文档,记录原始索引
const validDocs = [];
const indexMap = []; // validDocs index → original index
for (let i = 0; i < truncatedDocs.length; i++) {
const text = String(truncatedDocs[i] || '').trim();
if (text) {
validDocs.push(text);
indexMap.push(i);
}
}
if (!validDocs.length) {
xbLog.warn(MODULE_ID, '无有效文档,跳过 rerank');
return { results: [], failed: false };
}
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
const T0 = performance.now();
const response = await fetch(RERANK_URL, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: RERANK_MODEL,
// Zero-darkbox: do not silently truncate query.
query,
documents: validDocs,
top_n: Math.min(topN, validDocs.length),
return_documents: false,
}),
signal: signal || controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
const errorText = await response.text().catch(() => '');
throw new Error(`Rerank API ${response.status}: ${errorText.slice(0, 200)}`);
}
const data = await response.json();
const results = data.results || [];
// 映射回原始索引
const mapped = results.map(r => ({
index: indexMap[r.index],
relevance_score: r.relevance_score ?? 0,
}));
const elapsed = Math.round(performance.now() - T0);
xbLog.info(MODULE_ID, `Rerank 完成: ${validDocs.length} docs → ${results.length} selected (${elapsed}ms)`);
return { results: mapped, failed: false };
} catch (e) {
clearTimeout(timeoutId);
if (e?.name === 'AbortError') {
xbLog.warn(MODULE_ID, 'Rerank 超时或取消');
} else {
xbLog.error(MODULE_ID, 'Rerank 失败', e);
}
// 降级:返回原顺序,分数均匀分布
return {
results: documents.slice(0, topN).map((_, i) => ({
index: i,
relevance_score: 0,
})),
failed: true,
};
}
}
/**
* 对 chunk 对象列表进行 Rerank
*
* @param {string} query - 查询文本
* @param {Array<object>} chunks - chunk 对象列表,需要有 text 字段
* @param {object} options - 选项
* @returns {Promise<Array<object>>} 排序后的 chunk 列表,带 _rerankScore 字段
*/
export async function rerankChunks(query, chunks, options = {}) {
const { topN = 40, minScore = 0.1 } = options;
if (!chunks?.length) return [];
const texts = chunks.map(c => c.text || c.semantic || '');
// ─── 单批:直接调用 ───
if (texts.length <= RERANK_BATCH_SIZE) {
const { results, failed } = await rerank(query, texts, {
topN: Math.min(topN, texts.length),
timeout: options.timeout,
signal: options.signal,
});
if (failed) {
return chunks.map(c => ({ ...c, _rerankScore: 0, _rerankFailed: true }));
}
return results
.filter(r => r.relevance_score >= minScore)
.sort((a, b) => b.relevance_score - a.relevance_score)
.slice(0, topN)
.map(r => ({
...chunks[r.index],
_rerankScore: r.relevance_score,
}));
}
// ─── 多批:拆分 → 并发 → 合并 ───
const batches = [];
for (let i = 0; i < texts.length; i += RERANK_BATCH_SIZE) {
batches.push({
texts: texts.slice(i, i + RERANK_BATCH_SIZE),
offset: i,
});
}
const concurrency = Math.min(batches.length, RERANK_MAX_CONCURRENCY);
xbLog.info(MODULE_ID, `并发 Rerank: ${batches.length}×${RERANK_BATCH_SIZE} docs, concurrency=${concurrency}`);
const batchResults = new Array(batches.length);
let failedBatches = 0;
const runBatch = async (batchIdx) => {
const batch = batches[batchIdx];
const { results, failed } = await rerank(query, batch.texts, {
topN: batch.texts.length,
timeout: options.timeout,
signal: options.signal,
});
if (failed) {
failedBatches++;
// 单批降级保留原始顺序score=0
batchResults[batchIdx] = batch.texts.map((_, i) => ({
globalIndex: batch.offset + i,
relevance_score: 0,
_batchFailed: true,
}));
} else {
batchResults[batchIdx] = results.map(r => ({
globalIndex: batch.offset + r.index,
relevance_score: r.relevance_score,
}));
}
};
// 并发池
let nextIdx = 0;
const worker = async () => {
while (nextIdx < batches.length) {
const idx = nextIdx++;
await runBatch(idx);
}
};
await Promise.all(Array.from({ length: concurrency }, () => worker()));
// 全部失败 → 整体降级
if (failedBatches === batches.length) {
xbLog.warn(MODULE_ID, `全部 ${batches.length} 批 rerank 失败,整体降级`);
return chunks.slice(0, topN).map(c => ({
...c,
_rerankScore: 0,
_rerankFailed: true,
}));
}
// 合并所有批次结果
const merged = batchResults.flat();
const selected = merged
.filter(r => r._batchFailed || r.relevance_score >= minScore)
.sort((a, b) => b.relevance_score - a.relevance_score)
.slice(0, topN)
.map(r => ({
...chunks[r.globalIndex],
_rerankScore: r.relevance_score,
...(r._batchFailed ? { _rerankFailed: true } : {}),
}));
xbLog.info(MODULE_ID,
`Rerank 合并: ${merged.length} candidates, ${failedBatches}/${batches.length} 批失败, 选中 ${selected.length}`
);
return selected;
}
/**
 * Smoke-test the rerank service with a tiny fixed query/document pair.
 * @returns {Promise<{success: boolean, message: string}>}
 * @throws {Error} When no API key is configured or the request fails.
 */
export async function testRerankService() {
    if (!getApiKey()) {
        throw new Error('请配置硅基 API Key');
    }
    try {
        const { results } = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });
        return { success: true, message: `连接成功,返回 ${results.length} 个结果` };
    } catch (e) {
        throw new Error(`连接失败: ${e.message}`);
    }
}

View File

@@ -0,0 +1,59 @@
// ═══════════════════════════════════════════════════════════════════════════
// siliconflow.js - 仅保留 Embedding
// ═══════════════════════════════════════════════════════════════════════════
const BASE_URL = 'https://api.siliconflow.cn';  // SiliconFlow API origin
const EMBEDDING_MODEL = 'BAAI/bge-m3';          // multilingual embedding model
/**
 * Read the SiliconFlow API key from the persisted panel config.
 * @returns {string|null} The key, or null when unset/unreadable.
 */
export function getApiKey() {
    try {
        const stored = localStorage.getItem('summary_panel_config');
        if (!stored) return null;
        return JSON.parse(stored).vector?.online?.key || null;
    } catch {
        return null;
    }
}
/**
 * Embed texts with BAAI/bge-m3 via the SiliconFlow embeddings API.
 *
 * @param {string[]} texts - Texts to embed; [] short-circuits to [].
 * @param {object} [options]
 * @param {number} [options.timeout=30000] - Abort the request after this many ms.
 * @param {AbortSignal} [options.signal] - External cancel signal. NOTE(review):
 *     when provided it replaces the internal timeout controller, so the
 *     timeout no longer aborts the fetch — confirm this is intended.
 * @returns {Promise<number[][]>} Embedding vectors in input order.
 * @throws {Error} When no API key is configured or the API responds non-2xx.
 */
export async function embed(texts, options = {}) {
    if (!texts?.length) return [];
    const key = getApiKey();
    if (!key) throw new Error('未配置硅基 API Key');
    const { timeout = 30000, signal } = options;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);
    try {
        const response = await fetch(`${BASE_URL}/v1/embeddings`, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${key}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: EMBEDDING_MODEL,
                input: texts,
            }),
            signal: signal || controller.signal,
        });
        if (!response.ok) {
            const errorText = await response.text().catch(() => '');
            throw new Error(`Embedding ${response.status}: ${errorText.slice(0, 200)}`);
        }
        const data = await response.json();
        // The API may return items out of order; restore input order by index.
        return (data.data || [])
            .sort((a, b) => a.index - b.index)
            .map(item => Array.isArray(item.embedding) ? item.embedding : Array.from(item.embedding));
    } finally {
        // Single cleanup point — the redundant mid-body clearTimeout was removed;
        // `finally` already runs on both success and failure.
        clearTimeout(timeoutId);
    }
}
export { EMBEDDING_MODEL as MODELS };

View File

@@ -0,0 +1,391 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Chunk Builder
// 标准 RAG chunking: ~200 tokens per chunk
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../../extensions.js';
import {
getMeta,
updateMeta,
saveChunks,
saveChunkVectors,
clearAllChunks,
deleteChunksFromFloor,
deleteChunksAtFloor,
makeChunkId,
hashText,
CHUNK_MAX_TOKENS,
} from '../storage/chunk-store.js';
import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from '../utils/text-filter.js';
import { extractAndStoreAtomsForRound } from './state-integration.js';
import {
deleteStateAtomsFromFloor,
deleteStateVectorsFromFloor,
deleteL0IndexFromFloor,
} from '../storage/state-store.js';
const MODULE_ID = 'chunk-builder';
// ═══════════════════════════════════════════════════════════════════════════
// Token 估算
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Rough token estimate: CJK ideographs count as 1 token each, everything
 * else as 1 token per 4 characters (rounded up overall).
 * @param {string} text
 * @returns {number} Estimated token count (0 for empty input).
 */
function estimateTokens(text) {
    if (!text) return 0;
    const cjkCount = (text.match(/[\u4e00-\u9fff]/g) || []).length;
    const otherCount = text.length - cjkCount;
    return Math.ceil(cjkCount + otherCount / 4);
}
/**
 * Split text into sentences, keeping terminators attached: cuts after CJK
 * terminators (。!?) or a newline, and after Latin terminators ([.!?])
 * followed by whitespace.
 * @param {string} text
 * @returns {string[]} Trimmed, non-empty sentence fragments.
 */
function splitSentences(text) {
    if (!text) return [];
    return text
        .split(/(?<=[。!?\n])|(?<=[.!?]\s)/)
        .map(part => part.trim())
        .filter(part => part.length > 0);
}
// ═══════════════════════════════════════════════════════════════════════════
// Chunk 切分
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Split one message into RAG chunks of at most `maxTokens` estimated tokens.
 *
 * The text is first run through the user-configured filter, then TTS markers
 * and <state> tags are stripped (state content is stored separately as L0).
 * Packing is sentence-based; a single sentence longer than the budget is
 * hard-sliced at `maxTokens * 2` characters.
 *
 * @param {number} floor - Message floor index.
 * @param {object} message - Chat message ({mes, name, is_user}).
 * @param {number} [maxTokens=CHUNK_MAX_TOKENS] - Per-chunk token budget.
 * @returns {object[]} Chunk records ({chunkId, floor, chunkIdx, speaker, isUser, text, textHash}).
 */
export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
    const speaker = message.name || (message.is_user ? '用户' : '角色');
    const isUser = !!message.is_user;
    const cleanText = filterText(message.mes || '')
        .replace(/\[tts:[^\]]*\]/gi, '')
        .replace(/<state>[\s\S]*?<\/state>/gi, '')
        .trim();
    if (!cleanText) return [];
    const chunks = [];
    // All chunks share the message metadata; id/idx follow insertion order.
    const pushChunk = (text) => {
        chunks.push({
            chunkId: makeChunkId(floor, chunks.length),
            floor,
            chunkIdx: chunks.length,
            speaker,
            isUser,
            text,
            textHash: hashText(text),
        });
    };
    // Fast path: the whole message fits in one chunk.
    if (estimateTokens(cleanText) <= maxTokens) {
        pushChunk(cleanText);
        return chunks;
    }
    let pending = [];
    let pendingTokens = 0;
    const flush = () => {
        if (pending.length > 0) {
            pushChunk(pending.join(''));
            pending = [];
            pendingTokens = 0;
        }
    };
    for (const sentence of splitSentences(cleanText)) {
        const sentenceTokens = estimateTokens(sentence);
        if (sentenceTokens > maxTokens) {
            // Oversized sentence: emit what we have, then hard-slice it.
            flush();
            const sliceSize = maxTokens * 2;
            for (let i = 0; i < sentence.length; i += sliceSize) {
                pushChunk(sentence.slice(i, i + sliceSize));
            }
            continue;
        }
        if (pendingTokens + sentenceTokens > maxTokens) {
            flush();
        }
        pending.push(sentence);
        pendingTokens += sentenceTokens;
    }
    flush();
    return chunks;
}
// ═══════════════════════════════════════════════════════════════════════════
// 构建状态
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Report chunk-build progress for the active chat.
 * @returns {Promise<{totalFloors: number, builtFloors: number, lastChunkFloor?: number, pending: number}>}
 *     Zeros (without lastChunkFloor) when no chat is open.
 */
export async function getChunkBuildStatus() {
    const { chat, chatId } = getContext();
    if (!chatId) {
        return { totalFloors: 0, builtFloors: 0, pending: 0 };
    }
    const meta = await getMeta(chatId);
    const totalFloors = chat?.length || 0;
    const builtFloors = meta.lastChunkFloor + 1; // floors are 0-based
    return {
        totalFloors,
        builtFloors,
        lastChunkFloor: meta.lastChunkFloor,
        pending: Math.max(0, totalFloors - builtFloors),
    };
}
// ═══════════════════════════════════════════════════════════════════════════
// 全量构建
// ═══════════════════════════════════════════════════════════════════════════
export async function buildAllChunks(options = {}) {
const { onProgress, shouldCancel, vectorConfig } = options;
const { chat, chatId } = getContext();
if (!chatId || !chat?.length) {
return { built: 0, errors: 0 };
}
const fingerprint = getEngineFingerprint(vectorConfig);
await clearAllChunks(chatId);
await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });
const allChunks = [];
for (let floor = 0; floor < chat.length; floor++) {
const chunks = chunkMessage(floor, chat[floor]);
allChunks.push(...chunks);
}
if (allChunks.length === 0) {
return { built: 0, errors: 0 };
}
xbLog.info(MODULE_ID, `开始构建 ${allChunks.length} 个 chunks${chat.length} 层楼)`);
await saveChunks(chatId, allChunks);
const texts = allChunks.map(c => c.text);
const batchSize = 20;
let completed = 0;
let errors = 0;
const allVectors = [];
for (let i = 0; i < texts.length; i += batchSize) {
if (shouldCancel?.()) break;
const batch = texts.slice(i, i + batchSize);
try {
const vectors = await embed(batch, vectorConfig);
allVectors.push(...vectors);
completed += batch.length;
onProgress?.(completed, texts.length);
} catch (e) {
xbLog.error(MODULE_ID, `批次 ${i}/${texts.length} 向量化失败`, e);
allVectors.push(...batch.map(() => null));
errors++;
}
}
if (shouldCancel?.()) {
return { built: completed, errors };
}
const vectorItems = allChunks
.map((chunk, idx) => allVectors[idx] ? { chunkId: chunk.chunkId, vector: allVectors[idx] } : null)
.filter(Boolean);
if (vectorItems.length > 0) {
await saveChunkVectors(chatId, vectorItems, fingerprint);
}
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
xbLog.info(MODULE_ID, `构建完成:${vectorItems.length} 个向量,${errors} 个错误`);
return { built: vectorItems.length, errors };
}
// ═══════════════════════════════════════════════════════════════════════════
// 增量构建
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Incrementally chunk + embed only the floors added since the last build.
 * Skips (built: 0) when the stored engine fingerprint no longer matches,
 * when there is nothing new, or when embedding fails.
 *
 * @param {object} [options]
 * @param {object} [options.vectorConfig] - Embedding engine config (fingerprinted).
 * @returns {Promise<{built: number}>} Number of vectors persisted.
 */
export async function buildIncrementalChunks(options = {}) {
    const { vectorConfig } = options;
    const { chat, chatId } = getContext();
    if (!chatId || !chat?.length) {
        return { built: 0 };
    }
    const meta = await getMeta(chatId);
    const fingerprint = getEngineFingerprint(vectorConfig);
    // A changed engine fingerprint means old vectors are incompatible;
    // incremental building would mix engines, so bail out.
    if (meta.fingerprint && meta.fingerprint !== fingerprint) {
        xbLog.warn(MODULE_ID, '引擎指纹不匹配,跳过增量构建');
        return { built: 0 };
    }
    const startFloor = meta.lastChunkFloor + 1;
    if (startFloor >= chat.length) {
        return { built: 0 };
    }
    xbLog.info(MODULE_ID, `增量构建 ${startFloor} - ${chat.length - 1}`);
    const newChunks = [];
    for (let floor = startFloor; floor < chat.length; floor++) {
        const chunks = chunkMessage(floor, chat[floor]);
        newChunks.push(...chunks);
    }
    if (newChunks.length === 0) {
        // Nothing chunkable in the new floors; still advance the cursor.
        await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
        return { built: 0 };
    }
    await saveChunks(chatId, newChunks);
    const texts = newChunks.map(c => c.text);
    try {
        const vectors = await embed(texts, vectorConfig);
        const vectorItems = newChunks.map((chunk, idx) => ({
            chunkId: chunk.chunkId,
            vector: vectors[idx],
        }));
        await saveChunkVectors(chatId, vectorItems, fingerprint);
        await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
        return { built: vectorItems.length };
    } catch (e) {
        // Cursor is NOT advanced on failure, so the next run retries these floors.
        xbLog.error(MODULE_ID, '增量向量化失败', e);
        return { built: 0 };
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// L1 同步(消息变化时调用)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Sync after message deletion: drop every chunk at floor >= newLength and
 * move the chunk watermark back to the new last floor.
 * @param {string} chatId
 * @param {number} newLength - chat length after the deletion
 */
export async function syncOnMessageDeleted(chatId, newLength) {
    const invalidArgs = !chatId || newLength < 0;
    if (invalidArgs) {
        return;
    }
    await deleteChunksFromFloor(chatId, newLength);
    const lastChunkFloor = newLength - 1;
    await updateMeta(chatId, { lastChunkFloor });
    xbLog.info(MODULE_ID, `消息删除同步:删除 floor >= ${newLength}`);
}
/**
 * Sync after a swipe: drop the last floor's chunks and step the watermark
 * back one floor; the floor is rebuilt later by the message-received sync.
 * @param {string} chatId
 * @param {number} lastFloor - index of the swiped floor
 */
export async function syncOnMessageSwiped(chatId, lastFloor) {
    if (!chatId) return;
    if (lastFloor < 0) return;
    await deleteChunksAtFloor(chatId, lastFloor);
    const lastChunkFloor = lastFloor - 1;
    await updateMeta(chatId, { lastChunkFloor });
    xbLog.info(MODULE_ID, `swipe 同步:删除 floor ${lastFloor}`);
}
/**
 * Sync after a new message: delete + rebuild the last floor's chunks, then
 * (for AI messages only) run L0 atom extraction for the user/AI round.
 *
 * @param {string} chatId
 * @param {number} lastFloor - index of the new message's floor
 * @param {object} message - the new message object
 * @param {object} vectorConfig - embedding config; no-op when not enabled
 * @param {Function} [onL0Complete] - forwarded to the L0 extraction queue
 * @returns {Promise<{built: number, chunks: object[]}>} built is 0 when
 *   vectorization failed (chunks may still have been saved)
 */
export async function syncOnMessageReceived(chatId, lastFloor, message, vectorConfig, onL0Complete) {
    if (!chatId || lastFloor < 0 || !message) return { built: 0, chunks: [] };
    if (!vectorConfig?.enabled) return { built: 0, chunks: [] };
    // Drop any stale chunks for this floor (regenerate/swipe overwrite case)
    await deleteChunksAtFloor(chatId, lastFloor);
    // Rebuild
    const chunks = chunkMessage(lastFloor, message);
    if (chunks.length === 0) return { built: 0, chunks: [] };
    await saveChunks(chatId, chunks);
    // Vectorize
    const fingerprint = getEngineFingerprint(vectorConfig);
    const texts = chunks.map(c => c.text);
    let vectorized = false;
    try {
        const vectors = await embed(texts, vectorConfig);
        const items = chunks.map((c, i) => ({ chunkId: c.chunkId, vector: vectors[i] }));
        await saveChunkVectors(chatId, items, fingerprint);
        await updateMeta(chatId, { lastChunkFloor: lastFloor });
        vectorized = true;
        xbLog.info(MODULE_ID, `消息同步:重建 floor ${lastFloor}${chunks.length} 个 chunk`);
    } catch (e) {
        // Chunks persist without vectors; built below reports 0 so callers
        // can tell vectorization did not happen.
        xbLog.error(MODULE_ID, `消息同步失败floor ${lastFloor}`, e);
    }
    // L0 paired extraction (triggered by AI messages only)
    if (!message.is_user) {
        const { chat } = getContext();
        const userFloor = lastFloor - 1;
        const userMessage = (userFloor >= 0 && chat[userFloor]?.is_user) ? chat[userFloor] : null;
        // L0 delete-then-rebuild (symmetric with the L1 deleteChunksAtFloor):
        // after regenerate / swipe the new message overwrites the old floor,
        // so clear stale atoms / index entries / vectors first.
        deleteStateAtomsFromFloor(lastFloor);
        deleteL0IndexFromFloor(lastFloor);
        await deleteStateVectorsFromFloor(chatId, lastFloor);
        try {
            await extractAndStoreAtomsForRound(lastFloor, message, userMessage, onL0Complete);
        } catch (e) {
            // Best-effort: a failed extraction must not block chunk syncing.
            xbLog.warn(MODULE_ID, `Atom 提取失败: floor ${lastFloor}`, e);
        }
    }
    return { built: vectorized ? chunks.length : 0, chunks };
}

View File

@@ -0,0 +1,486 @@
// ============================================================================
// state-integration.js - L0 状态层集成
// Phase 1: 批量 LLM 提取(只存文本)
// Phase 2: 统一向量化(提取完成后)
// ============================================================================
import { getContext } from '../../../../../../../extensions.js';
import { saveMetadataDebounced } from '../../../../../../../extensions.js';
import { xbLog } from '../../../../core/debug-core.js';
import {
saveStateAtoms,
saveStateVectors,
deleteStateAtomsFromFloor,
deleteStateVectorsFromFloor,
getStateAtoms,
clearStateAtoms,
clearStateVectors,
getL0FloorStatus,
setL0FloorStatus,
clearL0Index,
deleteL0IndexFromFloor,
} from '../storage/state-store.js';
import { embed } from '../llm/siliconflow.js';
import { extractAtomsForRound, cancelBatchExtraction } from '../llm/atom-extraction.js';
import { getVectorConfig } from '../../data/config.js';
import { getEngineFingerprint } from '../utils/embedder.js';
import { filterText } from '../utils/text-filter.js';
const MODULE_ID = 'state-integration';
// ★ Concurrency tuning for the batch extraction worker pool
const CONCURRENCY = 50;          // max simultaneous LLM extraction requests
const STAGGER_DELAY = 15;        // ms between staggered task starts (burst avoidance)
const DEBUG_CONCURRENCY = true;  // log pool start/progress diagnostics
const R_AGG_MAX_CHARS = 256;     // cap on the R-aggregate embedding text length
// Module state
let initialized = false;          // initStateIntegration() has run
let extractionCancelled = false;  // cooperative cancel flag for batch runs
// Cancel an in-flight batch L0 extraction (cooperative: workers poll the flag).
export function cancelL0Extraction() {
    extractionCancelled = true;
    cancelBatchExtraction();
}
// ============================================================================
// 初始化
// ============================================================================
/**
 * One-time module init: registers the global rollback hook.
 * Safe to call repeatedly (idempotent via the `initialized` flag).
 */
export function initStateIntegration() {
    if (initialized) return;
    initialized = true;
    // Exposed globally so code outside this module can trigger L0 rollback.
    globalThis.LWB_StateRollbackHook = handleStateRollback;
    xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
}
// ============================================================================
// 统计
// ============================================================================
/**
 * Aggregate L0 extraction status over every AI floor of the current chat.
 * @returns {Promise<{extracted:number,total:number,pending:number,empty:number,fail:number}>}
 *   extracted counts both 'ok' and 'empty' floors (both are fully processed).
 */
export async function getAnchorStats() {
    const { chat } = getContext();
    if (!chat?.length) {
        return { extracted: 0, total: 0, pending: 0, empty: 0, fail: 0 };
    }
    const counts = { ok: 0, empty: 0, fail: 0 };
    let total = 0;
    for (let floor = 0; floor < chat.length; floor++) {
        // Mirrors the extraction loop: anything not flagged is_user is an AI floor.
        if (chat[floor]?.is_user) continue;
        total++;
        const status = getL0FloorStatus(floor);
        if (!status) continue;
        if (status.status === 'ok') counts.ok++;
        else if (status.status === 'empty') counts.empty++;
        else if (status.status === 'fail') counts.fail++;
    }
    const processed = counts.ok + counts.empty + counts.fail;
    return {
        extracted: counts.ok + counts.empty,
        total,
        pending: Math.max(0, total - processed),
        empty: counts.empty,
        fail: counts.fail,
    };
}
// ============================================================================
// 增量提取 - Phase 1 提取文本Phase 2 统一向量化
// ============================================================================
/**
 * Build the LLM input for one user/AI round: each present side becomes a
 * labelled, filtered section; sections are joined by a '---' divider.
 * @param {object|null} userMessage
 * @param {object|null} aiMessage
 * @returns {string} empty string when both sides are blank after filtering
 */
function buildL0InputText(userMessage, aiMessage) {
    const userName = userMessage?.name || '用户';
    const aiName = aiMessage?.name || '角色';
    const sections = [];
    const userText = userMessage?.mes;
    if (userText?.trim()) {
        sections.push(`【用户:${userName}\n${filterText(userText).trim()}`);
    }
    const aiText = aiMessage?.mes;
    if (aiText?.trim()) {
        sections.push(`【角色:${aiName}\n${filterText(aiText).trim()}`);
    }
    return sections.join('\n\n---\n\n').trim();
}
/**
 * Aggregate text for the R (relation) embedding channel: unique, non-empty
 * edge.r strings joined by ' ; ', truncated to R_AGG_MAX_CHARS. Falls back
 * to the atom's semantic text when no edge relations exist.
 * @param {object} atom
 * @returns {string}
 */
function buildRAggregateText(atom) {
    const relations = new Set();
    for (const edge of atom?.edges || []) {
        const relation = String(edge?.r || '').trim();
        if (relation) relations.add(relation);
    }
    if (relations.size === 0) {
        return String(atom?.semantic || '').trim();
    }
    const joined = [...relations].join(' ; ');
    return joined.length > R_AGG_MAX_CHARS ? joined.slice(0, R_AGG_MAX_CHARS) : joined;
}
/**
 * Incremental L0 extraction over the whole chat.
 *
 * Phase 1: batch-extract atoms via LLM for every unprocessed AI floor using
 *          a fixed-size worker pool (text only, no vectorization).
 * Phase 2: vectorize all newly extracted atoms in one pass.
 *
 * Floors whose status is 'ok'/'empty' are skipped; 'fail' floors are retried.
 *
 * @param {string} chatId
 * @param {object[]} chat - full message list
 * @param {Function} [onProgress] - (label, current, total)
 * @param {object} [options]
 * @param {number} [options.maxFloors=Infinity] - per-run cap (auto trigger)
 * @returns {Promise<{built: number}>} number of atoms stored
 */
export async function incrementalExtractAtoms(chatId, chat, onProgress, options = {}) {
    const { maxFloors = Infinity } = options;
    if (!chatId || !chat?.length) return { built: 0 };
    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) return { built: 0 };
    // ★ Reset the cooperative cancel flag for this run
    extractionCancelled = false;
    const pendingPairs = [];
    for (let i = 0; i < chat.length; i++) {
        const msg = chat[i];
        if (!msg || msg.is_user) continue;
        const st = getL0FloorStatus(i);
        // ★ Skip only 'ok' and 'empty'; 'fail' floors can be retried
        if (st?.status === 'ok' || st?.status === 'empty') {
            continue;
        }
        const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
        const inputText = buildL0InputText(userMsg, msg);
        if (!inputText) {
            setL0FloorStatus(i, { status: 'empty', reason: 'filtered_empty', atoms: 0 });
            continue;
        }
        pendingPairs.push({ userMsg, aiMsg: msg, aiFloor: i });
    }
    // Cap floors processed per run (used by the automatic trigger)
    if (pendingPairs.length > maxFloors) {
        pendingPairs.length = maxFloors;
    }
    if (!pendingPairs.length) {
        onProgress?.('已全部提取', 0, 0);
        return { built: 0 };
    }
    xbLog.info(MODULE_ID, `增量 L0 提取pending=${pendingPairs.length}, concurrency=${CONCURRENCY}`);
    let completed = 0;
    let failed = 0;
    const total = pendingPairs.length;
    let builtAtoms = 0;
    let active = 0;
    let peakActive = 0;
    const tStart = performance.now();
    // ★ Phase 1: collect all newly extracted atoms (vectorize later)
    const allNewAtoms = [];
    // Worker pool with fixed concurrency; each worker pulls the next index.
    const poolSize = Math.min(CONCURRENCY, pendingPairs.length);
    let nextIndex = 0;
    let started = 0;
    const runWorker = async (workerId) => {
        while (true) {
            if (extractionCancelled) return;
            const idx = nextIndex++;
            if (idx >= pendingPairs.length) return;
            const pair = pendingPairs[idx];
            // Stagger task starts to avoid an initial request burst
            const stagger = started++;
            if (STAGGER_DELAY > 0) {
                await new Promise(r => setTimeout(r, stagger * STAGGER_DELAY));
            }
            if (extractionCancelled) return;
            const floor = pair.aiFloor;
            const prev = getL0FloorStatus(floor);
            active++;
            if (active > peakActive) peakActive = active;
            if (DEBUG_CONCURRENCY && (idx % 10 === 0)) {
                xbLog.info(MODULE_ID, `L0 pool start idx=${idx} active=${active} peak=${peakActive} worker=${workerId}`);
            }
            try {
                const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
                if (extractionCancelled) return;
                if (atoms == null) {
                    // null signals the LLM call itself failed → marked 'fail' below
                    throw new Error('llm_failed');
                }
                if (!atoms.length) {
                    setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
                } else {
                    atoms.forEach(a => a.chatId = chatId);
                    saveStateAtoms(atoms);
                    // Phase 1: collect only — Phase 2 vectorizes in bulk
                    allNewAtoms.push(...atoms);
                    setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
                    builtAtoms += atoms.length;
                }
            } catch (e) {
                if (extractionCancelled) return;
                setL0FloorStatus(floor, {
                    status: 'fail',
                    attempts: (prev?.attempts || 0) + 1,
                    reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
                });
                failed++;
            } finally {
                active--;
                if (!extractionCancelled) {
                    completed++;
                    onProgress?.(`提取: ${completed}/${total}`, completed, total);
                }
                if (DEBUG_CONCURRENCY && (completed % 25 === 0 || completed === total)) {
                    const elapsed = Math.max(1, Math.round(performance.now() - tStart));
                    xbLog.info(MODULE_ID, `L0 pool progress=${completed}/${total} active=${active} peak=${peakActive} elapsedMs=${elapsed}`);
                }
            }
        }
    };
    await Promise.all(Array.from({ length: poolSize }, (_, i) => runWorker(i)));
    if (DEBUG_CONCURRENCY) {
        const elapsed = Math.max(1, Math.round(performance.now() - tStart));
        xbLog.info(MODULE_ID, `L0 pool done completed=${completed}/${total} failed=${failed} peakActive=${peakActive} elapsedMs=${elapsed}`);
    }
    // Flush metadata immediately (best-effort)
    try {
        saveMetadataDebounced?.();
    } catch { }
    // ★ Phase 2: vectorize every newly extracted atom in one pass
    if (allNewAtoms.length > 0 && !extractionCancelled) {
        onProgress?.(`向量化 L0: 0/${allNewAtoms.length}`, 0, allNewAtoms.length);
        await vectorizeAtoms(chatId, allNewAtoms, (current, total) => {
            onProgress?.(`向量化 L0: ${current}/${total}`, current, total);
        });
    }
    xbLog.info(MODULE_ID, `L0 ${extractionCancelled ? '已取消' : '完成'}atoms=${builtAtoms}, completed=${completed}/${total}, failed=${failed}`);
    return { built: builtAtoms };
}
// ============================================================================
// 向量化(支持进度回调)
// ============================================================================
/**
 * Batched vectorization with progress reporting (Phase 2 of extraction).
 *
 * Each batch embeds semantic texts and R-aggregate texts in a single embed()
 * call (payload = [sem..., r...]) and splits the result in half. When the run
 * is cancelled mid-way the partial batches are discarded (early return).
 *
 * @param {string} chatId
 * @param {object[]} atoms
 * @param {Function} [onProgress] - (current, total)
 */
async function vectorizeAtoms(chatId, atoms, onProgress) {
    if (!atoms?.length) return;
    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) return;
    const semanticTexts = atoms.map(a => a.semantic);
    const rTexts = atoms.map(a => buildRAggregateText(a));
    const fingerprint = getEngineFingerprint(vectorCfg);
    const batchSize = 20;
    try {
        const allVectors = [];
        for (let i = 0; i < semanticTexts.length; i += batchSize) {
            if (extractionCancelled) break;
            const semBatch = semanticTexts.slice(i, i + batchSize);
            const rBatch = rTexts.slice(i, i + batchSize);
            const payload = semBatch.concat(rBatch);
            // NOTE(review): embed() receives only { timeout }; vectorCfg is not
            // forwarded (unlike the chunk pipeline's embed(texts, vectorConfig)).
            // Presumably embed() reads the active config itself — confirm, since
            // the fingerprint saved below is derived from vectorCfg.
            const vectors = await embed(payload, { timeout: 30000 });
            const split = semBatch.length;
            if (!Array.isArray(vectors) || vectors.length < split * 2) {
                throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
            }
            const semVectors = vectors.slice(0, split);
            const rVectors = vectors.slice(split, split + split);
            for (let j = 0; j < split; j++) {
                allVectors.push({
                    vector: semVectors[j],
                    // fall back to the semantic vector when the R vector is missing
                    rVector: rVectors[j] || semVectors[j],
                });
            }
            onProgress?.(allVectors.length, semanticTexts.length);
        }
        if (extractionCancelled) return;
        const items = atoms.slice(0, allVectors.length).map((a, i) => ({
            atomId: a.atomId,
            floor: a.floor,
            vector: allVectors[i].vector,
            rVector: allVectors[i].rVector,
        }));
        await saveStateVectors(chatId, items, fingerprint);
        xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length}`);
    } catch (e) {
        // Swallow: vectors can be regenerated later via rebuildStateVectors()
        xbLog.error(MODULE_ID, 'L0 向量化失败', e);
    }
}
// ============================================================================
// 清空
// ============================================================================
/**
 * Wipe all L0 data: atoms, index, and (when a chat is given) stored vectors.
 * Metadata is flushed immediately on a best-effort basis.
 * @param {string} [chatId]
 */
export async function clearAllAtomsAndVectors(chatId) {
    clearStateAtoms();
    clearL0Index();
    if (chatId) await clearStateVectors(chatId);
    // ★ flush metadata right away; failures here are non-fatal
    try { saveMetadataDebounced?.(); } catch { }
    xbLog.info(MODULE_ID, '已清空所有记忆锚点');
}
// ============================================================================
// 实时增量AI 消息后触发)- 保持不变
// ============================================================================
// FIFO queue of per-floor extraction jobs pushed by extractAndStoreAtomsForRound.
let extractionQueue = [];
// Re-entrancy guard: ensures only one processQueue() drain loop runs at a time.
let isProcessing = false;
/**
 * Real-time entry point: queue one AI floor for atom extraction + vectorization.
 * Jobs are drained sequentially by processQueue().
 * @param {number} aiFloor
 * @param {object} aiMessage
 * @param {object|null} userMessage - preceding user message, if any
 * @param {Function} [onComplete] - called with { floor, atomCount, error? }
 */
export async function extractAndStoreAtomsForRound(aiFloor, aiMessage, userMessage, onComplete) {
    const { chatId } = getContext();
    if (!chatId) return;
    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) return;
    extractionQueue.push({ aiFloor, aiMessage, userMessage, chatId, onComplete });
    // fire-and-forget: processQueue self-guards against re-entrancy
    processQueue();
}
/**
 * Drain the extraction queue sequentially, one floor at a time.
 * The isProcessing flag guarantees a single drain loop even when several
 * extractAndStoreAtomsForRound() calls arrive close together.
 */
async function processQueue() {
    if (isProcessing || extractionQueue.length === 0) return;
    isProcessing = true;
    while (extractionQueue.length > 0) {
        const { aiFloor, aiMessage, userMessage, chatId, onComplete } = extractionQueue.shift();
        try {
            const atoms = await extractAtomsForRound(userMessage, aiMessage, aiFloor, { timeout: 12000 });
            if (!atoms?.length) {
                xbLog.info(MODULE_ID, `floor ${aiFloor}: 无有效 atoms`);
                onComplete?.({ floor: aiFloor, atomCount: 0 });
                continue;
            }
            atoms.forEach(a => a.chatId = chatId);
            saveStateAtoms(atoms);
            // Real-time single-floor path: vectorize immediately
            await vectorizeAtomsSimple(chatId, atoms);
            xbLog.info(MODULE_ID, `floor ${aiFloor}: ${atoms.length} atoms 已存储`);
            onComplete?.({ floor: aiFloor, atomCount: atoms.length });
        } catch (e) {
            // Per-job failure: report via onComplete and keep draining the queue
            xbLog.error(MODULE_ID, `floor ${aiFloor} 处理失败`, e);
            onComplete?.({ floor: aiFloor, atomCount: 0, error: e });
        }
    }
    isProcessing = false;
}
// Simple vectorization (no progress callback; used by the real-time single-floor path)
async function vectorizeAtomsSimple(chatId, atoms) {
    if (!atoms?.length) return;
    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) return;
    const semanticTexts = atoms.map(a => a.semantic);
    const rTexts = atoms.map(a => buildRAggregateText(a));
    const fingerprint = getEngineFingerprint(vectorCfg);
    try {
        // Single embed() call: [semantic..., rAggregate...], split by half below.
        // NOTE(review): only { timeout } is passed; vectorCfg is not forwarded —
        // presumably embed() reads the active config itself. Confirm.
        const vectors = await embed(semanticTexts.concat(rTexts), { timeout: 30000 });
        const split = semanticTexts.length;
        if (!Array.isArray(vectors) || vectors.length < split * 2) {
            throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
        }
        const semVectors = vectors.slice(0, split);
        const rVectors = vectors.slice(split, split + split);
        const items = atoms.map((a, i) => ({
            atomId: a.atomId,
            floor: a.floor,
            vector: semVectors[i],
            // fall back to the semantic vector when the R vector is missing
            rVector: rVectors[i] || semVectors[i],
        }));
        await saveStateVectors(chatId, items, fingerprint);
    } catch (e) {
        // Swallow: vectors can be regenerated later via rebuildStateVectors()
        xbLog.error(MODULE_ID, 'L0 向量化失败', e);
    }
}
// ============================================================================
// 回滚钩子
// ============================================================================
/**
 * Rollback hook (installed as globalThis.LWB_StateRollbackHook):
 * removes all L0 atoms, index entries and vectors at floor >= floor.
 * @param {number} floor
 */
async function handleStateRollback(floor) {
    xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);
    const { chatId } = getContext();
    deleteStateAtomsFromFloor(floor);
    deleteL0IndexFromFloor(floor);
    if (!chatId) return;
    await deleteStateVectorsFromFloor(chatId, floor);
}
// ============================================================================
// 兼容旧接口
// ============================================================================
/**
 * Legacy entry point: full rebuild — wipe all L0 data, then run a fresh
 * incremental extraction over the entire chat.
 * @param {string} chatId
 * @param {object[]} chat
 * @param {Function} [onProgress]
 * @returns {Promise<{built: number}>}
 */
export async function batchExtractAndStoreAtoms(chatId, chat, onProgress) {
    if (!chatId || !chat?.length) return { built: 0 };
    if (!getVectorConfig()?.enabled) return { built: 0 };
    xbLog.info(MODULE_ID, `开始批量 L0 提取: ${chat.length} 条消息`);
    clearStateAtoms();
    clearL0Index();
    await clearStateVectors(chatId);
    return incrementalExtractAtoms(chatId, chat, onProgress);
}
/**
 * Legacy entry point: re-embed every stored atom from scratch
 * (drops existing vectors first).
 * @param {string} chatId
 * @param {object} vectorCfg
 * @returns {Promise<{built: number}>}
 */
export async function rebuildStateVectors(chatId, vectorCfg) {
    if (!chatId || !vectorCfg?.enabled) return { built: 0 };
    const atoms = getStateAtoms();
    if (atoms.length === 0) return { built: 0 };
    xbLog.info(MODULE_ID, `重建 L0 向量: ${atoms.length} 条 atom`);
    await clearStateVectors(chatId);
    await vectorizeAtomsSimple(chatId, atoms);
    return { built: atoms.length };
}

View File

@@ -0,0 +1,928 @@
// ═══════════════════════════════════════════════════════════════════════════
// diffusion.js - PPR Graph Diffusion (Personalized PageRank)
//
// Spreads activation from seed L0 atoms through entity co-occurrence graph
// to discover narratively-connected but semantically-distant memories.
//
// Pipeline position: recall.js Stage 7.5
// Input: seeds (reranked L0 from Stage 6)
// Output: additional L0 atoms → merged into l0Selected
//
// Algorithm:
// 1. Build undirected weighted graph over all L0 atoms
// Candidate edges: WHAT + R semantic; WHO/WHERE are reweight-only
// 2. Personalized PageRank (Power Iteration)
// Seeds weighted by rerankScore — Haveliwala (2002) topic-sensitive variant
// α = 0.15 restart probability — Page et al. (1998)
// 3. Post-verification (Dense Cosine Gate)
// Exclude seeds, cosine ≥ COSINE_GATE (0.46), final = PPR_norm × cosine ≥ 0.10
//
// References:
// Page et al. "The PageRank Citation Ranking" (1998)
// Haveliwala "Topic-Sensitive PageRank" (IEEE TKDE 2003)
// Langville & Meyer "Eigenvector Methods for Web IR" (SIAM Review 2005)
// Sun et al. "GraftNet" (EMNLP 2018)
// Jaccard "Étude comparative de la distribution florale" (1912)
// Szymkiewicz "Une contribution statistique" (1934) — Overlap coefficient
// Rimmon-Kenan "Narrative Fiction" (2002) — Channel weight rationale
//
// Core PPR iteration aligned with NetworkX pagerank():
// github.com/networkx/networkx — algorithms/link_analysis/pagerank_alg.py
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
const MODULE_ID = 'diffusion';
// ═══════════════════════════════════════════════════════════════════════════
// Configuration
// ═══════════════════════════════════════════════════════════════════════════
// Tunables for graph construction, PPR, and the post-verification gate.
const CONFIG = {
    // PPR parameters (Page et al. 1998; GraftNet 2018 uses same values)
    ALPHA: 0.15, // restart probability
    EPSILON: 1e-5, // L1 convergence threshold
    MAX_ITER: 50, // hard iteration cap (typically converges in 15-25)
    // Edge weight channel coefficients
    // Candidate generation uses WHAT + R semantic only.
    // WHO/WHERE are reweight-only signals.
    GAMMA: {
        what: 0.40, // interaction pair overlap
        rSem: 0.40, // semantic similarity over edges.r aggregate
        who: 0.10, // endpoint entity overlap (reweight-only)
        where: 0.05, // location exact match (reweight-only)
        time: 0.05, // temporal decay score
    },
    // R semantic candidate generation
    R_SEM_MIN_SIM: 0.62, // min cosine for an R-semantic candidate edge
    R_SEM_TOPK: 8, // per-node neighbor cap for the R-semantic channel
    TIME_WINDOW_MAX: 80, // max floor distance for any edge
    TIME_DECAY_DIVISOR: 12, // e^(-distance/divisor) time score
    WHERE_MAX_GROUP_SIZE: 16, // skip location-only pair expansion for over-common places
    WHERE_FREQ_DAMP_PIVOT: 6, // location freq <= pivot keeps full WHERE score
    WHERE_FREQ_DAMP_MIN: 0.20, // lower bound for damped WHERE contribution
    // Post-verification (Cosine Gate)
    COSINE_GATE: 0.46, // min cosine(queryVector, stateVector)
    SCORE_FLOOR: 0.10, // min finalScore = PPR_normalized × cosine
    DIFFUSION_CAP: 100, // max diffused nodes (excluding seeds)
};
// ═══════════════════════════════════════════════════════════════════════════
// Utility functions
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Unicode-safe text normalization (matches recall.js / entity-lexicon.js):
 * NFKC fold, strip zero-width characters, trim, lowercase.
 * @param {*} s - any value; falsy inputs (null, 0, '') yield ''
 * @returns {string}
 */
function normalize(s) {
    // String(s || '') deliberately folds every falsy input to ''
    const text = String(s || '');
    return text
        .normalize('NFKC')
        .replace(/[\u200B-\u200D\uFEFF]/g, '')
        .trim()
        .toLowerCase();
}
/**
 * Cosine similarity between two equal-length numeric vectors.
 * Returns 0 for empty, mismatched-length, or zero-norm inputs.
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;
    let dot = 0;
    let sqA = 0;
    let sqB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        sqA += a[i] * a[i];
        sqB += b[i] * b[i];
    }
    if (!sqA || !sqB) return 0;
    return dot / (Math.sqrt(sqA) * Math.sqrt(sqB));
}
// ═══════════════════════════════════════════════════════════════════════════
// Feature extraction from L0 atoms
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Endpoint entity set from edges.s/edges.t (used for candidate pair generation).
 * @param {object} atom
 * @param {Set<string>} excludeEntities - entities to exclude (e.g. name1)
 * @returns {Set<string>}
 */
function extractEntities(atom, excludeEntities = new Set()) {
    const entities = new Set();
    for (const edge of atom.edges || []) {
        for (const endpoint of [normalize(edge?.s), normalize(edge?.t)]) {
            if (endpoint && !excludeEntities.has(endpoint)) {
                entities.add(endpoint);
            }
        }
    }
    return entities;
}
/**
 * WHAT channel: interaction pairs "A↔B" (direction-insensitive; endpoints
 * sorted so A→B and B→A collapse to the same key).
 * @param {object} atom
 * @param {Set<string>} excludeEntities
 * @returns {Set<string>}
 */
function extractInteractionPairs(atom, excludeEntities = new Set()) {
    const pairs = new Set();
    for (const edge of atom.edges || []) {
        const src = normalize(edge?.s);
        const dst = normalize(edge?.t);
        if (!src || !dst) continue;
        if (excludeEntities.has(src) || excludeEntities.has(dst)) continue;
        pairs.add([src, dst].sort().join('\u2194'));
    }
    return pairs;
}
/**
 * WHERE channel: the atom's normalized location string.
 * @param {object} atom
 * @returns {string} empty string when atom.where is absent/blank
 */
function extractLocation(atom) {
    return normalize(atom.where);
}
/**
 * Absolute floor distance between two atoms; missing floors count as 0.
 * @param {object} a
 * @param {object} b
 * @returns {number}
 */
function getFloorDistance(a, b) {
    return Math.abs(Number(a?.floor || 0) - Number(b?.floor || 0));
}
/**
 * TIME channel: exponential decay by floor distance,
 * e^(-distance / TIME_DECAY_DIVISOR) ∈ (0, 1].
 * @param {number} distance
 * @returns {number}
 */
function getTimeScore(distance) {
    return Math.exp(-distance / CONFIG.TIME_DECAY_DIVISOR);
}
// ═══════════════════════════════════════════════════════════════════════════
// Set similarity functions
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Jaccard index: |A∩B| / |A∪B| (Jaccard 1912).
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} 0..1
 */
function jaccard(a, b) {
    if (!a.size || !b.size) return 0;
    // Iterate the smaller set and probe the larger one
    const [probe, base] = a.size <= b.size ? [a, b] : [b, a];
    let intersection = 0;
    for (const item of probe) {
        if (base.has(item)) intersection++;
    }
    const union = a.size + b.size - intersection;
    return union > 0 ? intersection / union : 0;
}
/**
 * Overlap coefficient: |A∩B| / min(|A|,|B|) (Szymkiewicz-Simpson 1934).
 * Used for directed pairs where set sizes are small (1-3); Jaccard
 * over-penalizes small-set asymmetry.
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} 0..1
 */
function overlapCoefficient(a, b) {
    if (!a.size || !b.size) return 0;
    // Iterate the smaller set and probe the larger one
    const [probe, base] = a.size <= b.size ? [a, b] : [b, a];
    let intersection = 0;
    for (const item of probe) {
        if (base.has(item)) intersection++;
    }
    return intersection / probe.size;
}
// ═══════════════════════════════════════════════════════════════════════════
// Graph construction
//
// Candidate pairs discovered via WHAT inverted index and R semantic top-k.
// WHO/WHERE are reweight-only signals and never create candidate pairs.
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Pre-extract per-atom features (entities, interaction pairs, location).
 * @param {object[]} allAtoms
 * @param {Set<string>} excludeEntities
 * @returns {object[]} one feature object per atom, index-aligned
 */
function extractAllFeatures(allAtoms, excludeEntities = new Set()) {
    const features = [];
    for (const atom of allAtoms) {
        features.push({
            entities: extractEntities(atom, excludeEntities),
            interactionPairs: extractInteractionPairs(atom, excludeEntities),
            location: extractLocation(atom),
        });
    }
    return features;
}
/**
 * Build inverted index over interaction pairs plus a location frequency map.
 * @param {object[]} features
 * @returns {{ whatIndex: Map, locationFreq: Map }} whatIndex maps
 *   interaction pair → list of atom indices; locationFreq counts atoms per
 *   non-empty location.
 */
function buildInvertedIndices(features) {
    const whatIndex = new Map();
    const locationFreq = new Map();
    features.forEach((feature, idx) => {
        for (const pair of feature.interactionPairs) {
            const bucket = whatIndex.get(pair);
            if (bucket) {
                bucket.push(idx);
            } else {
                whatIndex.set(pair, [idx]);
            }
        }
        const loc = feature.location;
        if (loc) {
            locationFreq.set(loc, (locationFreq.get(loc) || 0) + 1);
        }
    });
    return { whatIndex, locationFreq };
}
/**
 * Collect candidate pairs from an inverted index. Every two atoms sharing an
 * index bucket become one packed pair (lo * N + hi) in pairSet.
 * @param {Map} index - value → [atomIndex, ...]
 * @param {Set<number>} pairSet - packed pair collector (mutated)
 * @param {number} N - total atom count (for pair packing)
 */
function collectPairsFromIndex(index, pairSet, N) {
    for (const bucket of index.values()) {
        for (let first = 0; first < bucket.length - 1; first++) {
            for (let second = first + 1; second < bucket.length; second++) {
                const i = bucket[first];
                const j = bucket[second];
                // Pack with the smaller index first so (i,j) and (j,i) collide
                const packed = i < j ? i * N + j : j * N + i;
                pairSet.add(packed);
            }
        }
    }
}
/**
 * Build weighted undirected graph over L0 atoms.
 *
 * Candidate edges come from exactly two generators:
 *   1. WHAT — atoms sharing an interaction pair (inverted index), and
 *   2. R semantic — per-node top-k cosine neighbors over edges.r aggregate
 *      vectors, restricted to a floor window to avoid an O(N^2) full scan.
 * WHO / WHERE / TIME never create candidates; they only reweight edges.
 *
 * @param {object[]} allAtoms
 * @param {object[]} stateVectors - entries carrying { atomId, rVector }
 * @param {Set<string>} excludeEntities
 * @returns {{ neighbors: object[][], edgeCount: number, channelStats: object, buildTime: number }}
 *   plus diagnostic counters (candidatePairs, edgeDensity, ...)
 */
function buildGraph(allAtoms, stateVectors = [], excludeEntities = new Set()) {
    const N = allAtoms.length;
    const T0 = performance.now();
    const features = extractAllFeatures(allAtoms, excludeEntities);
    const { whatIndex, locationFreq } = buildInvertedIndices(features);
    // Candidate pairs: WHAT + R semantic
    const pairSetByWhat = new Set();
    const pairSetByRSem = new Set();
    const rSemByPair = new Map(); // packed pair -> max cosine similarity seen
    const pairSet = new Set();
    collectPairsFromIndex(whatIndex, pairSetByWhat, N);
    const rVectorByAtomId = new Map(
        (stateVectors || [])
            .filter(v => v?.atomId && v?.rVector?.length)
            .map(v => [v.atomId, v.rVector])
    );
    const rVectors = allAtoms.map(a => rVectorByAtomId.get(a.atomId) || null);
    const directedNeighbors = Array.from({ length: N }, () => []);
    let rSemSimSum = 0;
    let rSemSimCount = 0;
    let topKPrunedPairs = 0;
    let timeWindowFilteredPairs = 0;
    // Enumerate only pairs within floor window to avoid O(N^2) full scan.
    const sortedByFloor = allAtoms
        .map((atom, idx) => ({ idx, floor: Number(atom?.floor || 0) }))
        .sort((a, b) => a.floor - b.floor);
    for (let left = 0; left < sortedByFloor.length; left++) {
        const i = sortedByFloor[left].idx;
        const baseFloor = sortedByFloor[left].floor;
        for (let right = left + 1; right < sortedByFloor.length; right++) {
            const floorDelta = sortedByFloor[right].floor - baseFloor;
            // Sorted by floor: once past the window no later pair qualifies
            if (floorDelta > CONFIG.TIME_WINDOW_MAX) break;
            const j = sortedByFloor[right].idx;
            const vi = rVectors[i];
            const vj = rVectors[j];
            if (!vi?.length || !vj?.length) continue;
            const sim = cosineSimilarity(vi, vj);
            if (sim < CONFIG.R_SEM_MIN_SIM) continue;
            directedNeighbors[i].push({ target: j, sim });
            directedNeighbors[j].push({ target: i, sim });
            rSemSimSum += sim;
            rSemSimCount++;
        }
    }
    // Per-node top-k pruning keeps the R-semantic channel sparse
    for (let i = 0; i < N; i++) {
        const arr = directedNeighbors[i];
        if (!arr.length) continue;
        arr.sort((a, b) => b.sim - a.sim);
        if (arr.length > CONFIG.R_SEM_TOPK) {
            topKPrunedPairs += arr.length - CONFIG.R_SEM_TOPK;
        }
        for (const n of arr.slice(0, CONFIG.R_SEM_TOPK)) {
            const lo = Math.min(i, n.target);
            const hi = Math.max(i, n.target);
            const packed = lo * N + hi;
            pairSetByRSem.add(packed);
            const prev = rSemByPair.get(packed) || 0;
            if (n.sim > prev) rSemByPair.set(packed, n.sim);
        }
    }
    for (const p of pairSetByWhat) pairSet.add(p);
    for (const p of pairSetByRSem) pairSet.add(p);
    // Compute edge weights for all candidates
    const neighbors = Array.from({ length: N }, () => []);
    let edgeCount = 0;
    const channelStats = { what: 0, where: 0, rSem: 0, who: 0 };
    let reweightWhoUsed = 0;
    let reweightWhereUsed = 0;
    for (const packed of pairSet) {
        // Unpack lo * N + hi
        const i = Math.floor(packed / N);
        const j = packed % N;
        // WHAT pairs were not window-filtered at generation time; filter here
        const distance = getFloorDistance(allAtoms[i], allAtoms[j]);
        if (distance > CONFIG.TIME_WINDOW_MAX) {
            timeWindowFilteredPairs++;
            continue;
        }
        const wTime = getTimeScore(distance);
        const fi = features[i];
        const fj = features[j];
        const wWhat = overlapCoefficient(fi.interactionPairs, fj.interactionPairs);
        const wRSem = rSemByPair.get(packed) || 0;
        const wWho = jaccard(fi.entities, fj.entities);
        let wWhere = 0.0;
        if (fi.location && fi.location === fj.location) {
            // Damp over-common locations: freq <= pivot keeps full score,
            // larger frequencies decay toward WHERE_FREQ_DAMP_MIN
            const freq = locationFreq.get(fi.location) || 1;
            const damp = Math.max(
                CONFIG.WHERE_FREQ_DAMP_MIN,
                Math.min(1, CONFIG.WHERE_FREQ_DAMP_PIVOT / Math.max(1, freq))
            );
            wWhere = damp;
        }
        // Linear channel mix (coefficients in CONFIG.GAMMA)
        const weight =
            CONFIG.GAMMA.what * wWhat +
            CONFIG.GAMMA.rSem * wRSem +
            CONFIG.GAMMA.who * wWho +
            CONFIG.GAMMA.where * wWhere +
            CONFIG.GAMMA.time * wTime;
        if (weight > 0) {
            neighbors[i].push({ target: j, weight });
            neighbors[j].push({ target: i, weight });
            edgeCount++;
            if (wWhat > 0) channelStats.what++;
            if (wRSem > 0) channelStats.rSem++;
            if (wWho > 0) channelStats.who++;
            if (wWhere > 0) channelStats.where++;
            if (wWho > 0) reweightWhoUsed++;
            if (wWhere > 0) reweightWhereUsed++;
        }
    }
    const buildTime = Math.round(performance.now() - T0);
    xbLog.info(MODULE_ID,
        `Graph: ${N} nodes, ${edgeCount} edges ` +
        `(candidate_by_what=${pairSetByWhat.size} candidate_by_r_sem=${pairSetByRSem.size}) ` +
        `(what=${channelStats.what} r_sem=${channelStats.rSem} who=${channelStats.who} where=${channelStats.where}) ` +
        `(reweight_who_used=${reweightWhoUsed} reweight_where_used=${reweightWhereUsed}) ` +
        `(time_window_filtered=${timeWindowFilteredPairs} topk_pruned=${topKPrunedPairs}) ` +
        `(${buildTime}ms)`
    );
    const totalPairs = N > 1 ? (N * (N - 1)) / 2 : 0;
    const edgeDensity = totalPairs > 0 ? Number((edgeCount / totalPairs * 100).toFixed(2)) : 0;
    return {
        neighbors,
        edgeCount,
        channelStats,
        buildTime,
        candidatePairs: pairSet.size,
        pairsFromWhat: pairSetByWhat.size,
        pairsFromRSem: pairSetByRSem.size,
        rSemAvgSim: rSemSimCount ? Number((rSemSimSum / rSemSimCount).toFixed(3)) : 0,
        timeWindowFilteredPairs,
        topKPrunedPairs,
        reweightWhoUsed,
        reweightWhereUsed,
        edgeDensity,
    };
}
// ═══════════════════════════════════════════════════════════════════════════
// PPR: Seed vector construction
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build personalization vector s from seeds, weighted by rerankScore.
 * Haveliwala (2002): non-uniform personalization improves topic sensitivity.
 * Seeds whose atomId is missing from idToIdx are ignored.
 *
 * @param {object[]} seeds - seed L0 entries with atomId and rerankScore
 * @param {Map<string, number>} idToIdx - atomId → array index
 * @param {number} N - total node count
 * @returns {Float64Array} personalization vector (L1-normalized, sums to 1)
 */
function buildSeedVector(seeds, idToIdx, N) {
    const s = new Float64Array(N);
    let mass = 0;
    for (const seed of seeds) {
        const idx = idToIdx.get(seed.atomId);
        if (idx == null) continue;
        const weight = Math.max(0, seed.rerankScore || seed.similarity || 0);
        s[idx] += weight;
        mass += weight;
    }
    // L1 normalize into a probability distribution (skip if all-zero)
    if (mass > 0) {
        for (let i = 0; i < N; i++) {
            s[i] /= mass;
        }
    }
    return s;
}
// ═══════════════════════════════════════════════════════════════════════════
// PPR: Column normalization + dangling node detection
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Column-normalize adjacency into transition matrix W.
 *
 * Column j of W: W_{ij} = weight(i,j) / Σ_k weight(k,j)
 * Dangling nodes (total outgoing weight <= 0) are recorded and handled in
 * powerIteration via redistribution to the personalization vector s.
 * (Langville & Meyer 2005, §4.1)
 *
 * @param {object[][]} neighbors - neighbors[j] = [{target, weight}, ...]
 * @param {number} N
 * @returns {{ columns: object[][], dangling: number[] }}
 */
function columnNormalize(neighbors, N) {
    const columns = Array.from({ length: N }, () => []);
    const dangling = [];
    for (let j = 0; j < N; j++) {
        let total = 0;
        for (const edge of neighbors[j]) {
            total += edge.weight;
        }
        if (total <= 0) {
            dangling.push(j);
            continue;
        }
        columns[j] = neighbors[j].map(edge => ({
            target: edge.target,
            prob: edge.weight / total,
        }));
    }
    return { columns, dangling };
}
// ═══════════════════════════════════════════════════════════════════════════
// PPR: Power Iteration
//
// Aligned with NetworkX pagerank() (pagerank_alg.py):
//
//   NetworkX "alpha"   = damping       = our (1 − α)
//   NetworkX "1-alpha" = teleportation = our α
//
// Per iteration:
//   π_new[i] = α·s[i] + (1 − α)·( Σ_j W_{ij}·π[j] + dangling_sum·s[i] )
//
// Convergence: Perron-Frobenius theorem guarantees unique stationary
// distribution for irreducible aperiodic column-stochastic matrix.
// Rate: ‖π^(t+1) − π^t‖₁ ≤ (1 − α)^t (geometric).
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Run PPR Power Iteration.
 *
 * Per iteration: π_new = α·s + (1 − α)·(W·π + danglingMass·s).
 * See the module banner for the NetworkX parameter correspondence.
 *
 * @param {object[][]} columns - column-normalized transition matrix
 * @param {Float64Array} s - personalization vector (sums to 1)
 * @param {number[]} dangling - dangling node indices
 * @param {number} N - node count
 * @returns {{ pi: Float64Array, iterations: number, finalError: number }}
 */
function powerIteration(columns, s, dangling, N) {
    const alpha = CONFIG.ALPHA;
    const d = 1 - alpha; // damping factor = prob of following edges
    const epsilon = CONFIG.EPSILON;
    const maxIter = CONFIG.MAX_ITER;
    // Initialize π to personalization vector
    let pi = new Float64Array(N);
    for (let i = 0; i < N; i++) pi[i] = s[i];
    let iterations = 0;
    let finalError = 0;
    for (let iter = 0; iter < maxIter; iter++) {
        const piNew = new Float64Array(N);
        // Dangling mass: probability at nodes with no outgoing edges
        // redistributed to personalization vector (Langville & Meyer 2005)
        let danglingSum = 0;
        for (let k = 0; k < dangling.length; k++) {
            danglingSum += pi[dangling[k]];
        }
        // Sparse matrix-vector product: (1 − α) · W · π
        for (let j = 0; j < N; j++) {
            const pj = pi[j];
            if (pj === 0) continue; // zero column contributes nothing
            const col = columns[j];
            const dpj = d * pj;
            for (let e = 0; e < col.length; e++) {
                piNew[col[e].target] += dpj * col[e].prob;
            }
        }
        // Restart + dangling contribution:
        // α · s[i] + (1 − α) · danglingSum · s[i]
        const restartCoeff = alpha + d * danglingSum;
        for (let i = 0; i < N; i++) {
            piNew[i] += restartCoeff * s[i];
        }
        // L1 convergence check: stop when ‖π_new − π‖₁ < ε
        let l1 = 0;
        for (let i = 0; i < N; i++) {
            l1 += Math.abs(piNew[i] - pi[i]);
        }
        pi = piNew;
        iterations = iter + 1;
        finalError = l1;
        if (l1 < epsilon) break;
    }
    return { pi, iterations, finalError };
}
// ═══════════════════════════════════════════════════════════════════════════
// Post-verification: Dense Cosine Gate
//
// PPR measures graph-structural relevance ("same characters").
// Cosine gate measures semantic relevance ("related to current topic").
// Product combination ensures both dimensions are satisfied
// (CombMNZ — Fox & Shaw, TREC-2 1994).
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Filter PPR-activated nodes by semantic relevance.
 *
 * For each non-seed node with PPR > 0:
 *   1. cosine(queryVector, stateVector) ≥ COSINE_GATE
 *   2. finalScore = PPR_normalized × cosine ≥ SCORE_FLOOR
 *   3. Top DIFFUSION_CAP by finalScore
 *
 * BUGFIX: the atom lookup now happens before any gate counting. Previously
 * gateStats.passed was incremented before `atomById.get(atomId)` was
 * checked, so a node whose atom was missing from the index inflated
 * `passed` while never appearing in `diffused` — making `passed` and
 * `finalCount` (and the postGatePassRate metric) silently inconsistent.
 *
 * @param {Float64Array} pi - PPR stationary distribution
 * @param {string[]} atomIds - index → atomId
 * @param {Map<string, object>} atomById - atomId → atom object
 * @param {Set<string>} seedAtomIds - seed atomIds (excluded from output)
 * @param {Map<string, Float32Array>} vectorMap - atomId → embedding vector
 * @param {Float32Array|number[]} queryVector - R2 weighted query vector
 * @returns {{ diffused: object[], gateStats: object }}
 */
function postVerify(pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector) {
    const N = atomIds.length;
    const gateStats = { passed: 0, filtered: 0, noVector: 0 };
    // Find max PPR score among non-seed nodes (for normalization)
    let maxPPR = 0;
    for (let i = 0; i < N; i++) {
        if (pi[i] > 0 && !seedAtomIds.has(atomIds[i]) && pi[i] > maxPPR) {
            maxPPR = pi[i];
        }
    }
    if (maxPPR <= 0) {
        return { diffused: [], gateStats };
    }
    const candidates = [];
    for (let i = 0; i < N; i++) {
        const atomId = atomIds[i];
        // Skip seeds and zero-probability nodes
        if (seedAtomIds.has(atomId)) continue;
        if (pi[i] <= 0) continue;
        // Resolve the atom up-front so gate stats only count real candidates
        const atom = atomById.get(atomId);
        if (!atom) continue;
        // Require state vector for cosine verification
        const vec = vectorMap.get(atomId);
        if (!vec?.length) {
            gateStats.noVector++;
            continue;
        }
        // Cosine gate
        const cos = cosineSimilarity(queryVector, vec);
        if (cos < CONFIG.COSINE_GATE) {
            gateStats.filtered++;
            continue;
        }
        // Final score = PPR_normalized × cosine (CombMNZ-style product)
        const pprNorm = pi[i] / maxPPR;
        const finalScore = pprNorm * cos;
        if (finalScore < CONFIG.SCORE_FLOOR) {
            gateStats.filtered++;
            continue;
        }
        gateStats.passed++;
        candidates.push({
            atomId,
            floor: atom.floor,
            atom,
            finalScore,
            pprScore: pi[i],
            pprNormalized: pprNorm,
            cosine: cos,
        });
    }
    // Sort by finalScore descending, cap at DIFFUSION_CAP
    candidates.sort((a, b) => b.finalScore - a.finalScore);
    const diffused = candidates.slice(0, CONFIG.DIFFUSION_CAP);
    return { diffused, gateStats };
}
// ═══════════════════════════════════════════════════════════════════════════
// Main entry point
// ═══════════════════════════════════════════════════════════════════════════
/**
* Spread activation from seed L0 atoms through entity co-occurrence graph.
*
* Called from recall.js Stage 7.5, after locateAndPullEvidence and before
* Causation Trace. Results are merged into l0Selected and consumed by
* prompt.js through existing budget/formatting pipeline (zero downstream changes).
*
* @param {object[]} seeds - l0Selected from recall Stage 6
* Each: { atomId, rerankScore, similarity, atom, ... }
* @param {object[]} allAtoms - getStateAtoms() result
* Each: { atomId, floor, semantic, edges, where }
* @param {object[]} stateVectors - getAllStateVectors() result
* Each: { atomId, floor, vector: Float32Array, rVector?: Float32Array }
* @param {Float32Array|number[]} queryVector - R2 weighted query vector
* @param {object|null} metrics - metrics object (optional, mutated in-place)
* @returns {object[]} Additional L0 atoms for l0Selected
* Each: { atomId, floor, atom, finalScore, pprScore, pprNormalized, cosine }
*/
export function diffuseFromSeeds(seeds, allAtoms, stateVectors, queryVector, metrics) {
    const T0 = performance.now();
    // ─── Early exits ─────────────────────────────────────────────────
    // Nothing to do without seeds, atoms, or a query vector; still write
    // a zeroed diffusion metrics block so downstream reports stay complete.
    if (!seeds?.length || !allAtoms?.length || !queryVector?.length) {
        fillMetricsEmpty(metrics);
        return [];
    }
    // Align with entity-lexicon hard rule: exclude name1 from graph features.
    const { name1 } = getContext();
    const excludeEntities = new Set();
    if (name1) excludeEntities.add(normalize(name1));
    // ─── 1. Build atom index ─────────────────────────────────────────
    // Three parallel views: atomId → atom, index → atomId, atomId → index.
    const atomById = new Map();
    const atomIds = [];
    const idToIdx = new Map();
    for (let i = 0; i < allAtoms.length; i++) {
        const a = allAtoms[i];
        atomById.set(a.atomId, a);
        atomIds.push(a.atomId);
        idToIdx.set(a.atomId, i);
    }
    const N = allAtoms.length;
    // Validate seeds against atom index
    const validSeeds = seeds.filter(s => idToIdx.has(s.atomId));
    const seedAtomIds = new Set(validSeeds.map(s => s.atomId));
    if (!validSeeds.length) {
        fillMetricsEmpty(metrics);
        return [];
    }
    // ─── 2. Build graph ──────────────────────────────────────────────
    const graph = buildGraph(allAtoms, stateVectors, excludeEntities);
    if (graph.edgeCount === 0) {
        // Edge-less graph: record graph-build stats but skip diffusion —
        // PPR on an empty graph would just return the seed distribution.
        fillMetrics(metrics, {
            seedCount: validSeeds.length,
            graphNodes: N,
            graphEdges: 0,
            channelStats: graph.channelStats,
            candidatePairs: graph.candidatePairs,
            pairsFromWhat: graph.pairsFromWhat,
            pairsFromRSem: graph.pairsFromRSem,
            rSemAvgSim: graph.rSemAvgSim,
            timeWindowFilteredPairs: graph.timeWindowFilteredPairs,
            topKPrunedPairs: graph.topKPrunedPairs,
            edgeDensity: graph.edgeDensity,
            reweightWhoUsed: graph.reweightWhoUsed,
            reweightWhereUsed: graph.reweightWhereUsed,
            time: graph.buildTime,
        });
        xbLog.info(MODULE_ID, 'No graph edges — skipping diffusion');
        return [];
    }
    // ─── 3. Build seed vector ────────────────────────────────────────
    const s = buildSeedVector(validSeeds, idToIdx, N);
    // ─── 4. Column normalize ─────────────────────────────────────────
    const { columns, dangling } = columnNormalize(graph.neighbors, N);
    // ─── 5. PPR Power Iteration ──────────────────────────────────────
    const T_PPR = performance.now();
    const { pi, iterations, finalError } = powerIteration(columns, s, dangling, N);
    const pprTime = Math.round(performance.now() - T_PPR);
    // Count activated non-seed nodes
    let pprActivated = 0;
    for (let i = 0; i < N; i++) {
        if (pi[i] > 0 && !seedAtomIds.has(atomIds[i])) pprActivated++;
    }
    // ─── 6. Post-verification ────────────────────────────────────────
    // Gate vectors come from sv.vector; sv.rVector (when present) is not
    // consulted here.
    const vectorMap = new Map();
    for (const sv of (stateVectors || [])) {
        vectorMap.set(sv.atomId, sv.vector);
    }
    const { diffused, gateStats } = postVerify(
        pi, atomIds, atomById, seedAtomIds, vectorMap, queryVector
    );
    // ─── 7. Metrics ──────────────────────────────────────────────────
    const totalTime = Math.round(performance.now() - T0);
    fillMetrics(metrics, {
        seedCount: validSeeds.length,
        graphNodes: N,
        graphEdges: graph.edgeCount,
        channelStats: graph.channelStats,
        candidatePairs: graph.candidatePairs,
        pairsFromWhat: graph.pairsFromWhat,
        pairsFromRSem: graph.pairsFromRSem,
        rSemAvgSim: graph.rSemAvgSim,
        timeWindowFilteredPairs: graph.timeWindowFilteredPairs,
        topKPrunedPairs: graph.topKPrunedPairs,
        edgeDensity: graph.edgeDensity,
        reweightWhoUsed: graph.reweightWhoUsed,
        reweightWhereUsed: graph.reweightWhereUsed,
        buildTime: graph.buildTime,
        iterations,
        convergenceError: finalError,
        pprActivated,
        cosineGatePassed: gateStats.passed,
        cosineGateFiltered: gateStats.filtered,
        cosineGateNoVector: gateStats.noVector,
        postGatePassRate: pprActivated > 0
            ? Math.round((gateStats.passed / pprActivated) * 100)
            : 0,
        finalCount: diffused.length,
        scoreDistribution: diffused.length > 0
            ? calcScoreStats(diffused.map(d => d.finalScore))
            : { min: 0, max: 0, mean: 0 },
        time: totalTime,
    });
    xbLog.info(MODULE_ID,
        `Diffusion: ${validSeeds.length} seeds → ` +
        `graph(${N}n/${graph.edgeCount}e) → ` +
        `PPR(${iterations}it, ε=${finalError.toExponential(1)}, ${pprTime}ms) → ` +
        `${pprActivated} activated → ` +
        `gate(${gateStats.passed}\u2713/${gateStats.filtered}\u2717` +
        `${gateStats.noVector ? `/${gateStats.noVector}?` : ''}) → ` +
        `${diffused.length} final (${totalTime}ms)`
    );
    return diffused;
}
// ═══════════════════════════════════════════════════════════════════════════
// Metrics helpers
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Summarize a score list as { min, max, mean }, each rounded to 3 decimals.
 * Returns all-zero stats for an empty list.
 *
 * @param {number[]} scores
 * @returns {{ min: number, max: number, mean: number }}
 */
function calcScoreStats(scores) {
    if (!scores.length) return { min: 0, max: 0, mean: 0 };
    const ordered = scores.slice().sort((x, y) => x - y);
    let total = 0;
    for (const v of ordered) total += v;
    const round3 = (x) => Number(x.toFixed(3));
    return {
        min: round3(ordered[0]),
        max: round3(ordered[ordered.length - 1]),
        mean: round3(total / ordered.length),
    };
}
/**
 * Write an all-zero diffusion block into the metrics object.
 * No-op when metrics collection is disabled (metrics is null/undefined).
 *
 * @param {object|null} metrics - metrics object, mutated in place
 */
function fillMetricsEmpty(metrics) {
    if (!metrics) return;
    const block = {};
    for (const key of [
        'seedCount', 'graphNodes', 'graphEdges', 'iterations', 'convergenceError',
        'pprActivated', 'cosineGatePassed', 'cosineGateFiltered', 'cosineGateNoVector',
        'finalCount',
    ]) block[key] = 0;
    block.scoreDistribution = { min: 0, max: 0, mean: 0 };
    block.byChannel = { what: 0, where: 0, rSem: 0, who: 0 };
    for (const key of [
        'candidatePairs', 'pairsFromWhat', 'pairsFromRSem', 'rSemAvgSim',
        'timeWindowFilteredPairs', 'topKPrunedPairs', 'edgeDensity',
        'reweightWhoUsed', 'reweightWhereUsed', 'postGatePassRate', 'time',
    ]) block[key] = 0;
    metrics.diffusion = block;
}
/**
 * Write diffusion results into the metrics object, defaulting every
 * missing numeric field to 0. No-op when metrics is null/undefined.
 *
 * @param {object|null} metrics - metrics object, mutated in place
 * @param {object} data - partial diffusion stats
 */
function fillMetrics(metrics, data) {
    if (!metrics) return;
    const num = (v) => v || 0;
    metrics.diffusion = {
        seedCount: num(data.seedCount),
        graphNodes: num(data.graphNodes),
        graphEdges: num(data.graphEdges),
        iterations: num(data.iterations),
        convergenceError: num(data.convergenceError),
        pprActivated: num(data.pprActivated),
        cosineGatePassed: num(data.cosineGatePassed),
        cosineGateFiltered: num(data.cosineGateFiltered),
        cosineGateNoVector: num(data.cosineGateNoVector),
        postGatePassRate: num(data.postGatePassRate),
        finalCount: num(data.finalCount),
        scoreDistribution: data.scoreDistribution || { min: 0, max: 0, mean: 0 },
        byChannel: data.channelStats || { what: 0, where: 0, rSem: 0, who: 0 },
        candidatePairs: num(data.candidatePairs),
        pairsFromWhat: num(data.pairsFromWhat),
        pairsFromRSem: num(data.pairsFromRSem),
        rSemAvgSim: num(data.rSemAvgSim),
        timeWindowFilteredPairs: num(data.timeWindowFilteredPairs),
        topKPrunedPairs: num(data.topKPrunedPairs),
        edgeDensity: num(data.edgeDensity),
        reweightWhoUsed: num(data.reweightWhoUsed),
        reweightWhereUsed: num(data.reweightWhereUsed),
        time: num(data.time),
    };
}

View File

@@ -0,0 +1,221 @@
// ═══════════════════════════════════════════════════════════════════════════
// entity-lexicon.js - 实体词典(确定性,无 LLM
//
// 职责:
// 1. 从已有结构化存储构建可信实体词典
// 2. 从文本中提取命中的实体
//
// 硬约束name1 永不进入词典
// ═══════════════════════════════════════════════════════════════════════════
import { getStateAtoms } from '../storage/state-store.js';
// Person-name blacklist: pronouns, role labels, and obvious non-person terms
const PERSON_LEXICON_BLACKLIST = new Set([
    '我', '你', '他', '她', '它', '我们', '你们', '他们', '她们', '它们',
    '自己', '对方', '用户', '助手', 'user', 'assistant',
    '男人', '女性', '成熟女性', '主人', '主角',
    '龟头', '子宫', '阴道', '阴茎',
    '电脑', '电脑屏幕', '手机', '监控画面', '摄像头', '阳光', '折叠床', '书房', '卫生间隔间',
]);
/**
 * Normalize a string for entity matching:
 * NFKC fold, strip zero-width characters, trim, lowercase.
 *
 * @param {string} s
 * @returns {string}
 */
function normalize(s) {
    const folded = String(s || '').normalize('NFKC');
    return folded.replace(/[\u200B-\u200D\uFEFF]/g, '').trim().toLowerCase();
}
/**
 * Whether a raw term normalizes to a blacklisted person term.
 * @param {string} raw
 * @returns {boolean}
 */
function isBlacklistedPersonTerm(raw) {
    return PERSON_LEXICON_BLACKLIST.has(normalize(raw));
}
/**
 * Add a person term to a pool after normalization; drops empty,
 * single-character, and blacklisted terms.
 *
 * @param {Set<string>} set - destination pool (mutated)
 * @param {string} raw - raw name
 */
function addPersonTerm(set, raw) {
    const normalized = normalize(raw);
    if (normalized.length < 2) return;
    if (isBlacklistedPersonTerm(normalized)) return;
    set.add(normalized);
}
/**
 * Collect the trusted character pool from clean structured sources:
 * characters.main, arcs[].name, context.name2, and L2 event participants.
 * Hard rule: normalize(context.name1) is always removed at the end.
 *
 * @param {object} store - summary store
 * @param {object} context - { name1, name2 }
 * @returns {Set<string>} normalized trusted names
 */
function collectTrustedCharacters(store, context) {
    const pool = new Set();
    for (const entry of (store?.json?.characters?.main || [])) {
        addPersonTerm(pool, typeof entry === 'string' ? entry : entry.name);
    }
    for (const arc of (store?.json?.arcs || [])) {
        addPersonTerm(pool, arc.name);
    }
    if (context?.name2) {
        addPersonTerm(pool, context.name2);
    }
    for (const ev of (store?.json?.events || [])) {
        for (const participant of (ev?.participants || [])) {
            addPersonTerm(pool, participant);
        }
    }
    if (context?.name1) {
        pool.delete(normalize(context.name1));
    }
    return pool;
}
/**
 * Build trusted character pool only (without scanning L0 candidate atoms).
 *
 * Trusted sources: characters.main, arcs[].name, context.name2, and
 * L2 event participants; context.name1 is always excluded (hard rule).
 *
 * @param {object} store - getSummaryStore() result
 * @param {object} context - { name1, name2 }
 * @returns {Set<string>} normalized trusted character names
 */
export function buildTrustedCharacters(store, context) {
    return collectTrustedCharacters(store, context);
}
/**
 * Collect candidate character names from L0 atom relation endpoints
 * (edges[].s / edges[].t), blacklist-cleaned via addPersonTerm.
 * Hard rule: normalize(context.name1) is always removed.
 *
 * @param {object} context - { name1 }
 * @returns {Set<string>} normalized candidate names
 */
function collectCandidateCharactersFromL0(context) {
    const pool = new Set();
    for (const atom of getStateAtoms()) {
        for (const edge of (atom.edges || [])) {
            addPersonTerm(pool, edge?.s);
            addPersonTerm(pool, edge?.t);
        }
    }
    if (context?.name1) {
        pool.delete(normalize(context.name1));
    }
    return pool;
}
/**
 * Build character pools with trust tiers.
 *
 * trustedCharacters: main/arcs/name2/L2 participants (clean sources)
 * candidateCharacters: L0 edges.s/t (blacklist-cleaned)
 * allCharacters: union of both
 *
 * @param {object} store
 * @param {object} context
 * @returns {{ trustedCharacters: Set<string>, candidateCharacters: Set<string>, allCharacters: Set<string> }}
 */
export function buildCharacterPools(store, context) {
    const trustedCharacters = collectTrustedCharacters(store, context);
    const candidateCharacters = collectCandidateCharactersFromL0(context);
    const allCharacters = new Set(trustedCharacters);
    for (const name of candidateCharacters) {
        allCharacters.add(name);
    }
    return { trustedCharacters, candidateCharacters, allCharacters };
}
/**
 * Build the entity lexicon.
 *
 * Sources (by trust level):
 * 1. store.json.characters.main — confirmed main characters
 * 2. store.json.arcs[].name — arc subjects
 * 3. context.name2 — current character
 * 4. store.json.events[].participants — L2 event participants
 * 5. L0 atoms edges.s / edges.t
 *
 * Hard rule: normalize(context.name1) is always excluded.
 *
 * @param {object} store - getSummaryStore() result
 * @param {object} context - { name1: string, name2: string }
 * @returns {Set<string>} normalized entity set
 */
export function buildEntityLexicon(store, context) {
    return buildCharacterPools(store, context).allCharacters;
}
/**
 * Build a "normalized key → original display form" map, used to recover
 * the original spelling of a name from its normalized lexicon key.
 *
 * Sources mirror buildEntityLexicon: characters.main, arcs, name2,
 * L2 event participants, and L0 atom edges. First registration wins.
 * Hard rule: normalize(context.name1) is always removed.
 *
 * @param {object} store
 * @param {object} context
 * @returns {Map<string, string>} normalize(name) → original display form
 */
export function buildDisplayNameMap(store, context) {
    const displayByKey = new Map();
    const register = (raw) => {
        const key = normalize(raw);
        if (!key || key.length < 2 || isBlacklistedPersonTerm(key)) return;
        if (!displayByKey.has(key)) {
            displayByKey.set(key, String(raw).trim());
        }
    };
    for (const entry of (store?.json?.characters?.main || [])) {
        register(typeof entry === 'string' ? entry : entry.name);
    }
    for (const arc of (store?.json?.arcs || [])) {
        register(arc.name);
    }
    if (context?.name2) register(context.name2);
    // L2 event participants
    for (const ev of (store?.json?.events || [])) {
        for (const participant of (ev?.participants || [])) {
            register(participant);
        }
    }
    // L0 atom relation endpoints (edges.s / edges.t)
    for (const atom of getStateAtoms()) {
        for (const edge of (atom.edges || [])) {
            register(edge?.s);
            register(edge?.t);
        }
    }
    // Hard rule: the user persona never appears in the map
    if (context?.name1) {
        displayByKey.delete(normalize(context.name1));
    }
    return displayByKey;
}
/**
 * Extract lexicon entities that occur in a text.
 *
 * Scans the normalized text for substring occurrences of each normalized
 * lexicon entry (case-insensitive) and returns matches in their original
 * display form. Entries of a Set are unique, so no extra dedup pass is
 * needed.
 *
 * @param {string} text - cleaned text
 * @param {Set<string>} lexicon - normalized entity set
 * @param {Map<string, string>} displayMap - normalized → display form
 * @returns {string[]} matched entities (display form)
 */
export function extractEntitiesFromText(text, lexicon, displayMap) {
    if (!text || !lexicon?.size) return [];
    const haystack = normalize(text);
    const matches = [];
    for (const key of lexicon) {
        if (!haystack.includes(key)) continue;
        // Prefer the registered display form when available
        matches.push(displayMap?.get(key) || key);
    }
    return matches;
}

View File

@@ -0,0 +1,541 @@
// ═══════════════════════════════════════════════════════════════════════════
// lexical-index.js - MiniSearch 词法检索索引
//
// 职责:
// 1. 对 L0 atoms + L1 chunks + L2 events 建立词法索引
// 2. 提供词法检索接口(专名精确匹配兜底)
// 3. 惰性构建 + 异步预热 + 缓存失效机制
//
// 索引存储:纯内存(不持久化)
// 分词器:统一使用 tokenizer.js结巴 + 实体保护 + 降级)
// 重建时机CHAT_CHANGED / L0提取完成 / L2总结完成
// ═══════════════════════════════════════════════════════════════════════════
import MiniSearch from '../../../../libs/minisearch.mjs';
import { getContext } from '../../../../../../../extensions.js';
import { getSummaryStore } from '../../data/store.js';
import { getAllChunks } from '../storage/chunk-store.js';
import { xbLog } from '../../../../core/debug-core.js';
import { tokenizeForIndex } from '../utils/tokenizer.js';
const MODULE_ID = 'lexical-index';
// ─────────────────────────────────────────────────────────────────────────
// Cache (module-level, per-chat)
// ─────────────────────────────────────────────────────────────────────────
/** @type {MiniSearch|null} */
let cachedIndex = null;
/** @type {string|null} */
let cachedChatId = null;
/** @type {string|null} Data fingerprint (chunk + event counts — see computeFingerprint) */
let cachedFingerprint = null;
/** @type {boolean} Whether a build is currently in flight */
let building = false;
/** @type {Promise<MiniSearch|null>|null} In-flight build promise (re-entrancy guard) */
let buildPromise = null;
/** @type {Map<number, string[]>} floor → doc IDs on that floor (L1 chunks only) */
let floorDocIds = new Map();
// ─────────────────────────────────────────────────────────────────────────
// 工具函数
// ─────────────────────────────────────────────────────────────────────────
/**
 * Strip a trailing floor marker (e.g. " (#12)" or " (#12-15)") from an
 * event summary and trim surrounding whitespace.
 *
 * @param {string} summary
 * @returns {string}
 */
function cleanSummary(summary) {
    const text = String(summary || '');
    return text.replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '').trim();
}
/**
 * Compute a cheap cache fingerprint from corpus sizes.
 * Note: count-based only — content edits that keep both counts unchanged
 * will not invalidate the cache.
 *
 * @param {number} chunkCount
 * @param {number} eventCount
 * @returns {string}
 */
function computeFingerprint(chunkCount, eventCount) {
    return [chunkCount, eventCount].join(':');
}
/**
 * Yield to the macrotask queue so long builds don't block the UI.
 * @returns {Promise<void>}
 */
function yieldToMain() {
    return new Promise((resolve) => {
        setTimeout(resolve, 0);
    });
}
// ─────────────────────────────────────────────────────────────────────────
// 文档收集
// ─────────────────────────────────────────────────────────────────────────
/**
 * Collect all documents to index (L1 chunks + L2 events).
 *
 * Side effect: records each chunk's id under its floor in the module-level
 * floorDocIds side index (used later for incremental removal).
 *
 * @param {object[]} chunks - getAllChunks(chatId) result
 * @param {object[]} events - store.json.events
 * @returns {object[]} documents ready for MiniSearch
 */
function collectDocuments(chunks, events) {
    const docs = [];
    // L1 chunks (also populates floorDocIds)
    for (const chunk of (chunks || [])) {
        if (!chunk?.chunkId || !chunk.text) continue;
        const floor = chunk.floor ?? -1;
        docs.push({ id: chunk.chunkId, type: 'chunk', floor, text: chunk.text });
        if (floor < 0) continue;
        const ids = floorDocIds.get(floor);
        if (ids) {
            ids.push(chunk.chunkId);
        } else {
            floorDocIds.set(floor, [chunk.chunkId]);
        }
    }
    // L2 events: title + participants + cleaned summary, space-joined
    for (const ev of (events || [])) {
        if (!ev?.id) continue;
        const pieces = [];
        if (ev.title) pieces.push(ev.title);
        if (ev.participants?.length) pieces.push(ev.participants.join(' '));
        const summary = cleanSummary(ev.summary);
        if (summary) pieces.push(summary);
        const text = pieces.join(' ').trim();
        if (!text) continue;
        docs.push({ id: ev.id, type: 'event', floor: null, text });
    }
    return docs;
}
// ─────────────────────────────────────────────────────────────────────────
// 索引构建(分片,不阻塞主线程)
// ─────────────────────────────────────────────────────────────────────────
/** Documents added per batch before yielding to the main thread */
const BUILD_BATCH_SIZE = 500;
/**
 * Build the MiniSearch index in batches, yielding between batches so a
 * large corpus does not freeze the UI.
 *
 * @param {object[]} docs - documents from collectDocuments()
 * @returns {Promise<MiniSearch>}
 */
async function buildIndexAsync(docs) {
    const startedAt = performance.now();
    const index = new MiniSearch({
        fields: ['text'],
        storeFields: ['type', 'floor'],
        idField: 'id',
        searchOptions: {
            boost: { text: 1 },
            fuzzy: 0.2,
            prefix: true,
        },
        tokenize: tokenizeForIndex,
    });
    if (!docs.length) {
        return index;
    }
    // Add in slices of BUILD_BATCH_SIZE, yielding between batches
    let offset = 0;
    while (offset < docs.length) {
        index.addAll(docs.slice(offset, offset + BUILD_BATCH_SIZE));
        offset += BUILD_BATCH_SIZE;
        // Skip the yield after the final batch
        if (offset < docs.length) {
            await yieldToMain();
        }
    }
    const elapsed = Math.round(performance.now() - startedAt);
    xbLog.info(MODULE_ID,
        `索引构建完成: ${docs.length} 文档 (${elapsed}ms)`
    );
    return index;
}
// ─────────────────────────────────────────────────────────────────────────
// 检索
// ─────────────────────────────────────────────────────────────────────────
/**
 * @typedef {object} LexicalSearchResult
 * @property {string[]} atomIds - matched L0 atom IDs (always empty here: L0 atoms are not indexed)
 * @property {Set<number>} atomFloors - matched L0 floors (always empty here)
 * @property {string[]} chunkIds - matched L1 chunk IDs
 * @property {Set<number>} chunkFloors - matched L1 floor set
 * @property {string[]} eventIds - matched L2 event IDs
 * @property {object[]} chunkScores - chunk hit details [{ chunkId, score }]
 * @property {number} searchTime - search duration in ms
 */
/**
 * Search the lexical index.
 *
 * All terms are joined into one OR query, tokenized the same way as the
 * indexed documents, and the hits are bucketed by document type.
 *
 * @param {MiniSearch} index - index instance
 * @param {string[]} terms - query terms
 * @returns {LexicalSearchResult}
 */
export function searchLexicalIndex(index, terms) {
    const T0 = performance.now();
    const result = {
        atomIds: [],
        atomFloors: new Set(),
        chunkIds: [],
        chunkFloors: new Set(),
        eventIds: [],
        chunkScores: [],
        searchTime: 0,
    };
    const finish = () => {
        result.searchTime = Math.round(performance.now() - T0);
        return result;
    };
    if (!index || !terms?.length) {
        return finish();
    }
    let hits;
    try {
        // Single combined OR query over all terms
        hits = index.search(terms.join(' '), {
            boost: { text: 1 },
            fuzzy: 0.2,
            prefix: true,
            combineWith: 'OR',
            // Same tokenizer as at index time
            tokenize: tokenizeForIndex,
        });
    } catch (e) {
        xbLog.warn(MODULE_ID, '检索失败', e);
        return finish();
    }
    // Bucket hits by stored document type, deduplicating by id
    const seenChunks = new Set();
    const seenEvents = new Set();
    for (const hit of hits) {
        if (hit.type === 'chunk') {
            if (seenChunks.has(hit.id)) continue;
            seenChunks.add(hit.id);
            result.chunkIds.push(hit.id);
            result.chunkScores.push({ chunkId: hit.id, score: hit.score });
            if (typeof hit.floor === 'number' && hit.floor >= 0) {
                result.chunkFloors.add(hit.floor);
            }
        } else if (hit.type === 'event') {
            if (seenEvents.has(hit.id)) continue;
            seenEvents.add(hit.id);
            result.eventIds.push(hit.id);
        }
    }
    finish();
    xbLog.info(MODULE_ID,
        `检索完成: terms=[${terms.slice(0, 5).join(',')}] → atoms=${result.atomIds.length} chunks=${result.chunkIds.length} events=${result.eventIds.length} (${result.searchTime}ms)`
    );
    return result;
}
// ─────────────────────────────────────────────────────────────────────────
// 内部构建流程(收集数据 + 构建索引)
// ─────────────────────────────────────────────────────────────────────────
/**
 * Collect corpus data and build a fresh index.
 *
 * Resets the floorDocIds side index, reads events from the summary store
 * and chunks from storage, then builds the MiniSearch index in batches.
 * If another caller refreshed the cache for the same chatId/fingerprint
 * while chunks were loading, the already-cached index is reused.
 *
 * @param {string} chatId
 * @returns {Promise<{index: MiniSearch, fingerprint: string}>}
 */
async function collectAndBuild(chatId) {
    // Full rebuild → reset the per-floor side index
    floorDocIds = new Map();
    // Gather source data (L0 atoms are intentionally not indexed)
    const store = getSummaryStore();
    const events = store?.json?.events || [];
    let chunks = [];
    try {
        chunks = await getAllChunks(chatId);
    } catch (e) {
        xbLog.warn(MODULE_ID, '获取 chunks 失败', e);
    }
    const fp = computeFingerprint(chunks.length, events.length);
    // Another call may have refreshed the cache while we were loading
    if (cachedIndex && cachedChatId === chatId && cachedFingerprint === fp) {
        return { index: cachedIndex, fingerprint: fp };
    }
    // Collect documents (also fills floorDocIds) and build asynchronously
    const docs = collectDocuments(chunks, events);
    const index = await buildIndexAsync(docs);
    return { index, fingerprint: fp };
}
// ─────────────────────────────────────────────────────────────────────────
// 公开接口getLexicalIndex惰性获取
// ─────────────────────────────────────────────────────────────────────────
/**
 * Get the lexical index (lazy build + cache).
 *
 * Returns the cached index directly when valid; otherwise rebuilds.
 * If a build is already in flight, awaits it first and reuses its result
 * when it matches the current chat.
 *
 * @returns {Promise<MiniSearch|null>} null when no chat is active or the build fails
 */
export async function getLexicalIndex() {
    const { chatId } = getContext();
    if (!chatId) return null;
    // Fast path: cache hit when the chatId is unchanged.
    // Fingerprint validation is deferred to the build flow so we don't
    // read IndexedDB here just to compute a fingerprint.
    if (cachedIndex && cachedChatId === chatId && cachedFingerprint) {
        return cachedIndex;
    }
    // A build is in flight — wait for it and reuse its result if it matches.
    if (building && buildPromise) {
        try {
            await buildPromise;
            if (cachedIndex && cachedChatId === chatId && cachedFingerprint) {
                return cachedIndex;
            }
        } catch {
            // Build failed; fall through and rebuild below.
        }
    }
    // Rebuild needed (fingerprint is computed inside collectAndBuild and
    // written to the cache only on success).
    xbLog.info(MODULE_ID, `缓存失效,重建索引 (chatId=${chatId.slice(0, 8)})`);
    building = true;
    buildPromise = collectAndBuild(chatId);
    try {
        const { index, fingerprint } = await buildPromise;
        // Atomically swap the cache triple
        cachedIndex = index;
        cachedChatId = chatId;
        cachedFingerprint = fingerprint;
        return index;
    } catch (e) {
        xbLog.error(MODULE_ID, '索引构建失败', e);
        return null;
    } finally {
        building = false;
        buildPromise = null;
    }
}
// ─────────────────────────────────────────────────────────────────────────
// 公开接口warmupIndex异步预建
// ─────────────────────────────────────────────────────────────────────────
/**
 * Pre-build the index in the background (fire-and-forget).
 *
 * Called on CHAT_CHANGED (after entity injection), after L0 extraction,
 * and after L2 summarization. Does not block the caller and returns
 * nothing; once the build finishes, later getLexicalIndex() calls hit
 * the warm cache directly.
 */
export function warmupIndex() {
    const { chatId } = getContext();
    // No active chat, or a build is already in flight → nothing to do
    if (!chatId || building) return;
    // fire-and-forget
    getLexicalIndex().catch((e) => {
        xbLog.warn(MODULE_ID, '预热索引失败', e);
    });
}
// ─────────────────────────────────────────────────────────────────────────
// 公开接口invalidateLexicalIndex缓存失效
// ─────────────────────────────────────────────────────────────────────────
/**
 * Drop the cached index so the next getLexicalIndex() / warmupIndex()
 * triggers a full rebuild.
 *
 * Called on CHAT_CHANGED, after L0 extraction, and after L2 summarization.
 */
export function invalidateLexicalIndex() {
    const hadIndex = cachedIndex !== null;
    cachedIndex = null;
    cachedChatId = null;
    cachedFingerprint = null;
    floorDocIds = new Map();
    if (hadIndex) {
        xbLog.info(MODULE_ID, '索引缓存已失效');
    }
}
// ─────────────────────────────────────────────────────────────────────────
// 增量更新接口
// ─────────────────────────────────────────────────────────────────────────
/**
 * Incrementally (re)index the L1 chunks of one floor.
 *
 * Removes the floor's previous documents first, then adds the new ones
 * (replace semantics). Silently skips when no index is cached — the next
 * getLexicalIndex() performs a full rebuild anyway.
 *
 * @param {number} floor - floor number
 * @param {object[]} chunks - chunk objects (need chunkId, text, floor)
 */
export function addDocumentsForFloor(floor, chunks) {
    if (!cachedIndex || !chunks?.length) return;
    // Drop stale docs for this floor before re-adding
    removeDocumentsByFloor(floor);
    const docs = [];
    const docIds = [];
    for (const chunk of chunks) {
        if (!chunk?.chunkId || !chunk.text) continue;
        docs.push({
            id: chunk.chunkId,
            type: 'chunk',
            floor: chunk.floor ?? floor,
            text: chunk.text,
        });
        docIds.push(chunk.chunkId);
    }
    if (!docs.length) return;
    cachedIndex.addAll(docs);
    floorDocIds.set(floor, docIds);
    xbLog.info(MODULE_ID, `增量添加: floor ${floor}, ${docs.length} 个 chunk`);
}
/**
 * Remove every indexed L1 chunk document belonging to a floor.
 *
 * Uses MiniSearch discard() (soft delete). Silently skips when no index
 * is cached or no documents are tracked for the floor.
 *
 * @param {number} floor - floor number
 */
export function removeDocumentsByFloor(floor) {
    if (!cachedIndex) return;
    const docIds = floorDocIds.get(floor);
    if (!docIds?.length) return;
    for (const id of docIds) {
        try {
            cachedIndex.discard(id);
        } catch {
            // Document may already be gone (e.g. replaced by a full rebuild)
        }
    }
    floorDocIds.delete(floor);
    xbLog.info(MODULE_ID, `增量移除: floor ${floor}, ${docIds.length} 个文档`);
}
/**
 * Add (or overwrite) L2 event documents in the index.
 *
 * Any existing document with the same event ID is discarded first, so
 * this behaves as an upsert. Silently skips when no index is cached.
 *
 * @param {object[]} events - event objects (need id, title, summary, participants)
 */
export function addEventDocuments(events) {
    if (!cachedIndex || !events?.length) return;
    const docs = [];
    for (const ev of events) {
        if (!ev?.id) continue;
        const pieces = [];
        if (ev.title) pieces.push(ev.title);
        if (ev.participants?.length) pieces.push(ev.participants.join(' '));
        const summary = cleanSummary(ev.summary);
        if (summary) pieces.push(summary);
        const text = pieces.join(' ').trim();
        if (!text) continue;
        // Upsert: drop any previous version of this event first
        try {
            cachedIndex.discard(ev.id);
        } catch {
            // Not indexed yet — nothing to discard
        }
        docs.push({ id: ev.id, type: 'event', floor: null, text });
    }
    if (docs.length) {
        cachedIndex.addAll(docs);
        xbLog.info(MODULE_ID, `增量添加: ${docs.length} 个事件`);
    }
}

View File

@@ -0,0 +1,685 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector (v6 - Dense-Gated Lexical)
//
// v5 → v6 变更:
// - lexical: 新增 eventFilteredByDense / floorFilteredByDense
// - event: entityFilter bypass 阈值改为 CONFIG 驱动0.80
// - 其余结构不变
//
// v4 → v5 变更:
// - query: 新增 segmentWeights / r2Weights加权向量诊断
// - fusion: 新增 denseAggMethod / lexDensityBonus聚合策略可观测
// - quality: 新增 rerankRetentionRate粗排-精排一致性)
// - 移除 timing 中从未写入的死字段queryBuild/queryRefine/lexicalSearch/fusion
// - 移除从未写入的 arc 区块
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Create an empty metrics object with every counter zeroed and every
 * collection empty. One such object is created per recall run and mutated
 * in place by the pipeline stages.
 * @returns {object}
 */
export function createMetrics() {
    return {
        // Query build
        query: {
            buildTime: 0,
            refineTime: 0,
            lengths: {
                v0Chars: 0,
                v1Chars: null, // null = no hints
                rerankChars: 0,
            },
            segmentWeights: [], // R1 normalized weights [context..., focus]
            r2Weights: null, // R2 normalized weights [context..., focus, hints]; null = no hints
        },
        // Anchor (L0 StateAtoms) - semantic anchors
        anchor: {
            needRecall: false,
            focusTerms: [],
            focusCharacters: [],
            focusEntities: [],
            matched: 0,
            floorsHit: 0,
            topHits: [],
        },
        // Lexical (MiniSearch) retrieval
        lexical: {
            terms: [],
            atomHits: 0,
            chunkHits: 0,
            eventHits: 0,
            searchTime: 0,
            indexReadyTime: 0,
            eventFilteredByDense: 0,
            floorFilteredByDense: 0,
        },
        // Fusion (W-RRF, floor-level) - multi-channel merge
        fusion: {
            denseFloors: 0,
            lexFloors: 0,
            totalUnique: 0,
            afterCap: 0,
            time: 0,
            denseAggMethod: '', // aggregation description (e.g. "max×0.6+mean×0.4")
            lexDensityBonus: 0, // density bonus coefficient
        },
        // Constraint (L3 Facts) - world constraints
        constraint: {
            total: 0,
            filtered: 0,
            injected: 0,
            tokens: 0,
            samples: [],
        },
        // Event (L2 Events) - event summaries
        event: {
            inStore: 0,
            considered: 0,
            selected: 0,
            byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0, l0Linked: 0 },
            similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
            entityFilter: null,
            causalChainDepth: 0,
            causalCount: 0,
            entitiesUsed: 0,
            focusTermsCount: 0,
            entityNames: [],
        },
        // Evidence (two-stage: floor rerank → L1 pull) - source-text evidence
        evidence: {
            // Stage 1: floor
            floorCandidates: 0,
            floorsSelected: 0,
            l0Collected: 0,
            rerankApplied: false,
            rerankFailed: false,
            beforeRerank: 0,
            afterRerank: 0,
            rerankTime: 0,
            rerankScores: null,
            rerankDocAvgLength: 0,
            // Stage 2: L1
            l1Pulled: 0,
            l1Attached: 0,
            l1CosineTime: 0,
            // Assembly
            contextPairsAdded: 0,
            tokens: 0,
            assemblyTime: 0,
        },
        // Diffusion (PPR spreading activation) - graph diffusion
        diffusion: {
            seedCount: 0,
            graphNodes: 0,
            graphEdges: 0,
            candidatePairs: 0,
            pairsFromWhat: 0,
            pairsFromRSem: 0,
            rSemAvgSim: 0,
            timeWindowFilteredPairs: 0,
            topKPrunedPairs: 0,
            edgeDensity: 0,
            reweightWhoUsed: 0,
            reweightWhereUsed: 0,
            iterations: 0,
            convergenceError: 0,
            pprActivated: 0,
            cosineGatePassed: 0,
            cosineGateFiltered: 0,
            cosineGateNoVector: 0,
            postGatePassRate: 0,
            finalCount: 0,
            scoreDistribution: { min: 0, max: 0, mean: 0 },
            byChannel: { what: 0, where: 0, rSem: 0, who: 0 },
            time: 0,
        },
        // Formatting
        formatting: {
            sectionsIncluded: [],
            time: 0,
        },
        // Budget summary
        budget: {
            total: 0,
            limit: 0,
            utilization: 0,
            breakdown: {
                constraints: 0,
                events: 0,
                distantEvidence: 0,
                recentEvidence: 0,
                arcs: 0,
            },
        },
        // Timing (only fields that are actually written by the pipeline)
        timing: {
            anchorSearch: 0,
            constraintFilter: 0,
            eventRetrieval: 0,
            evidenceRetrieval: 0,
            evidenceRerank: 0,
            evidenceAssembly: 0,
            diffusion: 0,
            formatting: 0,
            total: 0,
        },
        // Quality indicators
        quality: {
            constraintCoverage: 100,
            eventPrecisionProxy: 0,
            l1AttachRate: 0,
            rerankRetentionRate: 0,
            diffusionEffectiveRate: 0,
            potentialIssues: [],
        },
    };
}
/**
 * Compute a similarity distribution summary, rounded to 3 decimals.
 * For even-length input the median is the upper-middle element.
 *
 * @param {number[]} similarities
 * @returns {{min: number, max: number, mean: number, median: number}}
 */
export function calcSimilarityStats(similarities) {
    if (!similarities?.length) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }
    const ordered = similarities.slice().sort((x, y) => x - y);
    let total = 0;
    for (const v of ordered) total += v;
    const round3 = (x) => Number(x.toFixed(3));
    return {
        min: round3(ordered[0]),
        max: round3(ordered[ordered.length - 1]),
        mean: round3(total / ordered.length),
        median: round3(ordered[Math.floor(ordered.length / 2)]),
    };
}
/**
 * Format an array of weights as a compact bracketed string.
 * Numbers render with 3 decimal places; non-numbers via String().
 * @param {number[]|null} weights
 * @returns {string} e.g. "[0.150, 0.300]" or 'N/A' for null/empty input
 */
function fmtWeights(weights) {
    if (!weights || weights.length === 0) return 'N/A';
    const parts = weights.map((w) => {
        return typeof w === 'number' ? w.toFixed(3) : String(w);
    });
    return `[${parts.join(', ')}]`;
}
/**
 * Render a populated recall-metrics object as a human-readable, tree-style
 * multi-line report (box-drawing characters), suitable for console logging.
 *
 * Fix: dangling `├─` branches are now closed with `└─` when they are the
 * last line of their sub-tree (anchor `need_recall` when recall is off,
 * constraint `tokens` when no samples exist, diffusion `no_vector`).
 *
 * @param {object} metrics - fully populated recall metrics object (read-only)
 * @returns {string} formatted report, lines joined with '\n'
 */
export function formatMetricsLog(metrics) {
    const m = metrics;
    const lines = [];
    lines.push('');
    lines.push('════════════════════════════════════════');
    lines.push('       Recall Metrics Report (v5)       ');
    lines.push('════════════════════════════════════════');
    lines.push('');
    // Query Length
    lines.push('[Query Length] 查询长度');
    lines.push(`├─ query_v0_chars: ${m.query?.lengths?.v0Chars ?? 0}`);
    lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'N/A' : m.query.lengths.v1Chars}`);
    lines.push(`└─ rerank_query_chars: ${m.query?.lengths?.rerankChars ?? 0}`);
    lines.push('');
    // Query Build
    lines.push('[Query] 查询构建');
    lines.push(`├─ build_time: ${m.query.buildTime}ms`);
    lines.push(`├─ refine_time: ${m.query.refineTime}ms`);
    lines.push(`├─ r1_weights: ${fmtWeights(m.query.segmentWeights)}`);
    if (m.query.r2Weights) {
        lines.push(`└─ r2_weights: ${fmtWeights(m.query.r2Weights)}`);
    } else {
        lines.push(`└─ r2_weights: N/A (no hints)`);
    }
    lines.push('');
    // Anchor (L0 StateAtoms)
    lines.push('[Anchor] L0 StateAtoms - 语义锚点');
    if (m.anchor.needRecall) {
        lines.push(`├─ need_recall: ${m.anchor.needRecall}`);
        lines.push(`├─ focus_terms: [${(m.anchor.focusTerms || m.anchor.focusEntities || []).join(', ')}]`);
        lines.push(`├─ focus_characters: [${(m.anchor.focusCharacters || []).join(', ')}]`);
        lines.push(`├─ matched: ${m.anchor.matched || 0}`);
        lines.push(`└─ floors_hit: ${m.anchor.floorsHit || 0}`);
    } else {
        // Only line of the section when recall is off: close the branch.
        lines.push(`└─ need_recall: ${m.anchor.needRecall}`);
    }
    lines.push('');
    // Lexical (MiniSearch)
    lines.push('[Lexical] MiniSearch - 词法检索');
    lines.push(`├─ terms: [${(m.lexical.terms || []).slice(0, 8).join(', ')}]`);
    lines.push(`├─ atom_hits: ${m.lexical.atomHits}`);
    lines.push(`├─ chunk_hits: ${m.lexical.chunkHits}`);
    lines.push(`├─ event_hits: ${m.lexical.eventHits}`);
    lines.push(`├─ search_time: ${m.lexical.searchTime}ms`);
    if (m.lexical.indexReadyTime > 0) {
        lines.push(`├─ index_ready_time: ${m.lexical.indexReadyTime}ms`);
    }
    if (m.lexical.eventFilteredByDense > 0) {
        lines.push(`├─ event_filtered_by_dense: ${m.lexical.eventFilteredByDense}`);
    }
    if (m.lexical.floorFilteredByDense > 0) {
        lines.push(`├─ floor_filtered_by_dense: ${m.lexical.floorFilteredByDense}`);
    }
    lines.push(`└─ dense_gate_threshold: 0.50`);
    lines.push('');
    // Fusion (W-RRF, floor-level)
    lines.push('[Fusion] W-RRF (floor-level) - 多路融合');
    lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`);
    lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`);
    if (m.fusion.lexDensityBonus > 0) {
        lines.push(`│ └─ density_bonus: ${m.fusion.lexDensityBonus}`);
    }
    lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
    lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
    lines.push(`└─ time: ${m.fusion.time}ms`);
    lines.push('');
    // Constraint (L3 Facts)
    lines.push('[Constraint] L3 Facts - 世界约束');
    lines.push(`├─ total: ${m.constraint.total}`);
    lines.push(`├─ filtered: ${m.constraint.filtered || 0}`);
    lines.push(`├─ injected: ${m.constraint.injected}`);
    if (m.constraint.samples && m.constraint.samples.length > 0) {
        lines.push(`├─ tokens: ${m.constraint.tokens}`);
        lines.push(`└─ samples: "${m.constraint.samples.slice(0, 2).join('", "')}"`);
    } else {
        // No samples: tokens is the section's last line, so close the branch.
        lines.push(`└─ tokens: ${m.constraint.tokens}`);
    }
    lines.push('');
    // Event (L2 Events)
    lines.push('[Event] L2 Events - 事件摘要');
    lines.push(`├─ in_store: ${m.event.inStore}`);
    lines.push(`├─ considered: ${m.event.considered}`);
    if (m.event.entityFilter) {
        const ef = m.event.entityFilter;
        lines.push(`├─ entity_filter:`);
        lines.push(`│ ├─ focus_characters: [${(ef.focusCharacters || ef.focusEntities || []).join(', ')}]`);
        lines.push(`│ ├─ before: ${ef.before}`);
        lines.push(`│ ├─ after: ${ef.after}`);
        lines.push(`│ └─ filtered: ${ef.filtered}`);
    }
    lines.push(`├─ selected: ${m.event.selected}`);
    lines.push(`├─ by_recall_type:`);
    lines.push(`│ ├─ direct: ${m.event.byRecallType.direct}`);
    lines.push(`│ ├─ related: ${m.event.byRecallType.related}`);
    lines.push(`│ ├─ causal: ${m.event.byRecallType.causal}`);
    if (m.event.byRecallType.l0Linked) {
        lines.push(`│ ├─ lexical: ${m.event.byRecallType.lexical}`);
        lines.push(`│ └─ l0_linked: ${m.event.byRecallType.l0Linked}`);
    } else {
        lines.push(`│ └─ lexical: ${m.event.byRecallType.lexical}`);
    }
    const sim = m.event.similarityDistribution;
    if (sim && sim.max > 0) {
        lines.push(`├─ similarity_distribution:`);
        lines.push(`│ ├─ min: ${sim.min}`);
        lines.push(`│ ├─ max: ${sim.max}`);
        lines.push(`│ ├─ mean: ${sim.mean}`);
        lines.push(`│ └─ median: ${sim.median}`);
    }
    lines.push(`├─ causal_chain: depth=${m.event.causalChainDepth}, count=${m.event.causalCount}`);
    lines.push(`└─ focus_characters_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}], focus_terms_count=${m.event.focusTermsCount || 0}`);
    lines.push('');
    // Evidence (Two-Stage: Floor Rerank → L1 Pull)
    lines.push('[Evidence] Two-Stage: Floor Rerank → L1 Pull');
    lines.push(`├─ Stage 1 (Floor Rerank):`);
    lines.push(`│ ├─ floor_candidates (post-fusion): ${m.evidence.floorCandidates}`);
    if (m.evidence.rerankApplied) {
        lines.push(`│ ├─ rerank_applied: true`);
        if (m.evidence.rerankFailed) {
            lines.push(`│ │ ⚠ rerank_failed: using fusion order`);
        }
        lines.push(`│ │ ├─ before: ${m.evidence.beforeRerank} floors`);
        lines.push(`│ │ ├─ after: ${m.evidence.afterRerank} floors`);
        lines.push(`│ │ └─ time: ${m.evidence.rerankTime}ms`);
        if (m.evidence.rerankScores) {
            const rs = m.evidence.rerankScores;
            lines.push(`│ ├─ rerank_scores: min=${rs.min}, max=${rs.max}, mean=${rs.mean}`);
        }
        if (m.evidence.rerankDocAvgLength > 0) {
            lines.push(`│ ├─ rerank_doc_avg_length: ${m.evidence.rerankDocAvgLength} chars`);
        }
    } else {
        lines.push(`│ ├─ rerank_applied: false`);
    }
    lines.push(`│ ├─ floors_selected: ${m.evidence.floorsSelected}`);
    lines.push(`│ └─ l0_atoms_collected: ${m.evidence.l0Collected}`);
    lines.push(`├─ Stage 2 (L1):`);
    lines.push(`│ ├─ pulled: ${m.evidence.l1Pulled}`);
    lines.push(`│ ├─ attached: ${m.evidence.l1Attached}`);
    lines.push(`│ └─ cosine_time: ${m.evidence.l1CosineTime}ms`);
    lines.push(`├─ tokens: ${m.evidence.tokens}`);
    lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`);
    lines.push('');
    // Diffusion (PPR)
    lines.push('[Diffusion] PPR Spreading Activation');
    lines.push(`├─ seeds: ${m.diffusion.seedCount}`);
    lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
    lines.push(`├─ candidate_pairs: ${m.diffusion.candidatePairs || 0} (what=${m.diffusion.pairsFromWhat || 0}, r_sem=${m.diffusion.pairsFromRSem || 0})`);
    lines.push(`├─ r_sem_avg_sim: ${m.diffusion.rSemAvgSim || 0}`);
    lines.push(`├─ pair_filters: time_window=${m.diffusion.timeWindowFilteredPairs || 0}, topk_pruned=${m.diffusion.topKPrunedPairs || 0}`);
    lines.push(`├─ edge_density: ${m.diffusion.edgeDensity || 0}%`);
    if (m.diffusion.graphEdges > 0) {
        const ch = m.diffusion.byChannel || {};
        lines.push(`│ ├─ by_channel: what=${ch.what || 0}, r_sem=${ch.rSem || 0}, who=${ch.who || 0}, where=${ch.where || 0}`);
        lines.push(`│ └─ reweight_used: who=${m.diffusion.reweightWhoUsed || 0}, where=${m.diffusion.reweightWhereUsed || 0}`);
    }
    if (m.diffusion.iterations > 0) {
        lines.push(`├─ ppr: ${m.diffusion.iterations} iterations, ε=${Number(m.diffusion.convergenceError).toExponential(1)}`);
    }
    lines.push(`├─ activated (excl seeds): ${m.diffusion.pprActivated}`);
    if (m.diffusion.pprActivated > 0) {
        lines.push(`├─ cosine_gate: ${m.diffusion.cosineGatePassed} passed, ${m.diffusion.cosineGateFiltered} filtered`);
        const passPrefix = m.diffusion.cosineGateNoVector > 0 ? '│ ├─' : '│ └─';
        lines.push(`${passPrefix} pass_rate: ${m.diffusion.postGatePassRate || 0}%`);
        if (m.diffusion.cosineGateNoVector > 0) {
            // no_vector is the last entry of this sub-tree: close it with └─
            // (was ├─, leaving a dangling branch in the rendered tree).
            lines.push(`│ └─ no_vector: ${m.diffusion.cosineGateNoVector}`);
        }
    }
    lines.push(`├─ final_injected: ${m.diffusion.finalCount}`);
    if (m.diffusion.finalCount > 0) {
        const ds = m.diffusion.scoreDistribution;
        lines.push(`├─ scores: min=${ds.min}, max=${ds.max}, mean=${ds.mean}`);
    }
    lines.push(`└─ time: ${m.diffusion.time}ms`);
    lines.push('');
    // Formatting
    lines.push('[Formatting] 格式化');
    lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`);
    lines.push(`└─ time: ${m.formatting.time}ms`);
    lines.push('');
    // Budget Summary
    lines.push('[Budget] 预算');
    lines.push(`├─ total_tokens: ${m.budget.total}`);
    lines.push(`├─ limit: ${m.budget.limit}`);
    lines.push(`├─ utilization: ${m.budget.utilization}%`);
    lines.push(`└─ breakdown:`);
    const bd = m.budget.breakdown || {};
    lines.push(` ├─ constraints: ${bd.constraints || 0}`);
    lines.push(` ├─ events: ${bd.events || 0}`);
    lines.push(` ├─ distant_evidence: ${bd.distantEvidence || 0}`);
    lines.push(` ├─ recent_evidence: ${bd.recentEvidence || 0}`);
    lines.push(` └─ arcs: ${bd.arcs || 0}`);
    lines.push('');
    // Timing
    lines.push('[Timing] 计时');
    lines.push(`├─ query_build: ${m.query.buildTime}ms`);
    lines.push(`├─ query_refine: ${m.query.refineTime}ms`);
    lines.push(`├─ anchor_search: ${m.timing.anchorSearch}ms`);
    const lexicalTotal = (m.lexical.searchTime || 0) + (m.lexical.indexReadyTime || 0);
    lines.push(`├─ lexical_search: ${lexicalTotal}ms (query=${m.lexical.searchTime || 0}ms, index_ready=${m.lexical.indexReadyTime || 0}ms)`);
    lines.push(`├─ fusion: ${m.fusion.time}ms`);
    lines.push(`├─ constraint_filter: ${m.timing.constraintFilter}ms`);
    lines.push(`├─ event_retrieval: ${m.timing.eventRetrieval}ms`);
    lines.push(`├─ evidence_retrieval: ${m.timing.evidenceRetrieval}ms`);
    lines.push(`├─ floor_rerank: ${m.timing.evidenceRerank || 0}ms`);
    lines.push(`├─ l1_cosine: ${m.evidence.l1CosineTime}ms`);
    lines.push(`├─ diffusion: ${m.timing.diffusion}ms`);
    lines.push(`├─ evidence_assembly: ${m.timing.evidenceAssembly}ms`);
    lines.push(`├─ formatting: ${m.timing.formatting}ms`);
    lines.push(`└─ total: ${m.timing.total}ms`);
    lines.push('');
    // Quality Indicators
    lines.push('[Quality] 质量指标');
    lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
    lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
    lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`);
    lines.push(`├─ rerank_retention_rate: ${m.quality.rerankRetentionRate}%`);
    lines.push(`├─ diffusion_effective_rate: ${m.quality.diffusionEffectiveRate}%`);
    if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
        lines.push(`└─ potential_issues:`);
        m.quality.potentialIssues.forEach((issue, i) => {
            const prefix = i === m.quality.potentialIssues.length - 1 ? ' └─' : ' ├─';
            lines.push(`${prefix}${issue}`);
        });
    } else {
        lines.push(`└─ potential_issues: none`);
    }
    lines.push('');
    lines.push('════════════════════════════════════════');
    lines.push('');
    return lines.join('\n');
}
/**
 * Scan populated recall metrics for symptoms of retrieval problems and
 * return human-readable warning strings.
 *
 * Side effects: also writes two derived quality metrics back onto the
 * input (`quality.rerankRetentionRate`, `quality.diffusionEffectiveRate`),
 * which formatMetricsLog later reads — so call this before formatting
 * the report.
 *
 * @param {object} metrics - recall metrics object (mutated: see note above)
 * @returns {string[]} descriptions of potential issues (empty when healthy)
 */
export function detectIssues(metrics) {
    const issues = [];
    const m = metrics;
    // ─────────────────────────────────────────────────────────────────
    // Query construction issues
    // ─────────────────────────────────────────────────────────────────
    if ((m.anchor.focusTerms || m.anchor.focusEntities || []).length === 0) {
        issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
    }
    // Extreme weight degeneration (the focus segment sits at the array tail
    // by construction in the query builder).
    const segWeights = m.query.segmentWeights || [];
    if (segWeights.length > 0) {
        const focusWeight = segWeights[segWeights.length - 1] || 0;
        if (focusWeight < 0.15) {
            issues.push(`Focus segment weight very low (${(focusWeight * 100).toFixed(0)}%) - focus message may be too short`);
        }
        const allLow = segWeights.every(w => w < 0.1);
        if (allLow) {
            issues.push('All segment weights below 10% - all messages may be extremely short');
        }
    }
    // ─────────────────────────────────────────────────────────────────
    // Anchor matching issues
    // ─────────────────────────────────────────────────────────────────
    if ((m.anchor.matched || 0) === 0 && m.anchor.needRecall) {
        issues.push('No anchors matched - may need to generate anchors');
    }
    // ─────────────────────────────────────────────────────────────────
    // Lexical retrieval issues
    // ─────────────────────────────────────────────────────────────────
    if ((m.lexical.terms || []).length > 0 && m.lexical.chunkHits === 0 && m.lexical.eventHits === 0) {
        issues.push('Lexical search returned zero hits - terms may not match any indexed content');
    }
    // ─────────────────────────────────────────────────────────────────
    // Fusion issues (floor-level)
    // ─────────────────────────────────────────────────────────────────
    if (m.fusion.lexFloors === 0 && m.fusion.denseFloors > 0) {
        issues.push('No lexical floors in fusion - hybrid retrieval not contributing');
    }
    if (m.fusion.afterCap === 0) {
        issues.push('Fusion produced zero floor candidates - all retrieval paths may have failed');
    }
    // ─────────────────────────────────────────────────────────────────
    // Event recall issues
    // ─────────────────────────────────────────────────────────────────
    if (m.event.considered > 0) {
        // Dense-channel selection ratio: direct + related over all considered.
        const denseSelected =
            (m.event.byRecallType?.direct || 0) +
            (m.event.byRecallType?.related || 0);
        const denseSelectRatio = denseSelected / m.event.considered;
        if (denseSelectRatio < 0.1) {
            issues.push(`Dense event selection ratio too low (${(denseSelectRatio * 100).toFixed(1)}%) - threshold may be too high`);
        }
        if (denseSelectRatio > 0.6 && m.event.considered > 10) {
            issues.push(`Dense event selection ratio high (${(denseSelectRatio * 100).toFixed(1)}%) - may include noise`);
        }
    }
    // Entity-filter issues
    if (m.event.entityFilter) {
        const ef = m.event.entityFilter;
        if (ef.filtered === 0 && ef.before > 10) {
            issues.push('No events filtered by entity - focus entities may be too broad or missing');
        }
        if (ef.before > 0 && ef.filtered > ef.before * 0.8) {
            issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`);
        }
    }
    // Similarity issues (min > 0 means the distribution was actually computed)
    if (m.event.similarityDistribution && m.event.similarityDistribution.min > 0 && m.event.similarityDistribution.min < 0.5) {
        issues.push(`Low similarity events included (min=${m.event.similarityDistribution.min})`);
    }
    // Causal-chain issues
    if (m.event.selected > 0 && m.event.causalCount === 0 && m.event.byRecallType.direct === 0) {
        issues.push('No direct or causal events - query may not align with stored events');
    }
    // ─────────────────────────────────────────────────────────────────
    // Floor rerank issues
    // ─────────────────────────────────────────────────────────────────
    if (m.evidence.rerankFailed) {
        issues.push('Rerank API failed — using fusion rank order as fallback, relevance scores are zero');
    }
    if (m.evidence.rerankApplied && !m.evidence.rerankFailed) {
        if (m.evidence.rerankScores) {
            const rs = m.evidence.rerankScores;
            if (rs.max < 0.3) {
                issues.push(`Low floor rerank scores (max=${rs.max}) - query-document domain mismatch`);
            }
            if (rs.mean < 0.2) {
                issues.push(`Very low average floor rerank score (mean=${rs.mean}) - context may be weak`);
            }
        }
        if (m.evidence.rerankTime > 3000) {
            issues.push(`Slow floor rerank (${m.evidence.rerankTime}ms) - may affect response time`);
        }
        if (m.evidence.rerankDocAvgLength > 3000) {
            issues.push(`Large rerank documents (avg ${m.evidence.rerankDocAvgLength} chars) - may reduce rerank precision`);
        }
    }
    // Rerank retention rate (written back for the report's quality section).
    const retentionRate = m.evidence.floorCandidates > 0
        ? Math.round(m.evidence.floorsSelected / m.evidence.floorCandidates * 100)
        : 0;
    m.quality.rerankRetentionRate = retentionRate;
    if (m.evidence.floorCandidates > 0 && retentionRate < 25) {
        issues.push(`Low rerank retention rate (${retentionRate}%) - fusion ranking poorly aligned with reranker`);
    }
    // ─────────────────────────────────────────────────────────────────
    // L1 attachment issues
    // ─────────────────────────────────────────────────────────────────
    if (m.evidence.floorsSelected > 0 && m.evidence.l1Pulled === 0) {
        issues.push('Zero L1 chunks pulled - L1 vectors may not exist or DB read failed');
    }
    if (m.evidence.floorsSelected > 0 && m.evidence.l1Attached === 0 && m.evidence.l1Pulled > 0) {
        issues.push('L1 chunks pulled but none attached - cosine scores may be too low');
    }
    // NOTE(review): l1AttachRate is read from quality, i.e. it is expected to
    // have been filled by the pipeline before detectIssues runs — confirm.
    const l1AttachRate = m.quality.l1AttachRate || 0;
    if (m.evidence.floorsSelected > 3 && l1AttachRate < 50) {
        issues.push(`Low L1 attach rate (${l1AttachRate}%) - selected floors lack L1 chunks`);
    }
    // ─────────────────────────────────────────────────────────────────
    // Budget issues
    // ─────────────────────────────────────────────────────────────────
    if (m.budget.utilization > 90) {
        issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`);
    }
    // ─────────────────────────────────────────────────────────────────
    // Performance issues
    // ─────────────────────────────────────────────────────────────────
    if (m.timing.total > 8000) {
        issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`);
    }
    if (m.query.buildTime > 100) {
        issues.push(`Slow query build (${m.query.buildTime}ms) - entity lexicon may be too large`);
    }
    if (m.evidence.l1CosineTime > 1000) {
        issues.push(`Slow L1 cosine scoring (${m.evidence.l1CosineTime}ms) - too many chunks pulled`);
    }
    // ─────────────────────────────────────────────────────────────────
    // Diffusion issues
    // ─────────────────────────────────────────────────────────────────
    if (m.diffusion.graphEdges === 0 && m.diffusion.seedCount > 0) {
        issues.push('No diffusion graph edges - atoms may lack edges fields');
    }
    if (m.diffusion.pprActivated > 0 && m.diffusion.cosineGatePassed === 0) {
        issues.push('All PPR-activated nodes failed cosine gate - graph structure diverged from query semantics');
    }
    // Derived metric: fraction of activated nodes that survived to injection.
    m.quality.diffusionEffectiveRate = m.diffusion.pprActivated > 0
        ? Math.round((m.diffusion.finalCount / m.diffusion.pprActivated) * 100)
        : 0;
    if (m.diffusion.cosineGateNoVector > 5) {
        issues.push(`${m.diffusion.cosineGateNoVector} PPR nodes missing vectors - L0 vectorization may be incomplete`);
    }
    if (m.diffusion.time > 50) {
        issues.push(`Slow diffusion (${m.diffusion.time}ms) - graph may be too dense`);
    }
    if (m.diffusion.pprActivated > 0 && (m.diffusion.postGatePassRate < 20 || m.diffusion.postGatePassRate > 60)) {
        issues.push(`Diffusion post-gate pass rate out of target (${m.diffusion.postGatePassRate}%)`);
    }
    return issues;
}

View File

@@ -0,0 +1,387 @@
// ═══════════════════════════════════════════════════════════════════════════
// query-builder.js - 确定性查询构建器(无 LLM
//
// 职责:
// 1. 从最近 3 条消息构建 QueryBundle加权向量段
// 2. 用第一轮召回结果产出 hints 段用于 R2 增强
//
// 加权向量设计:
// - 每条消息独立 embed得到独立向量
// - 按位置分配基础权重(焦点 > 近上下文 > 远上下文)
// - 短消息通过 lengthFactor 自动降权(下限 35%
// - recall.js 负责 embed + 归一化 + 加权平均
//
// 焦点确定:
// - pendingUserMessage 存在 → 它是焦点
// - 否则 → lastMessages 最后一条是焦点
//
// 不负责向量化、检索、rerank
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../../extensions.js';
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
import { getSummaryStore } from '../../data/store.js';
import { filterText } from '../utils/text-filter.js';
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
// ─────────────────────────────────────────────────────────────────────────
// Weight constants
// ─────────────────────────────────────────────────────────────────────────
// R1 base weights: [...context(oldest→newest), focus]
// Focus message gets 55%, nearest context 30%, older context 15%.
export const FOCUS_BASE_WEIGHT = 0.55;
export const CONTEXT_BASE_WEIGHTS = [0.15, 0.30];
// R2 base weights: the focus yields part of its share to the hints segment.
export const FOCUS_BASE_WEIGHT_R2 = 0.45;
export const CONTEXT_BASE_WEIGHTS_R2 = [0.10, 0.20];
export const HINTS_BASE_WEIGHT = 0.25;
// Length penalty: linear decay below 50 chars, floored at 35%.
export const LENGTH_FULL_THRESHOLD = 50;
export const LENGTH_MIN_FACTOR = 0.35;
// Minimum normalized focus share (hard floor enforced by recall.js after
// normalization): even a very short focus text must not be diluted below this.
export const FOCUS_MIN_NORMALIZED_WEIGHT = 0.35;
// ─────────────────────────────────────────────────────────────────────────
// Other constants
// ─────────────────────────────────────────────────────────────────────────
// Max number of L0 atom hints folded into the R2 hints segment.
const MEMORY_HINT_ATOMS_MAX = 5;
// Max number of L2 event hints folded into the R2 hints segment.
const MEMORY_HINT_EVENTS_MAX = 3;
// Upper bound on the number of MiniSearch lexical query terms.
const LEXICAL_TERMS_MAX = 10;
// ─────────────────────────────────────────────────────────────────────────
// 工具函数
// ─────────────────────────────────────────────────────────────────────────
/**
 * Clean raw message text (kept consistent with chunk-builder / recall):
 * applies the shared filter, then strips [tts:...] tags and <state> blocks.
 * @param {string} text - raw message text
 * @returns {string} cleaned, trimmed text
 */
function cleanMessageText(text) {
    let cleaned = filterText(text);
    cleaned = cleaned.replace(/\[tts:[^\]]*\]/gi, '');
    cleaned = cleaned.replace(/<state>[\s\S]*?<\/state>/gi, '');
    return cleaned.trim();
}
/**
 * Clean an event summary by stripping a trailing floor marker such as
 * "(#12)" or "(#12-15)".
 * @param {string} summary - raw summary (null/undefined tolerated)
 * @returns {string} trimmed summary without the trailing floor marker
 */
function cleanSummary(summary) {
    const floorMarker = /\s*\(#\d+(?:-\d+)?\)\s*$/;
    const text = String(summary || '');
    return text.replace(floorMarker, '').trim();
}
/**
 * Compute the length factor used to down-weight short messages.
 *
 *   charCount >= 50 → 1.0
 *   charCount <= 0  → 0.35
 *   otherwise       → linear interpolation between the two
 *
 * @param {number} charCount - cleaned content length (speaker prefix excluded)
 * @returns {number} factor in [0.35, 1.0]
 */
export function computeLengthFactor(charCount) {
    if (charCount <= 0) return LENGTH_MIN_FACTOR;
    if (charCount >= LENGTH_FULL_THRESHOLD) return 1.0;
    const progress = charCount / LENGTH_FULL_THRESHOLD;
    const range = 1.0 - LENGTH_MIN_FACTOR;
    return LENGTH_MIN_FACTOR + range * progress;
}
/**
 * Extract the most frequent content words from text (for lexical search).
 * Tokens are lowercased, counted, and the top `maxTerms` by frequency win.
 *
 * @param {string} text - cleaned text
 * @param {number} maxTerms - maximum number of terms to return
 * @returns {string[]} terms sorted by descending frequency
 */
function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
    if (!text) return [];
    const counts = new Map();
    for (const raw of tokenizerTokenizeForIndex(text)) {
        const term = String(raw || '').toLowerCase();
        if (!term) continue;
        counts.set(term, (counts.get(term) || 0) + 1);
    }
    const ranked = [...counts.entries()].sort((a, b) => b[1] - a[1]);
    return ranked.slice(0, maxTerms).map(([term]) => term);
}
// ─────────────────────────────────────────────────────────────────────────
// 类型定义
// ─────────────────────────────────────────────────────────────────────────
/**
* @typedef {object} QuerySegment
* @property {string} text - 待 embed 的文本(含 speaker 前缀,纯自然语言)
* @property {number} baseWeight - R1 基础权重
* @property {number} charCount - 内容字符数(不含 speaker 前缀,用于 lengthFactor
*/
/**
* @typedef {object} QueryBundle
* @property {QuerySegment[]} querySegments - R1 向量段(上下文 oldest→newest焦点在末尾
* @property {QuerySegment|null} hintsSegment - R2 hints 段refinement 后填充)
* @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前)
* @property {string[]} lexicalTerms - MiniSearch 查询词
* @property {string[]} focusTerms - 焦点词(原 focusEntities
* @property {string[]} focusCharacters - 焦点人物focusTerms ∩ trustedCharacters
* @property {string[]} focusEntities - Deprecated alias of focusTerms
* @property {Set<string>} allEntities - Full entity lexicon (includes non-character entities)
* @property {Set<string>} allCharacters - Union of trusted and candidate character pools
* @property {Set<string>} trustedCharacters - Clean character pool (main/arcs/name2/L2 participants)
* @property {Set<string>} candidateCharacters - Extended character pool from L0 edges.s/t after cleanup
* @property {Set<string>} _lexicon - 实体词典(内部使用)
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
*/
// ─────────────────────────────────────────────────────────────────────────
// 内部:消息条目构建
// ─────────────────────────────────────────────────────────────────────────
/**
* @typedef {object} MessageEntry
* @property {string} text - speaker内容完整文本
* @property {number} charCount - 内容字符数(不含 speaker 前缀)
*/
/**
 * Clean a chat message and build a MessageEntry from it.
 * @param {object} message - chat message object
 * @param {object} context - { name1, name2 }
 * @returns {MessageEntry|null} null when the message is empty after cleaning
 */
function buildMessageEntry(message, context) {
    if (!message?.mes) return null;
    const clean = cleanMessageText(message.mes);
    if (!clean) return null;
    let speaker;
    if (message.is_user) {
        speaker = context.name1 || '用户';
    } else {
        speaker = message.name || context.name2 || '角色';
    }
    return {
        text: `${speaker}${clean}`,
        charCount: clean.length,
    };
}
// ─────────────────────────────────────────────────────────────────────────
// 阶段 1构建 QueryBundle
// ─────────────────────────────────────────────────────────────────────────
/**
 * Build the initial query bundle (deterministic, no LLM).
 *
 * Message layout (K=3):
 *   msg[0] = USER(#N-2)   context  baseWeight = 0.15
 *   msg[1] = AI(#N-1)     context  baseWeight = 0.30
 *   msg[2] = USER(#N)     focus    baseWeight = 0.55
 *
 * Focus selection:
 *   pendingUserMessage present → it is the focus; every lastMessages entry is context
 *   pendingUserMessage absent  → lastMessages[-1] is the focus, the rest are context
 *
 * @param {object[]} lastMessages - most recent K messages (supplied by recall.js)
 * @param {string|null} pendingUserMessage - user input not yet committed to chat
 * @param {object|null} store - summary store; defaults to getSummaryStore()
 * @param {object|null} context - { name1, name2 }; defaults from getContext()
 * @returns {QueryBundle}
 */
export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) {
    if (!store) store = getSummaryStore();
    if (!context) {
        const ctx = getContext();
        context = { name1: ctx.name1, name2: ctx.name2 };
    }
    // 1. Entity / character lexicons
    const lexicon = buildEntityLexicon(store, context);
    const displayMap = buildDisplayNameMap(store, context);
    const { trustedCharacters, candidateCharacters, allCharacters } = buildCharacterPools(store, context);
    // 2. Split the focus message from the context messages
    const contextEntries = [];
    let focusEntry = null;
    const allCleanTexts = [];
    if (pendingUserMessage) {
        // The pending message is the focus; every lastMessages entry is context.
        const pendingClean = cleanMessageText(pendingUserMessage);
        if (pendingClean) {
            const speaker = context.name1 || '用户';
            focusEntry = {
                text: `${speaker}${pendingClean}`,
                charCount: pendingClean.length,
            };
            allCleanTexts.push(pendingClean);
        }
        for (const m of (lastMessages || [])) {
            const entry = buildMessageEntry(m, context);
            if (entry) {
                contextEntries.push(entry);
                allCleanTexts.push(cleanMessageText(m.mes));
            }
        }
    } else {
        // No pending message → lastMessages[-1] is the focus.
        const msgs = lastMessages || [];
        if (msgs.length > 0) {
            const lastMsg = msgs[msgs.length - 1];
            const entry = buildMessageEntry(lastMsg, context);
            if (entry) {
                focusEntry = entry;
                allCleanTexts.push(cleanMessageText(lastMsg.mes));
            }
        }
        for (let i = 0; i < msgs.length - 1; i++) {
            const entry = buildMessageEntry(msgs[i], context);
            if (entry) {
                contextEntries.push(entry);
                allCleanTexts.push(cleanMessageText(msgs[i].mes));
            }
        }
    }
    // 3. Extract focus terms, then intersect with the trusted character pool
    const combinedText = allCleanTexts.join(' ');
    const focusTerms = extractEntitiesFromText(combinedText, lexicon, displayMap);
    const focusCharacters = focusTerms.filter(term => trustedCharacters.has(term.toLowerCase()));
    // 4. Build querySegments
    // Context first (oldest → newest), focus at the end.
    // Context weights are assigned tail-aligned from CONTEXT_BASE_WEIGHTS.
    const querySegments = [];
    for (let i = 0; i < contextEntries.length; i++) {
        const weightIdx = Math.max(0, CONTEXT_BASE_WEIGHTS.length - contextEntries.length + i);
        querySegments.push({
            text: contextEntries[i].text,
            baseWeight: CONTEXT_BASE_WEIGHTS[weightIdx] || CONTEXT_BASE_WEIGHTS[0],
            charCount: contextEntries[i].charCount,
        });
    }
    if (focusEntry) {
        querySegments.push({
            text: focusEntry.text,
            baseWeight: FOCUS_BASE_WEIGHT,
            charCount: focusEntry.charCount,
        });
    }
    // 5. rerankQuery: focus first, plain natural language, no prefixes
    const contextLines = contextEntries.map(e => e.text);
    const rerankQuery = focusEntry
        ? [focusEntry.text, ...contextLines].join('\n')
        : contextLines.join('\n');
    // 6. lexicalTerms: entity terms first, padded with high-frequency words
    const entityTerms = focusTerms.map(e => e.toLowerCase());
    const textTerms = extractKeyTerms(combinedText);
    const termSet = new Set(entityTerms);
    for (const t of textTerms) {
        if (termSet.size >= LEXICAL_TERMS_MAX) break;
        termSet.add(t);
    }
    return {
        querySegments,
        hintsSegment: null,
        rerankQuery,
        lexicalTerms: Array.from(termSet),
        focusTerms,
        focusCharacters,
        focusEntities: focusTerms, // deprecated alias (compat)
        allEntities: lexicon,
        allCharacters,
        trustedCharacters,
        candidateCharacters,
        _lexicon: lexicon,
        _displayMap: displayMap,
    };
}
// ─────────────────────────────────────────────────────────────────────────
// 阶段 3Query Refinement用第一轮召回结果产出 hints 段)
// ─────────────────────────────────────────────────────────────────────────
/**
 * Enrich a QueryBundle with the results of the first recall round.
 *
 * Mutates `bundle` in place (query/rerank helper fields only):
 *   - hintsSegment: filled with a hints segment for R2 weighting (or null)
 *   - lexicalTerms: may be extended with keywords mined from the hints
 *   - rerankQuery: left untouched (focus-first plain natural language)
 *
 * @param {QueryBundle} bundle - original query bundle (mutated)
 * @param {object[]} anchorHits - first-round L0 hits, descending similarity
 * @param {object[]} eventHits - first-round L2 hits, descending similarity
 */
export function refineQueryBundle(bundle, anchorHits, eventHits) {
    const hints = [];
    // Memory hints from the strongest L0 anchors.
    for (const hit of (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX)) {
        const semantic = hit.atom?.semantic || '';
        if (semantic) hints.push(semantic);
    }
    // Memory hints from the strongest L2 events.
    for (const hit of (eventHits || []).slice(0, MEMORY_HINT_EVENTS_MAX)) {
        const ev = hit.event || {};
        const title = String(ev.title || '').trim();
        const summary = cleanSummary(ev.summary);
        let line;
        if (title && summary) {
            line = `${title}: ${summary}`;
        } else {
            line = title || summary;
        }
        if (line) hints.push(line);
    }
    // No hints → clear the segment and leave lexicalTerms as-is.
    if (hints.length === 0) {
        bundle.hintsSegment = null;
        return;
    }
    // Build the hints segment for R2 weighting.
    const hintsText = hints.join('\n');
    bundle.hintsSegment = {
        text: hintsText,
        baseWeight: HINTS_BASE_WEIGHT,
        charCount: hintsText.length,
    };
    // rerankQuery is deliberately untouched: the cross-encoder receives pure
    // natural language and must not be biased by hint text.
    // Augment lexicalTerms with up to 5 keywords mined from the hints.
    const seen = new Set(bundle.lexicalTerms);
    for (const term of extractKeyTerms(hints.join(' '), 5)) {
        if (seen.size >= LEXICAL_TERMS_MAX) break;
        if (seen.has(term)) continue;
        seen.add(term);
        bundle.lexicalTerms.push(term);
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,261 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Chunk Store (L1/L2 storage)
// ═══════════════════════════════════════════════════════════════════════════
import {
metaTable,
chunksTable,
chunkVectorsTable,
eventVectorsTable,
CHUNK_MAX_TOKENS,
} from '../../data/db.js';
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Copy the bytes of a Float32Array into a standalone ArrayBuffer.
 * Handles views with a non-zero byteOffset (subarrays) correctly.
 * @param {Float32Array} arr
 * @returns {ArrayBuffer} a fresh buffer containing only this view's bytes
 */
export function float32ToBuffer(arr) {
    const start = arr.byteOffset;
    const end = start + arr.byteLength;
    return arr.buffer.slice(start, end);
}
/**
 * Inverse of float32ToBuffer: wrap a raw ArrayBuffer as a Float32Array view.
 * No copy is made — the returned view aliases the buffer's memory.
 * @param {ArrayBuffer} buffer
 * @returns {Float32Array}
 */
export function bufferToFloat32(buffer) {
    return new Float32Array(buffer);
}
/**
 * Build the canonical chunk id "c-<floor>-<chunkIdx>".
 * @param {number} floor
 * @param {number} chunkIdx
 * @returns {string}
 */
export function makeChunkId(floor, chunkIdx) {
    const parts = ['c', String(floor), String(chunkIdx)];
    return parts.join('-');
}
/**
 * Fast non-cryptographic string hash (Java-style hash * 31 + charCode,
 * wrapped to 32-bit signed), returned in base-36.
 * @param {string} text
 * @returns {string} base-36 representation of the 32-bit hash
 */
export function hashText(text) {
    let hash = 0;
    for (let i = 0; i < text.length; i++) {
        // Math.imul(hash, 31) ≡ ((hash << 5) - hash) modulo 2^32.
        hash = (Math.imul(hash, 31) + text.charCodeAt(i)) | 0;
    }
    return hash.toString(36);
}
// ═══════════════════════════════════════════════════════════════════════════
// Meta 表操作
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Load the per-chat meta record, creating and persisting a default one
 * on first access.
 * @param {string} chatId
 * @returns {Promise<object>} the existing or freshly created meta record
 */
export async function getMeta(chatId) {
    const existing = await metaTable.get(chatId);
    if (existing) return existing;
    // First access for this chat: persist a default record.
    const fresh = {
        chatId,
        fingerprint: null,
        lastChunkFloor: -1,
        updatedAt: Date.now(),
    };
    await metaTable.put(fresh);
    return fresh;
}
/**
 * Apply a partial update to the per-chat meta record, refreshing updatedAt.
 * @param {string} chatId
 * @param {object} updates - fields to merge into the record
 */
export async function updateMeta(chatId, updates) {
    const patch = { ...updates, updatedAt: Date.now() };
    await metaTable.update(chatId, patch);
}
// ═══════════════════════════════════════════════════════════════════════════
// Chunks 表操作
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Persist a batch of chunks for a chat (upsert via bulkPut).
 * @param {string} chatId
 * @param {object[]} chunks - chunk objects from the chunk builder
 */
export async function saveChunks(chatId, chunks) {
    const records = [];
    for (const chunk of chunks) {
        records.push({
            chatId,
            chunkId: chunk.chunkId,
            floor: chunk.floor,
            chunkIdx: chunk.chunkIdx,
            speaker: chunk.speaker,
            isUser: chunk.isUser,
            text: chunk.text,
            textHash: chunk.textHash,
            createdAt: Date.now(),
        });
    }
    await chunksTable.bulkPut(records);
}
/**
 * Fetch every stored chunk for a chat.
 * @param {string} chatId
 * @returns {Promise<object[]>} chunk records (index order, not sorted by floor)
 */
export async function getAllChunks(chatId) {
    return await chunksTable.where('chatId').equals(chatId).toArray();
}
/**
 * Fetch chunks for a set of floors using the [chatId+floor] compound index.
 * @param {string} chatId
 * @param {number[]} floors
 * @returns {Promise<object[]>} matching chunk records
 */
export async function getChunksByFloors(chatId, floors) {
    const keys = floors.map(f => [chatId, f]);
    return await chunksTable
        .where('[chatId+floor]')
        .anyOf(keys)
        .toArray();
}
/**
 * Delete all chunks at or after a floor, together with their vectors.
 * @param {string} chatId
 * @param {number} fromFloor - first floor (inclusive) to delete
 */
export async function deleteChunksFromFloor(chatId, fromFloor) {
    // Collect the ids first so the matching vector rows can be removed too.
    const chunks = await chunksTable
        .where('chatId')
        .equals(chatId)
        .filter(c => c.floor >= fromFloor)
        .toArray();
    const chunkIds = chunks.map(c => c.chunkId);
    await chunksTable
        .where('chatId')
        .equals(chatId)
        .filter(c => c.floor >= fromFloor)
        .delete();
    // Vector deletes are independent of one another: run them in parallel
    // instead of awaiting one round-trip per chunk.
    await Promise.all(
        chunkIds.map(chunkId => chunkVectorsTable.delete([chatId, chunkId])),
    );
}
/**
* 删除指定楼层的 chunk 和向量
*/
/**
 * Delete the chunks of one specific floor, plus their stored vectors.
 * @param {string} chatId
 * @param {number} floor
 */
export async function deleteChunksAtFloor(chatId, floor) {
    // Look up the affected rows via the [chatId+floor] compound index.
    const chunks = await chunksTable
        .where('[chatId+floor]')
        .equals([chatId, floor])
        .toArray();
    if (chunks.length === 0) return; // nothing stored for this floor
    await chunksTable.where('[chatId+floor]').equals([chatId, floor]).delete();
    // Remove the paired vectors in one bulk call instead of one delete per id.
    await chunkVectorsTable.bulkDelete(chunks.map(c => [chatId, c.chunkId]));
}
/**
 * Remove all chunks and chunk vectors for a chat (chunks first, then vectors).
 * @param {string} chatId
 */
export async function clearAllChunks(chatId) {
    await chunksTable.where('chatId').equals(chatId).delete();
    await chunkVectorsTable.where('chatId').equals(chatId).delete();
}
// ═══════════════════════════════════════════════════════════════════════════
// ChunkVectors 表操作
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Persist chunk embedding vectors (stored as raw ArrayBuffers).
 * @param {string} chatId
 * @param {Array<{chunkId: string, vector: number[]}>} items
 * @param {string} fingerprint - embedding-engine fingerprint at write time
 */
export async function saveChunkVectors(chatId, items, fingerprint) {
    const records = items.map(item => ({
        chatId,
        chunkId: item.chunkId,
        vector: float32ToBuffer(new Float32Array(item.vector)),
        dims: item.vector.length,
        fingerprint,
    }));
    await chunkVectorsTable.bulkPut(records);
}
/**
 * Load all chunk vectors for a chat, rehydrated into Float32Arrays.
 * @param {string} chatId
 * @returns {Promise<object[]>}
 */
export async function getAllChunkVectors(chatId) {
    const records = await chunkVectorsTable.where('chatId').equals(chatId).toArray();
    return records.map(r => ({
        ...r,
        vector: bufferToFloat32(r.vector),
    }));
}
/**
 * Load vectors for specific chunk ids via the [chatId+chunkId] compound index.
 * Ids with no stored vector are silently absent from the result.
 * @param {string} chatId
 * @param {string[]} chunkIds
 * @returns {Promise<Array<{chunkId: string, vector: Float32Array}>>}
 */
export async function getChunkVectorsByIds(chatId, chunkIds) {
    if (!chatId || !chunkIds?.length) return [];
    const records = await chunkVectorsTable
        .where('[chatId+chunkId]')
        .anyOf(chunkIds.map(id => [chatId, id]))
        .toArray();
    return records.map(r => ({
        chunkId: r.chunkId,
        vector: bufferToFloat32(r.vector),
    }));
}
// ═══════════════════════════════════════════════════════════════════════════
// EventVectors 表操作
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Persist event embedding vectors (stored as raw ArrayBuffers).
 * @param {string} chatId
 * @param {Array<{eventId: string, vector: number[]}>} items
 * @param {string} fingerprint - embedding-engine fingerprint at write time
 */
export async function saveEventVectors(chatId, items, fingerprint) {
    const records = items.map(item => ({
        chatId,
        eventId: item.eventId,
        vector: float32ToBuffer(new Float32Array(item.vector)),
        dims: item.vector.length,
        fingerprint,
    }));
    await eventVectorsTable.bulkPut(records);
}
/**
 * Load all event vectors for a chat, rehydrated into Float32Arrays.
 * @param {string} chatId
 * @returns {Promise<object[]>}
 */
export async function getAllEventVectors(chatId) {
    const records = await eventVectorsTable.where('chatId').equals(chatId).toArray();
    return records.map(r => ({
        ...r,
        vector: bufferToFloat32(r.vector),
    }));
}
/**
 * Remove every event vector for a chat.
 * @param {string} chatId
 */
export async function clearEventVectors(chatId) {
    await eventVectorsTable.where('chatId').equals(chatId).delete();
}
/**
* 按 ID 列表删除 event 向量
*/
/**
 * Delete event vectors by id for one chat.
 * @param {string} chatId
 * @param {string[]} eventIds
 */
export async function deleteEventVectorsByIds(chatId, eventIds) {
    // Guard empty input (consistent with getChunkVectorsByIds) and issue a
    // single bulkDelete instead of awaiting one delete per id.
    if (!chatId || !eventIds?.length) return;
    await eventVectorsTable.bulkDelete(eventIds.map(id => [chatId, id]));
}
// ═══════════════════════════════════════════════════════════════════════════
// 统计与工具
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Collect storage counters for a chat (used by the settings UI).
 * @param {string} chatId
 * @returns {Promise<object>} fingerprint, lastChunkFloor, and per-table counts
 */
export async function getStorageStats(chatId) {
    const [meta, chunkCount, chunkVectorCount, eventCount] = await Promise.all([
        getMeta(chatId),
        chunksTable.where('chatId').equals(chatId).count(),
        chunkVectorsTable.where('chatId').equals(chatId).count(),
        eventVectorsTable.where('chatId').equals(chatId).count(),
    ]);
    return {
        fingerprint: meta.fingerprint,
        lastChunkFloor: meta.lastChunkFloor,
        chunks: chunkCount,
        chunkVectors: chunkVectorCount,
        eventVectors: eventCount,
    };
}
/**
 * Wipe everything stored for a chat: meta record, chunks, chunk/event vectors.
 * @param {string} chatId
 */
export async function clearChatData(chatId) {
    await Promise.all([
        metaTable.delete(chatId),
        chunksTable.where('chatId').equals(chatId).delete(),
        chunkVectorsTable.where('chatId').equals(chatId).delete(),
        eventVectorsTable.where('chatId').equals(chatId).delete(),
    ]);
}
/**
 * Verify stored vectors were produced by the current embedding engine.
 * On mismatch, all chunk/event vectors are wiped (vectors from different
 * engines are not comparable) and the meta record is reset to the new
 * fingerprint with lastChunkFloor rewound to -1.
 * @param {string} chatId
 * @param {string} newFingerprint - fingerprint of the active engine
 * @returns {Promise<boolean>} false when a mismatch forced a wipe, else true
 */
export async function ensureFingerprintMatch(chatId, newFingerprint) {
    const meta = await getMeta(chatId);
    if (meta.fingerprint && meta.fingerprint !== newFingerprint) {
        await Promise.all([
            chunkVectorsTable.where('chatId').equals(chatId).delete(),
            eventVectorsTable.where('chatId').equals(chatId).delete(),
        ]);
        await updateMeta(chatId, {
            fingerprint: newFingerprint,
            lastChunkFloor: -1,
        });
        return false;
    }
    if (!meta.fingerprint) {
        // First run for this chat: just record the fingerprint.
        await updateMeta(chatId, { fingerprint: newFingerprint });
    }
    return true;
}
export { CHUNK_MAX_TOKENS };

View File

@@ -0,0 +1,266 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - State Store (L0)
// StateAtom 存 chat_metadata持久化
// StateVector 存 IndexedDB可重建
// ═══════════════════════════════════════════════════════════════════════════
import { saveMetadataDebounced } from '../../../../../../../extensions.js';
import { chat_metadata } from '../../../../../../../../script.js';
import { stateVectorsTable } from '../../data/db.js';
import { EXT_ID } from '../../../../core/constants.js';
import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'state-store';
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Copy a Float32Array's occupied bytes into a standalone ArrayBuffer.
 * @param {Float32Array} arr
 * @returns {ArrayBuffer}
 */
export function float32ToBuffer(arr) {
    const start = arr.byteOffset;
    return arr.buffer.slice(start, start + arr.byteLength);
}
/**
 * Wrap a stored ArrayBuffer back into a Float32Array view.
 * @param {ArrayBuffer} buffer
 * @returns {Float32Array}
 */
export function bufferToFloat32(buffer) {
    return new Float32Array(buffer);
}
// ═══════════════════════════════════════════════════════════════════════════
// StateAtom 操作chat_metadata
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Ensure chat_metadata.extensions[EXT_ID].stateAtoms exists and return it.
 * @returns {object[]} the live (mutable) stateAtoms array
 */
function ensureStateAtomsArray() {
    chat_metadata.extensions ||= {};
    chat_metadata.extensions[EXT_ID] ||= {};
    chat_metadata.extensions[EXT_ID].stateAtoms ||= [];
    return chat_metadata.extensions[EXT_ID].stateAtoms;
}
// L0Index: per-floor status (ok | empty | fail)
/**
 * Ensure the L0 index container exists and return it.
 * @returns {{version: number, byFloor: object}} the live L0 index object
 */
function ensureL0Index() {
    chat_metadata.extensions ||= {};
    chat_metadata.extensions[EXT_ID] ||= {};
    chat_metadata.extensions[EXT_ID].l0Index ||= { version: 1, byFloor: {} };
    chat_metadata.extensions[EXT_ID].l0Index.byFloor ||= {};
    return chat_metadata.extensions[EXT_ID].l0Index;
}
/** Return the live L0 index (creating it if needed). */
export function getL0Index() {
    return ensureL0Index();
}
/**
 * Look up the extraction status recorded for one floor.
 * @param {number} floor
 * @returns {object|null} stored record, or null when the floor is untracked
 */
export function getL0FloorStatus(floor) {
    const idx = ensureL0Index();
    return idx.byFloor?.[String(floor)] || null;
}
/**
 * Record the extraction status for one floor and persist chat metadata.
 * @param {number} floor
 * @param {object} record - status payload; floor/updatedAt are stamped on top
 */
export function setL0FloorStatus(floor, record) {
    const idx = ensureL0Index();
    idx.byFloor[String(floor)] = {
        ...record,
        floor,
        updatedAt: Date.now(),
    };
    saveMetadataDebounced();
}
/** Drop every per-floor record from the L0 index and persist. */
export function clearL0Index() {
    const idx = ensureL0Index();
    idx.byFloor = {};
    saveMetadataDebounced();
}
/**
 * Delete L0 index records for every floor >= fromFloor.
 * @param {number} fromFloor - inclusive lower bound
 * @returns {number} how many records were removed
 */
export function deleteL0IndexFromFloor(fromFloor) {
    const idx = ensureL0Index();
    const keys = Object.keys(idx.byFloor || {});
    let deleted = 0;
    for (const k of keys) {
        const f = Number(k);
        if (Number.isFinite(f) && f >= fromFloor) {
            delete idx.byFloor[k];
            deleted++;
        }
    }
    if (deleted > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `删除 ${deleted} 条 L0Index (floor >= ${fromFloor})`);
    }
    return deleted;
}
/**
 * Get all StateAtoms of the current chat.
 * @returns {object[]} the live stateAtoms array
 */
export function getStateAtoms() {
    return ensureStateAtomsArray();
}
/**
 * Append new StateAtoms, deduplicated by atomId; invalid atoms are skipped.
 * Chat metadata is persisted only when something was actually added.
 * @param {object[]} atoms
 */
export function saveStateAtoms(atoms) {
    if (!atoms?.length) return;
    const arr = ensureStateAtomsArray();
    const existing = new Set(arr.map(a => a.atomId));
    let added = 0;
    for (const atom of atoms) {
        // Validity check: require an id, a non-negative numeric floor, and semantic text.
        if (!atom?.atomId || typeof atom.floor !== 'number' || atom.floor < 0 || !atom.semantic) {
            xbLog.warn(MODULE_ID, `跳过无效 atom: ${atom?.atomId}`);
            continue;
        }
        if (!existing.has(atom.atomId)) {
            arr.push(atom);
            existing.add(atom.atomId);
            added++;
        }
    }
    if (added > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `存储 ${added} 个 StateAtom`);
    }
}
/**
 * Delete StateAtoms at floor >= floor.
 * @param {number} floor - inclusive lower bound
 * @returns {number} how many atoms were removed
 */
export function deleteStateAtomsFromFloor(floor) {
    const arr = ensureStateAtomsArray();
    const before = arr.length;
    const filtered = arr.filter(a => a.floor < floor);
    chat_metadata.extensions[EXT_ID].stateAtoms = filtered;
    const deleted = before - filtered.length;
    if (deleted > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `删除 ${deleted} 个 StateAtom (floor >= ${floor})`);
    }
    return deleted;
}
/**
 * Remove every stored StateAtom; persists only if anything was removed.
 */
export function clearStateAtoms() {
    const arr = ensureStateAtomsArray();
    const count = arr.length;
    chat_metadata.extensions[EXT_ID].stateAtoms = [];
    if (count > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `清空 ${count} 个 StateAtom`);
    }
}
/**
 * Number of stored StateAtoms.
 * @returns {number}
 */
export function getStateAtomsCount() {
    return ensureStateAtomsArray().length;
}
/**
 * Return floors that already have extracted atoms.
 * @returns {Set<number>} distinct non-negative floors seen in stored atoms
 */
export function getExtractedFloors() {
    const floors = new Set();
    const arr = ensureStateAtomsArray();
    for (const atom of arr) {
        if (typeof atom?.floor === 'number' && atom.floor >= 0) {
            floors.add(atom.floor);
        }
    }
    return floors;
}
/**
 * Replace all stored StateAtoms.
 * @param {object[]} atoms - new atom list; non-array input is treated as empty
 */
export function replaceStateAtoms(atoms) {
    const next = Array.isArray(atoms) ? atoms : [];
    // Ensure the chat_metadata.extensions[EXT_ID] container exists first:
    // unlike its siblings, the original wrote to it directly, which throws a
    // TypeError when this is called before any other state-store function.
    ensureStateAtomsArray();
    chat_metadata.extensions[EXT_ID].stateAtoms = next;
    saveMetadataDebounced();
    xbLog.info(MODULE_ID, `替换 StateAtoms: ${next.length}`);
}
// ═══════════════════════════════════════════════════════════════════════════
// StateVector 操作IndexedDB
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Persist StateVectors for a chat (semantic vector plus optional r-vector).
 * Vectors are serialized to raw ArrayBuffers before hitting IndexedDB.
 * @param {string} chatId
 * @param {Array<{atomId: string, floor: number, vector: number[], rVector?: number[]}>} items
 * @param {string} fingerprint - embedding-engine fingerprint at write time
 */
export async function saveStateVectors(chatId, items, fingerprint) {
    if (!chatId || !items?.length) return;
    const records = items.map(item => ({
        chatId,
        atomId: item.atomId,
        floor: item.floor,
        vector: float32ToBuffer(new Float32Array(item.vector)),
        dims: item.vector.length,
        rVector: item.rVector?.length ? float32ToBuffer(new Float32Array(item.rVector)) : null,
        rDims: item.rVector?.length ? item.rVector.length : 0,
        fingerprint,
    }));
    await stateVectorsTable.bulkPut(records);
    xbLog.info(MODULE_ID, `存储 ${records.length} 个 StateVector`);
}
/**
 * Load all StateVectors for a chat, rehydrating buffers into Float32Arrays.
 * @param {string} chatId
 * @returns {Promise<object[]>}
 */
export async function getAllStateVectors(chatId) {
    if (!chatId) return [];
    const records = await stateVectorsTable.where('chatId').equals(chatId).toArray();
    return records.map(r => ({
        ...r,
        vector: bufferToFloat32(r.vector),
        rVector: r.rVector ? bufferToFloat32(r.rVector) : null,
    }));
}
/**
 * Delete StateVectors at floor >= floor.
 * @param {string} chatId
 * @param {number} floor - inclusive lower bound
 */
export async function deleteStateVectorsFromFloor(chatId, floor) {
    if (!chatId) return;
    const deleted = await stateVectorsTable
        .where('chatId')
        .equals(chatId)
        .filter(v => v.floor >= floor)
        .delete();
    if (deleted > 0) {
        xbLog.info(MODULE_ID, `删除 ${deleted} 个 StateVector (floor >= ${floor})`);
    }
}
/**
 * Remove every StateVector for a chat.
 * @param {string} chatId
 */
export async function clearStateVectors(chatId) {
    if (!chatId) return;
    const deleted = await stateVectorsTable.where('chatId').equals(chatId).delete();
    if (deleted > 0) {
        xbLog.info(MODULE_ID, `清空 ${deleted} 个 StateVector`);
    }
}
/**
 * Count StateVectors stored for a chat.
 * @param {string} chatId
 * @returns {Promise<number>}
 */
export async function getStateVectorsCount(chatId) {
    if (!chatId) return 0;
    return await stateVectorsTable.where('chatId').equals(chatId).count();
}

View File

@@ -0,0 +1,385 @@
// ═══════════════════════════════════════════════════════════════════════════
// Vector Import/Export
// 向量数据导入导出(当前 chatId 级别)
// ═══════════════════════════════════════════════════════════════════════════
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../../libs/fflate.mjs';
import { getContext } from '../../../../../../../extensions.js';
import { xbLog } from '../../../../core/debug-core.js';
import {
getMeta,
updateMeta,
getAllChunks,
getAllChunkVectors,
getAllEventVectors,
saveChunks,
saveChunkVectors,
clearAllChunks,
clearEventVectors,
saveEventVectors,
} from './chunk-store.js';
import {
getStateAtoms,
saveStateAtoms,
clearStateAtoms,
getAllStateVectors,
saveStateVectors,
clearStateVectors,
} from './state-store.js';
import { getEngineFingerprint } from '../utils/embedder.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'vector-io';
const EXPORT_VERSION = 2;
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Flatten an array of numeric vectors into raw little-host-order float32 bytes.
 * Every vector is written as exactly `dims` floats; missing/short entries are
 * zero-padded, longer entries are truncated.
 * @param {number[][]} vectors
 * @param {number} dims - floats per vector
 * @returns {Uint8Array}
 */
function float32ToBytes(vectors, dims) {
    const flat = new Float32Array(vectors.length * dims);
    let pos = 0;
    for (const vec of vectors) {
        for (let i = 0; i < dims; i++) {
            flat[pos] = vec[i] || 0;
            pos += 1;
        }
    }
    return new Uint8Array(flat.buffer);
}
/**
 * Inverse of float32ToBytes: split raw float32 bytes back into number arrays
 * of `dims` entries each.
 * @param {Uint8Array} bytes
 * @param {number} dims - floats per vector
 * @returns {number[][]}
 */
function bytesToFloat32(bytes, dims) {
    const floats = new Float32Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 4);
    const out = [];
    for (let start = 0; start < floats.length; start += dims) {
        out.push(Array.from(floats.subarray(start, start + dims)));
    }
    return out;
}
/**
 * Trigger a browser download of a Blob via a temporary <a download> element,
 * then release the object URL.
 * @param {Blob} blob
 * @param {string} filename - suggested file name for the download
 */
function downloadBlob(blob, filename) {
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
}
// ═══════════════════════════════════════════════════════════════════════════
// 导出
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Export the current chat's vector data (chunks, events, L0 state) as a zip.
 *
 * Layout: manifest.json + per-kind .jsonl metadata + raw float32 .bin blobs;
 * every .bin is serialized in the sorted order of its .jsonl counterpart so
 * the importer can zip them back together by index.
 *
 * @param {(msg: string) => void} [onProgress] - progress message callback
 * @returns {Promise<{filename: string, size: number, chunkCount: number, eventCount: number}>}
 * @throws {Error} when no chat is open, nothing is exportable, or dims is unknown
 */
export async function exportVectors(onProgress) {
    const { chatId } = getContext();
    if (!chatId) {
        throw new Error('未打开聊天');
    }
    onProgress?.('读取数据...');
    const meta = await getMeta(chatId);
    const chunks = await getAllChunks(chatId);
    const chunkVectors = await getAllChunkVectors(chatId);
    const eventVectors = await getAllEventVectors(chatId);
    const stateAtoms = getStateAtoms();
    const stateVectors = await getAllStateVectors(chatId);
    if (chunkVectors.length === 0 && eventVectors.length === 0 && stateVectors.length === 0) {
        throw new Error('没有可导出的向量数据');
    }
    // Determine dimensionality from the first available vector of any kind.
    const dims = chunkVectors[0]?.vector?.length
        || eventVectors[0]?.vector?.length
        || stateVectors[0]?.vector?.length
        || 0;
    if (dims === 0) {
        throw new Error('无法确定向量维度');
    }
    onProgress?.('构建索引...');
    // Sort by chunkId so the .jsonl rows and the binary blob share one order.
    const sortedChunks = [...chunks].sort((a, b) => a.chunkId.localeCompare(b.chunkId));
    const chunkVectorMap = new Map(chunkVectors.map(cv => [cv.chunkId, cv.vector]));
    // chunks.jsonl — one JSON object per line.
    const chunksJsonl = sortedChunks.map(c => JSON.stringify({
        chunkId: c.chunkId,
        floor: c.floor,
        chunkIdx: c.chunkIdx,
        speaker: c.speaker,
        isUser: c.isUser,
        text: c.text,
        textHash: c.textHash,
    })).join('\n');
    // chunk_vectors.bin — sortedChunks order; missing vectors become zeros.
    const chunkVectorsOrdered = sortedChunks.map(c => chunkVectorMap.get(c.chunkId) || new Array(dims).fill(0));
    onProgress?.('压缩向量...');
    // Event index, also sorted by id for deterministic ordering.
    const sortedEventVectors = [...eventVectors].sort((a, b) => a.eventId.localeCompare(b.eventId));
    const eventsJsonl = sortedEventVectors.map(ev => JSON.stringify({
        eventId: ev.eventId,
    })).join('\n');
    // event_vectors.bin
    const eventVectorsOrdered = sortedEventVectors.map(ev => ev.vector);
    // L0 state vectors (semantic + optional r-vector), sorted by atomId.
    const sortedStateVectors = [...stateVectors].sort((a, b) => String(a.atomId).localeCompare(String(b.atomId)));
    const stateVectorsOrdered = sortedStateVectors.map(v => v.vector);
    const rDims = sortedStateVectors.find(v => v.rVector?.length)?.rVector?.length || dims;
    const stateRVectorsOrdered = sortedStateVectors.map(v =>
        v.rVector?.length ? v.rVector : new Array(rDims).fill(0)
    );
    const stateVectorsJsonl = sortedStateVectors.map(v => JSON.stringify({
        atomId: v.atomId,
        floor: v.floor,
        hasRVector: !!(v.rVector?.length),
        rDims: v.rVector?.length || 0,
    })).join('\n');
    // manifest — counts let the importer cross-check every blob.
    const manifest = {
        version: EXPORT_VERSION,
        exportedAt: Date.now(),
        chatId,
        fingerprint: meta.fingerprint || '',
        dims,
        chunkCount: sortedChunks.length,
        chunkVectorCount: chunkVectors.length,
        eventCount: sortedEventVectors.length,
        stateAtomCount: stateAtoms.length,
        stateVectorCount: stateVectors.length,
        stateRVectorCount: sortedStateVectors.filter(v => v.rVector?.length).length,
        rDims,
        lastChunkFloor: meta.lastChunkFloor ?? -1,
    };
    onProgress?.('打包文件...');
    // Pack the zip.
    const zipData = zipSync({
        'manifest.json': strToU8(JSON.stringify(manifest, null, 2)),
        'chunks.jsonl': strToU8(chunksJsonl),
        'chunk_vectors.bin': float32ToBytes(chunkVectorsOrdered, dims),
        'events.jsonl': strToU8(eventsJsonl),
        'event_vectors.bin': float32ToBytes(eventVectorsOrdered, dims),
        'state_atoms.json': strToU8(JSON.stringify(stateAtoms)),
        'state_vectors.jsonl': strToU8(stateVectorsJsonl),
        'state_vectors.bin': stateVectorsOrdered.length
            ? float32ToBytes(stateVectorsOrdered, dims)
            : new Uint8Array(0),
        'state_r_vectors.bin': stateRVectorsOrdered.length
            ? float32ToBytes(stateRVectorsOrdered, rDims)
            : new Uint8Array(0),
    }, { level: 1 }); // low compression level: speed over size
    onProgress?.('下载文件...');
    // Build the download file name: vectors_<chatId prefix>_<yyyymmdd>.zip
    const timestamp = new Date().toISOString().slice(0, 10).replace(/-/g, '');
    const shortChatId = chatId.slice(0, 8);
    const filename = `vectors_${shortChatId}_${timestamp}.zip`;
    downloadBlob(new Blob([zipData]), filename);
    const sizeMB = (zipData.byteLength / 1024 / 1024).toFixed(2);
    // FIX: the log used shell-style "$(...)" instead of a template placeholder,
    // so the exported filename never actually appeared in the log line.
    xbLog.info(MODULE_ID, `导出完成: ${filename} (${sizeMB}MB)`);
    return {
        filename,
        size: zipData.byteLength,
        chunkCount: sortedChunks.length,
        eventCount: sortedEventVectors.length,
    };
}
// ═══════════════════════════════════════════════════════════════════════════
// 导入
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Import a vector-export zip (see exportVectors) into the current chat.
 * Validates the manifest, cross-checks metadata counts against the binary
 * blobs, wipes the chat's existing chunk/event/state data, then writes the
 * imported rows and updates the chat meta record.
 * @param {File|Blob} file - zip file produced by exportVectors
 * @param {(msg: string) => void} [onProgress] - progress message callback
 * @returns {Promise<{chunkCount: number, eventCount: number, warnings: string[], fingerprintMismatch: boolean}>}
 * @throws {Error} on unreadable zip, missing manifest, unsupported version,
 *   or metadata/vector count mismatches
 */
export async function importVectors(file, onProgress) {
    const { chatId } = getContext();
    if (!chatId) {
        throw new Error('未打开聊天');
    }
    onProgress?.('读取文件...');
    const arrayBuffer = await file.arrayBuffer();
    const zipData = new Uint8Array(arrayBuffer);
    onProgress?.('解压文件...');
    let unzipped;
    try {
        unzipped = unzipSync(zipData);
    } catch (e) {
        throw new Error('文件格式错误,无法解压');
    }
    // Read the manifest
    if (!unzipped['manifest.json']) {
        throw new Error('缺少 manifest.json');
    }
    const manifest = JSON.parse(strFromU8(unzipped['manifest.json']));
    if (![1, 2].includes(manifest.version)) {
        throw new Error(`不支持的版本: ${manifest.version}`);
    }
    onProgress?.('校验数据...');
    // Fingerprint check — a mismatch only warns; vectors must be regenerated
    const vectorCfg = getVectorConfig();
    const currentFingerprint = vectorCfg ? getEngineFingerprint(vectorCfg) : '';
    const fingerprintMismatch = manifest.fingerprint && currentFingerprint && manifest.fingerprint !== currentFingerprint;
    // chatId check (warn but allow the import to proceed)
    const chatIdMismatch = manifest.chatId !== chatId;
    const warnings = [];
    if (fingerprintMismatch) {
        warnings.push(`向量引擎不匹配(文件: ${manifest.fingerprint}, 当前: ${currentFingerprint}),导入后需重新生成`);
    }
    if (chatIdMismatch) {
        warnings.push(`聊天ID不匹配文件: ${manifest.chatId}, 当前: ${chatId}`);
    }
    onProgress?.('解析数据...');
    // Parse chunks
    const chunksJsonl = unzipped['chunks.jsonl'] ? strFromU8(unzipped['chunks.jsonl']) : '';
    const chunkMetas = chunksJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
    // Parse chunk vectors
    const chunkVectorsBytes = unzipped['chunk_vectors.bin'];
    const chunkVectors = chunkVectorsBytes ? bytesToFloat32(chunkVectorsBytes, manifest.dims) : [];
    // Parse events
    const eventsJsonl = unzipped['events.jsonl'] ? strFromU8(unzipped['events.jsonl']) : '';
    const eventMetas = eventsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
    // Parse event vectors
    const eventVectorsBytes = unzipped['event_vectors.bin'];
    const eventVectors = eventVectorsBytes ? bytesToFloat32(eventVectorsBytes, manifest.dims) : [];
    // Parse L0 state atoms
    const stateAtoms = unzipped['state_atoms.json']
        ? JSON.parse(strFromU8(unzipped['state_atoms.json']))
        : [];
    // Parse L0 state vector metadata
    const stateVectorsJsonl = unzipped['state_vectors.jsonl'] ? strFromU8(unzipped['state_vectors.jsonl']) : '';
    const stateVectorMetas = stateVectorsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
    // Parse L0 semantic vectors
    const stateVectorsBytes = unzipped['state_vectors.bin'];
    const stateVectors = (stateVectorsBytes && stateVectorMetas.length)
        ? bytesToFloat32(stateVectorsBytes, manifest.dims)
        : [];
    // Parse optional L0 r-vectors (for diffusion r-sem edges)
    const stateRVectorsBytes = unzipped['state_r_vectors.bin'];
    const stateRVectors = (stateRVectorsBytes && stateVectorMetas.length)
        ? bytesToFloat32(stateRVectorsBytes, manifest.rDims || manifest.dims)
        : [];
    const hasRVectorMeta = stateVectorMetas.some(m => typeof m.hasRVector === 'boolean');
    // Cross-check metadata row counts against vector blob counts
    if (chunkMetas.length !== chunkVectors.length) {
        throw new Error(`chunk 数量不匹配: 元数据 ${chunkMetas.length}, 向量 ${chunkVectors.length}`);
    }
    if (eventMetas.length !== eventVectors.length) {
        throw new Error(`event 数量不匹配: 元数据 ${eventMetas.length}, 向量 ${eventVectors.length}`);
    }
    if (stateVectorMetas.length !== stateVectors.length) {
        throw new Error(`state 向量数量不匹配: 元数据 ${stateVectorMetas.length}, 向量 ${stateVectors.length}`);
    }
    if (stateRVectors.length > 0 && stateVectorMetas.length !== stateRVectors.length) {
        throw new Error(`state r-vector count mismatch: meta=${stateVectorMetas.length}, vectors=${stateRVectors.length}`);
    }
    onProgress?.('清空旧数据...');
    // Wipe the chat's current data before writing the imported rows
    await clearAllChunks(chatId);
    await clearEventVectors(chatId);
    await clearStateVectors(chatId);
    clearStateAtoms();
    onProgress?.('写入数据...');
    // Write chunks
    if (chunkMetas.length > 0) {
        const chunksToSave = chunkMetas.map(meta => ({
            chunkId: meta.chunkId,
            floor: meta.floor,
            chunkIdx: meta.chunkIdx,
            speaker: meta.speaker,
            isUser: meta.isUser,
            text: meta.text,
            textHash: meta.textHash,
        }));
        await saveChunks(chatId, chunksToSave);
        // Write chunk vectors (same index order as chunkMetas)
        const chunkVectorItems = chunkMetas.map((meta, idx) => ({
            chunkId: meta.chunkId,
            vector: chunkVectors[idx],
        }));
        await saveChunkVectors(chatId, chunkVectorItems, manifest.fingerprint);
    }
    // Write event vectors
    if (eventMetas.length > 0) {
        const eventVectorItems = eventMetas.map((meta, idx) => ({
            eventId: meta.eventId,
            vector: eventVectors[idx],
        }));
        await saveEventVectors(chatId, eventVectorItems, manifest.fingerprint);
    }
    // Write state atoms
    if (stateAtoms.length > 0) {
        saveStateAtoms(stateAtoms);
    }
    // Write state vectors (semantic + optional r-vector)
    if (stateVectorMetas.length > 0) {
        const stateVectorItems = stateVectorMetas.map((meta, idx) => ({
            atomId: meta.atomId,
            floor: meta.floor,
            vector: stateVectors[idx],
            rVector: (stateRVectors[idx] && (!hasRVectorMeta || meta.hasRVector)) ? stateRVectors[idx] : null,
        }));
        await saveStateVectors(chatId, stateVectorItems, manifest.fingerprint);
    }
    // Update the chat's meta record to match the imported data
    await updateMeta(chatId, {
        fingerprint: manifest.fingerprint,
        lastChunkFloor: manifest.lastChunkFloor,
    });
    xbLog.info(MODULE_ID, `导入完成: ${chunkMetas.length} chunks, ${eventMetas.length} events, ${stateAtoms.length} state atoms`);
    return {
        chunkCount: chunkMetas.length,
        eventCount: eventMetas.length,
        warnings,
        fingerprintMismatch,
    };
}

View File

@@ -0,0 +1,83 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Embedder (v2 - 统一硅基)
// 所有 embedding 请求转发到 siliconflow.js
// ═══════════════════════════════════════════════════════════════════════════
import { embed as sfEmbed, getApiKey } from '../llm/siliconflow.js';
// ═══════════════════════════════════════════════════════════════════════════
// 统一 embed 接口
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Unified embedding entry point: every request is forwarded to the
 * SiliconFlow backend.
 * @param {string[]} texts - texts to embed
 * @param {object} [config] - legacy engine config, intentionally ignored
 * @param {object} [options] - passed through to the SiliconFlow embed call
 * @returns {Promise<number[][]>} one vector per input text
 */
export async function embed(texts, config, options = {}) {
    // The legacy config parameter is ignored; everything goes to SiliconFlow.
    return await sfEmbed(texts, options);
}
// ═══════════════════════════════════════════════════════════════════════════
// Fingerprint (simplified)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Engine fingerprint used to detect embedding-engine changes.
 * Constant because only SiliconFlow bge-m3 (1024 dims) is supported now.
 * @param {object} [config] - legacy config, ignored
 * @returns {string}
 */
export function getEngineFingerprint(config) {
    // Always the SiliconFlow bge-m3 engine.
    return 'siliconflow:bge-m3:1024';
}
// ═══════════════════════════════════════════════════════════════════════════
// Status checks (simplified) — local models were removed; these stubs keep
// older call sites working without changes.
// ═══════════════════════════════════════════════════════════════════════════
/** Local models are no longer supported; always reports not_supported. */
export async function checkLocalModelStatus() {
    // Local model support was removed.
    return { status: 'not_supported', message: '请使用在线服务' };
}
/** @returns {boolean} always false — no local model can be loaded */
export function isLocalModelLoaded() {
    return false;
}
/** @throws {Error} always — local models were removed */
export async function downloadLocalModel() {
    throw new Error('本地模型已移除,请使用在线服务');
}
/** No-op kept for backward compatibility. */
export function cancelDownload() { }
/** No-op kept for backward compatibility. */
export async function deleteLocalModelCache() { }
// ═══════════════════════════════════════════════════════════════════════════
// Online service test
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Round-trip test of the SiliconFlow embedding endpoint.
 * @returns {Promise<{success: boolean, dims: number}>} dims of the test vector
 * @throws {Error} when no API key is configured or the request fails
 */
export async function testOnlineService() {
    const key = getApiKey();
    if (!key) {
        throw new Error('请配置硅基 API Key');
    }
    try {
        const [vec] = await sfEmbed(['测试连接']);
        return { success: true, dims: vec?.length || 0 };
    } catch (e) {
        throw new Error(`连接失败: ${e.message}`);
    }
}
/**
 * List available embedding models.
 * @returns {Promise<string[]>} fixed single-model list for SiliconFlow
 */
export async function fetchOnlineModels() {
    // The SiliconFlow model is fixed.
    return ['BAAI/bge-m3'];
}
// ═══════════════════════════════════════════════════════════════════════════
// Legacy-interface compatibility exports
// ═══════════════════════════════════════════════════════════════════════════
// Kept so older imports keep resolving; local models no longer exist.
export const DEFAULT_LOCAL_MODEL = 'bge-m3';
export const LOCAL_MODELS = {};
export const ONLINE_PROVIDERS = {
    siliconflow: {
        id: 'siliconflow',
        name: '硅基流动',
        baseUrl: 'https://api.siliconflow.cn',
    },
};

View File

@@ -0,0 +1,64 @@
// run local embedding in background
// Web Worker: lazily loads a transformers.js feature-extraction pipeline from
// CDN and answers 'load' / 'embed' / 'check' messages, reporting progress and
// errors back to the main thread via postMessage.
let pipe = null;           // loaded pipeline instance (null until 'load' succeeds)
let currentModelId = null; // modelId of the currently loaded pipeline
self.onmessage = async (e) => {
    const { type, modelId, hfId, texts, requestId } = e.data || {};
    if (type === 'load') {
        try {
            self.postMessage({ type: 'status', status: 'loading', requestId });
            // Dynamic import keeps the worker lightweight until a model is requested.
            const { pipeline, env } = await import(
                'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
            );
            env.allowLocalModels = false;
            env.useBrowserCache = false;
            pipe = await pipeline('feature-extraction', hfId, {
                progress_callback: (progress) => {
                    // Forward model-download progress (0-100) to the main thread.
                    if (progress.status === 'progress' && typeof progress.progress === 'number') {
                        self.postMessage({ type: 'progress', percent: Math.round(progress.progress), requestId });
                    }
                }
            });
            currentModelId = modelId;
            self.postMessage({ type: 'loaded', requestId });
        } catch (err) {
            self.postMessage({ type: 'error', error: err?.message || String(err), requestId });
        }
        return;
    }
    if (type === 'embed') {
        if (!pipe) {
            self.postMessage({ type: 'error', error: '模型未加载', requestId });
            return;
        }
        try {
            const results = [];
            // Embed sequentially so per-text progress can be reported.
            for (let i = 0; i < texts.length; i++) {
                const output = await pipe(texts[i], { pooling: 'mean', normalize: true });
                results.push(Array.from(output.data));
                self.postMessage({ type: 'embed_progress', current: i + 1, total: texts.length, requestId });
            }
            self.postMessage({ type: 'result', vectors: results, requestId });
        } catch (err) {
            self.postMessage({ type: 'error', error: err?.message || String(err), requestId });
        }
        return;
    }
    if (type === 'check') {
        // Status probe: is a pipeline loaded, and which model is it?
        self.postMessage({
            type: 'status',
            loaded: !!pipe,
            modelId: currentModelId,
            requestId
        });
    }
};

View File

@@ -0,0 +1,63 @@
// ═══════════════════════════════════════════════════════════════════════════
// Text Filter - 通用文本过滤
// 跳过用户定义的「起始→结束」区间
// ═══════════════════════════════════════════════════════════════════════════
import { getTextFilterRules } from '../../data/config.js';
/**
 * Escape the characters that carry special meaning in a RegExp pattern,
 * so the string can be embedded as a literal match.
 * @param {string} str
 * @returns {string}
 */
function escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, String.raw`\$&`);
}
/**
 * Apply user-defined filter rules to a text.
 * - start + end: delete start...end inclusive (non-greedy, case-insensitive)
 * - empty start + end: delete from the beginning of the text through end
 * - start + empty end: delete from start through the end of the text
 * - both empty: rule is skipped
 * The surviving text is trimmed before being returned.
 * @param {string} text
 * @param {Array<{start?: string, end?: string}>} rules
 * @returns {string}
 */
export function applyTextFilterRules(text, rules) {
    if (!text || !rules?.length) return text;
    // Local literal-escaper so rule text never acts as regex syntax.
    const escape = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    let output = text;
    for (const rule of rules) {
        const from = rule.start ?? '';
        const to = rule.end ?? '';
        if (!from && !to) continue;
        if (from && to) {
            // Bounded span: strip everything from `from` through `to`, inclusive.
            const pattern = new RegExp(`${escape(from)}[\\s\\S]*?${escape(to)}`, 'gi');
            output = output.replace(pattern, '');
            continue;
        }
        const haystack = output.toLowerCase();
        if (from) {
            // Open-ended tail: cut from the first case-insensitive match onward.
            const hit = haystack.indexOf(from.toLowerCase());
            if (hit !== -1) {
                output = output.slice(0, hit);
            }
        } else {
            // Open-ended head: cut everything up to and including `to`.
            const hit = haystack.indexOf(to.toLowerCase());
            if (hit !== -1) {
                output = output.slice(hit + to.length);
            }
        }
    }
    return output.trim();
}
/**
 * Convenience wrapper: filter text with the currently configured rules.
 * @param {string} text
 * @returns {string} filtered (and trimmed) text
 */
export function filterText(text) {
    return applyTextFilterRules(text, getTextFilterRules());
}

View File

@@ -0,0 +1,749 @@
// ═══════════════════════════════════════════════════════════════════════════
// tokenizer.js - 统一分词器
//
// 职责:
// 1. 管理结巴 WASM 生命周期(预加载 / 就绪检测 / 降级)
// 2. 实体词典注入(分词前最长匹配保护)
// 3. 亚洲文字CJK + 假名)走结巴,拉丁文字走空格分割
// 4. 提供 tokenize(text): string[] 统一接口
//
// 加载时机:
// - 插件初始化时 storySummary.enabled && vectorConfig.enabled → preload()
// - 向量开关从 off→on 时 → preload()
// - CHAT_CHANGED 时 → injectEntities() + warmup 索引(不负责加载 WASM
//
// 降级策略:
// - WASM 未就绪时 → 实体保护 + 标点分割(不用 bigram
// ═══════════════════════════════════════════════════════════════════════════
import { extensionFolderPath } from '../../../../core/constants.js';
import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'tokenizer';
// ═══════════════════════════════════════════════════════════════════════════
// WASM 状态机
// ═══════════════════════════════════════════════════════════════════════════
/**
 * @enum {string}
 */
const WasmState = {
    IDLE: 'IDLE',
    LOADING: 'LOADING',
    READY: 'READY',
    FAILED: 'FAILED',
};
let wasmState = WasmState.IDLE;
/** @type {Promise<void>|null} in-flight load promise (guards against re-entry) */
let loadingPromise = null;
/** @type {typeof import('../../../../libs/jieba-wasm/jieba_rs_wasm.js')|null} */
let jiebaModule = null;
/** @type {Function|null} reference to jieba's cut function */
let jiebaCut = null;
/** @type {Function|null} reference to jieba's add_word function */
let jiebaAddWord = null;
/** @type {object|null} TinySegmenter instance */
let tinySegmenter = null;
// ═══════════════════════════════════════════════════════════════════════════
// Entity dictionary
// ═══════════════════════════════════════════════════════════════════════════
/** @type {string[]} entity list sorted by descending length (for longest match) */
let entityList = [];
/** @type {Set<string>} entities already injected into jieba (avoids duplicate add_word) */
let injectedEntities = new Set();
// ═══════════════════════════════════════════════════════════════════════════
// Stop words
// ═══════════════════════════════════════════════════════════════════════════
// Tokens dropped during keyword extraction; membership test is exact (Set.has).
const STOP_WORDS = new Set([
    // High-frequency Chinese function words
    '的', '了', '在', '是', '我', '有', '和', '就', '不', '人',
    '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去',
    '你', '会', '着', '没有', '看', '好', '自己', '这', '他', '她',
    '它', '吗', '什么', '那', '里', '来', '吧', '呢', '啊', '哦',
    '嗯', '呀', '哈', '嘿', '喂', '哎', '唉', '哇', '呃', '嘛',
    '把', '被', '让', '给', '从', '向', '对', '跟', '比', '但',
    '而', '或', '如果', '因为', '所以', '虽然', '但是', '然后',
    '可以', '这样', '那样', '怎么', '为什么', '什么样', '哪里',
    '时候', '现在', '已经', '还是', '只是', '可能', '应该', '知道',
    '觉得', '开始', '一下', '一些', '这个', '那个', '他们', '我们',
    '你们', '自己', '起来', '出来', '进去', '回来', '过来', '下去',
    // Common Japanese function words (>=2 chars, matching TinySegmenter output granularity)
    'です', 'ます', 'した', 'して', 'する', 'ない', 'いる', 'ある',
    'なる', 'れる', 'られ', 'られる',
    'この', 'その', 'あの', 'どの', 'ここ', 'そこ', 'あそこ',
    'これ', 'それ', 'あれ', 'どれ',
    'ても', 'から', 'まで', 'ので', 'のに', 'けど', 'だけ',
    'もう', 'まだ', 'とても', 'ちょっと', 'やっぱり',
    // Common English stop words
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
    'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
    'would', 'could', 'should', 'may', 'might', 'can', 'shall',
    'and', 'but', 'or', 'not', 'no', 'nor', 'so', 'yet',
    'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from',
    'it', 'its', 'he', 'she', 'his', 'her', 'they', 'them',
    'this', 'that', 'these', 'those', 'i', 'me', 'my', 'you', 'your',
    'we', 'our', 'if', 'then', 'than', 'when', 'what', 'which',
    'who', 'how', 'where', 'there', 'here', 'all', 'each', 'every',
    'both', 'few', 'more', 'most', 'other', 'some', 'such',
    'only', 'own', 'same', 'just', 'very', 'also', 'about',
]);
// ═══════════════════════════════════════════════════════════════════════════
// Unicode 分类
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Is `code` a kana code unit (hiragana / katakana / extensions / halfwidth)?
 * @param {number} code - UTF-16 code unit
 * @returns {boolean}
 */
function isKana(code) {
    return (code >= 0x3040 && code <= 0x309F)   // Hiragana
        || (code >= 0x30A0 && code <= 0x30FF)   // Katakana
        || (code >= 0x31F0 && code <= 0x31FF)   // Katakana Phonetic Extensions
        || (code >= 0xFF65 && code <= 0xFF9F);  // Halfwidth Katakana
}
/**
 * Is `code` a CJK ideograph (han character, excluding kana)?
 * @param {number} code - UTF-16 code unit
 * @returns {boolean}
 */
function isCJK(code) {
    return (code >= 0x4E00 && code <= 0x9FFF)   // CJK Unified Ideographs
        || (code >= 0x3400 && code <= 0x4DBF)   // Extension A
        || (code >= 0xF900 && code <= 0xFAFF)   // Compatibility Ideographs
        || (code >= 0x20000 && code <= 0x2A6DF); // Extension B
}
/**
 * Is `code` an Asian-script character (CJK ideograph or kana)?
 * @param {number} code - UTF-16 code unit
 * @returns {boolean}
 */
function isAsian(code) {
    return isCJK(code) || isKana(code);
}
/**
 * Is `code` a Latin letter, a digit, or a Latin-Extended letter?
 * @param {number} code - UTF-16 code unit
 * @returns {boolean}
 */
function isLatin(code) {
    return (code >= 0x41 && code <= 0x5A)     // A-Z
        || (code >= 0x61 && code <= 0x7A)     // a-z
        || (code >= 0x30 && code <= 0x39)     // 0-9
        || (code >= 0xC0 && code <= 0x024F);  // Latin Extended (àáâ etc.)
}
// ═══════════════════════════════════════════════════════════════════════════
// 文本分段(亚洲 vs 拉丁 vs 其他)
// ═══════════════════════════════════════════════════════════════════════════
/**
* @typedef {'asian'|'latin'|'other'} SegmentType
*/
/**
* @typedef {object} TextSegment
* @property {SegmentType} type - 段类型
* @property {string} text - 段文本
*/
/**
* 将文本按 Unicode 脚本分段
* 连续的同类字符归为一段
*
* @param {string} text
* @returns {TextSegment[]}
*/
/**
 * Split text into runs of same-script characters.
 * Consecutive characters of the same class form one segment; 'other'
 * segments that are pure whitespace are dropped.
 *
 * @param {string} text
 * @returns {TextSegment[]}
 */
function segmentByScript(text) {
    if (!text) return [];
    const classify = (code) => {
        if (isAsian(code)) return 'asian';
        if (isLatin(code)) return 'latin';
        return 'other';
    };
    /** @type {TextSegment[]} */
    const out = [];
    const flush = (type, piece) => {
        // Whitespace-only 'other' runs carry no tokens — skip them.
        if (type === 'other' && !piece.trim()) return;
        out.push({ type, text: piece });
    };
    let runType = null;
    let runStart = 0;
    for (let i = 0; i < text.length; i++) {
        const t = classify(text.charCodeAt(i));
        if (t === runType) continue;
        if (runType !== null && runStart < i) {
            flush(runType, text.slice(runStart, i));
        }
        runType = t;
        runStart = i;
    }
    // Trailing run.
    if (runStart < text.length) {
        flush(runType, text.slice(runStart));
    }
    return out;
}
// ═══════════════════════════════════════════════════════════════════════════
// 亚洲文字语言检测(中文 vs 日语)
// ═══════════════════════════════════════════════════════════════════════════
/**
* 检测亚洲文字段的语言
*
* 假名占比 > 30% 判定为日语(日语文本中假名通常占 40-60%
*
* @param {string} text - 亚洲文字段
* @returns {'zh'|'ja'|'other'}
*/
/**
 * Classify an Asian-script segment as Chinese or Japanese.
 *
 * Kana share above 30% ⇒ Japanese (Japanese prose typically runs 40-60% kana).
 *
 * @param {string} text - Asian-script segment
 * @returns {'zh'|'ja'|'other'}
 */
function detectAsianLanguage(text) {
    let kana = 0;
    let han = 0;
    for (const ch of text) {
        const cp = ch.codePointAt(0);
        if (isKana(cp)) {
            kana += 1;
        } else if (isCJK(cp)) {
            han += 1;
        }
    }
    const scriptChars = kana + han;
    if (scriptChars === 0) return 'other';
    return (kana / scriptChars) > 0.3 ? 'ja' : 'zh';
}
// ═══════════════════════════════════════════════════════════════════════════
// 实体保护(最长匹配占位符替换)
// ═══════════════════════════════════════════════════════════════════════════
// Placeholders are built from pure Private-Use-Area characters so that no
// latin letters from a masked entity can leak into tokenizer output.
const PLACEHOLDER_PREFIX = '\uE000\uE010';
const PLACEHOLDER_SUFFIX = '\uE001';
/**
* 在文本中执行实体最长匹配,替换为占位符
*
* @param {string} text - 原始文本
* @returns {{masked: string, entities: Map<string, string>}} masked 文本 + 占位符→原文映射
*/
/**
 * Longest-match entity masking: replace each known-entity occurrence with a
 * PUA placeholder so the tokenizer cannot split it.
 *
 * @param {string} text - original text
 * @returns {{masked: string, entities: Map<string, string>}} masked text plus
 *          placeholder → original-surface-form map for later restoration
 */
function maskEntities(text) {
    const entities = new Map();
    if (!entityList.length || !text) {
        return { masked: text, entities };
    }
    let masked = text;
    let idx = 0;
    // entityList is sorted by length descending → longest match wins.
    for (const entity of entityList) {
        const lowerEntity = entity.toLowerCase();
        let searchFrom = 0;
        while (true) {
            // BUGFIX: the lowercase copy must be rebuilt on every iteration.
            // Each replacement changes masked's length, so an index found in
            // a stale pre-loop copy would point at the wrong position in the
            // current masked string for second and later occurrences.
            const pos = masked.toLowerCase().indexOf(lowerEntity, searchFrom);
            if (pos === -1) break;
            // Skip spans already covered by a placeholder (look for PUA
            // boundary characters in a small window around the match).
            const aroundStart = Math.max(0, pos - 4);
            const aroundEnd = Math.min(masked.length, pos + entity.length + 4);
            const around = masked.slice(aroundStart, aroundEnd);
            if (around.includes('\uE000') || around.includes('\uE001')) {
                searchFrom = pos + 1;
                continue;
            }
            const placeholder = `${PLACEHOLDER_PREFIX}${idx}${PLACEHOLDER_SUFFIX}`;
            // Preserve the original casing of the matched span.
            const originalText = masked.slice(pos, pos + entity.length);
            entities.set(placeholder, originalText);
            masked = masked.slice(0, pos) + placeholder + masked.slice(pos + entity.length);
            idx++;
            // Resume searching just past the inserted placeholder.
            searchFrom = pos + placeholder.length;
        }
    }
    return { masked, entities };
}
/**
* 将 token 数组中的占位符还原为原始实体
*
* @param {string[]} tokens
* @param {Map<string, string>} entities - 占位符→原文映射
* @returns {string[]}
*/
/**
 * Restore entity placeholders in a token array back to their original text.
 *
 * @param {string[]} tokens
 * @param {Map<string, string>} entities - placeholder → original text
 * @returns {string[]}
 */
function unmaskTokens(tokens, entities) {
    if (entities.size === 0) return tokens;
    const puaPattern = /[\uE000-\uE0FF]/;
    const expand = (token) => {
        // Token is exactly one placeholder → restore the entity.
        if (entities.has(token)) return [entities.get(token)];
        // No PUA characters at all → ordinary token, keep untouched.
        if (!puaPattern.test(token)) return [token];
        // Token carries PUA characters: restore when it embeds a complete
        // placeholder, otherwise drop the stray fragment.
        for (const [placeholder, original] of entities) {
            if (token.includes(placeholder)) return [original];
        }
        return [];
    };
    return tokens.flatMap(expand);
}
// ═══════════════════════════════════════════════════════════════════════════
// 分词:亚洲文字(结巴 / 降级)
// ═══════════════════════════════════════════════════════════════════════════
/**
* 用结巴分词处理亚洲文字段
* @param {string} text
* @returns {string[]}
*/
/**
 * Tokenize an Asian-script segment through jieba WASM.
 * Falls back to the heuristic splitter when jieba throws.
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeAsianJieba(text) {
    if (!text || !jiebaCut) return [];
    try {
        const result = [];
        for (const piece of Array.from(jiebaCut(text, true))) { // hmm=true
            const word = String(piece || '').trim();
            if (word.length >= 2) result.push(word);
        }
        return result;
    } catch (e) {
        xbLog.warn(MODULE_ID, '结巴分词异常,降级处理', e);
        return tokenizeAsianFallback(text);
    }
}
/**
* 降级分词:标点/空格分割 + 保留 2-6 字 CJK 片段
* 不使用 bigram避免索引膨胀
*
* @param {string} text
* @returns {string[]}
*/
/**
 * Heuristic fallback tokenizer: split on punctuation/whitespace, keep 2-6
 * char fragments whole, and cut longer fragments with a sparse 4-char
 * sliding window (no bigrams — keeps the index small).
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeAsianFallback(text) {
    if (!text) return [];
    const out = [];
    for (const rawPart of text.split(/[\s""''()【】《》…—\-,.!?;:'"()[\]{}<>/\\|@#$%^&*+=~`]+/)) {
        const frag = rawPart.trim();
        if (!frag) continue;
        if (frag.length <= 6) {
            // Single characters are too noisy; keep 2-6 char fragments as-is.
            if (frag.length >= 2) out.push(frag);
            continue;
        }
        // Long fragment: 4-char window, stride 2 (far sparser than bigrams).
        for (let start = 0; start + 4 <= frag.length; start += 2) {
            out.push(frag.slice(start, start + 4));
        }
        // Also keep the fragment's first 6 chars as a coarse token.
        out.push(frag.slice(0, 6));
    }
    return out;
}
/**
* 用 TinySegmenter 处理日语文字段
* @param {string} text
* @returns {string[]}
*/
/**
 * Tokenize a Japanese segment with TinySegmenter, or fall back to the
 * heuristic splitter when the segmenter is unavailable or throws.
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeJapanese(text) {
    if (!tinySegmenter) return tokenizeAsianFallback(text);
    try {
        return tinySegmenter
            .segment(text)
            .map(piece => String(piece || '').trim())
            .filter(piece => piece.length >= 2);
    } catch (e) {
        xbLog.warn(MODULE_ID, 'TinySegmenter 分词异常,降级处理', e);
        return tokenizeAsianFallback(text);
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 分词:拉丁文字
// ═══════════════════════════════════════════════════════════════════════════
/**
* 拉丁文字分词:空格/标点分割
* @param {string} text
* @returns {string[]}
*/
/**
 * Tokenize a latin-script segment: split on whitespace/punctuation,
 * lowercase, and keep words of length >= 3.
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeLatin(text) {
    if (!text) return [];
    const words = [];
    for (const raw of text.split(/[\s\-_.,;:!?'"()[\]{}<>/\\|@#$%^&*+=~`]+/)) {
        const word = raw.trim().toLowerCase();
        if (word.length >= 3) words.push(word);
    }
    return words;
}
// ═══════════════════════════════════════════════════════════════════════════
// 公开接口preload
// ═══════════════════════════════════════════════════════════════════════════
/**
* 预加载结巴 WASM
*
* 可多次调用,内部防重入。
* FAILED 状态下再次调用会重试。
*
* @returns {Promise<boolean>} 是否加载成功
*/
export async function preload() {
    // TinySegmenter loads independently of the jieba state machine (it has
    // its own re-entrancy guard); deliberately fire-and-forget, not awaited.
    loadTinySegmenter();
    // Already loaded — nothing to do.
    if (wasmState === WasmState.READY) return true;
    // A load is already in flight: await its outcome instead of starting another.
    if (wasmState === WasmState.LOADING && loadingPromise) {
        try {
            await loadingPromise;
            return wasmState === WasmState.READY;
        } catch {
            return false;
        }
    }
    // IDLE or FAILED → (re)start loading; FAILED therefore retries.
    wasmState = WasmState.LOADING;
    const T0 = performance.now();
    loadingPromise = (async () => {
        try {
            // Absolute path (leading '/') so the import resolves from the
            // site root, not relative to this module.
            const wasmPath = `/${extensionFolderPath}/libs/jieba-wasm/jieba_rs_wasm_bg.wasm`;
            // eslint-disable-next-line no-unsanitized/method
            jiebaModule = await import(
                `/${extensionFolderPath}/libs/jieba-wasm/jieba_rs_wasm.js`
            );
            // Initialize the WASM binary (newer wasm-bindgen API takes an
            // options object instead of a bare path).
            if (typeof jiebaModule.default === 'function') {
                await jiebaModule.default({ module_or_path: wasmPath });
            }
            // Cache function references for the tokenizer hot path.
            jiebaCut = jiebaModule.cut;
            jiebaAddWord = jiebaModule.add_word;
            if (typeof jiebaCut !== 'function') {
                throw new Error('jieba cut 函数不存在');
            }
            wasmState = WasmState.READY;
            const elapsed = Math.round(performance.now() - T0);
            xbLog.info(MODULE_ID, `结巴 WASM 加载完成 (${elapsed}ms)`);
            // Entities registered before the WASM was ready get injected now.
            if (entityList.length > 0 && jiebaAddWord) {
                reInjectAllEntities();
            }
            return true;
        } catch (e) {
            wasmState = WasmState.FAILED;
            xbLog.error(MODULE_ID, '结巴 WASM 加载失败', e);
            throw e;
        }
    })();
    try {
        await loadingPromise;
        return true;
    } catch {
        return false;
    } finally {
        // Clear the in-flight handle whatever the outcome.
        loadingPromise = null;
    }
}
/**
* 加载 TinySegmenter懒加载不阻塞
*/
async function loadTinySegmenter() {
    // Re-entrancy guard: already constructed.
    if (tinySegmenter) return;
    try {
        // eslint-disable-next-line no-unsanitized/method
        const mod = await import(
            `/${extensionFolderPath}/libs/tiny-segmenter.js`
        );
        // Support both named and default export shapes of the library.
        const Ctor = mod.TinySegmenter || mod.default;
        tinySegmenter = new Ctor();
        xbLog.info(MODULE_ID, 'TinySegmenter 加载完成');
    } catch (e) {
        // Non-fatal: Japanese falls back to the heuristic tokenizer.
        xbLog.warn(MODULE_ID, 'TinySegmenter 加载失败,日语将使用降级分词', e);
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 公开接口isReady
// ═══════════════════════════════════════════════════════════════════════════
/**
* 检查结巴是否已就绪
* @returns {boolean}
*/
export function isReady() {
    return WasmState.READY === wasmState;
}
/**
 * Get the current WASM loader state.
 * @returns {string} one of the WasmState values (e.g. READY/LOADING/FAILED)
 */
export function getState() {
    return wasmState;
}
// ═══════════════════════════════════════════════════════════════════════════
// 公开接口injectEntities
// ═══════════════════════════════════════════════════════════════════════════
/**
* 注入实体词典
*
* 更新内部实体列表(用于最长匹配保护)
* 如果结巴已就绪,同时调用 add_word 注入
*
* @param {Set<string>} lexicon - 标准化后的实体集合
* @param {Map<string, string>} [displayMap] - normalize→原词形映射
*/
/**
 * Replace the entity lexicon used for longest-match masking, and — when the
 * jieba WASM is ready — register the entities as custom words.
 *
 * @param {Set<string>} lexicon - normalized entity set
 * @param {Map<string, string>} [displayMap] - normalized → display form
 */
export function injectEntities(lexicon, displayMap) {
    if (!lexicon?.size) {
        entityList = [];
        return;
    }
    // Collect display forms (falling back to the normalized form),
    // keeping only entries of length >= 2.
    const collected = [];
    for (const normalized of lexicon) {
        const surface = displayMap?.get(normalized) || normalized;
        if (surface.length >= 2) collected.push(surface);
    }
    // Longest first so maskEntities() always takes the longest match.
    collected.sort((a, b) => b.length - a.length);
    entityList = collected;
    // Jieba already loaded → register the custom words right away.
    if (wasmState === WasmState.READY && jiebaAddWord) {
        injectNewEntitiesToJieba(collected);
    }
    xbLog.info(MODULE_ID, `实体词典更新: ${collected.length} 个实体`);
}
/**
* 将新实体注入结巴(增量,跳过已注入的)
* @param {string[]} entities
*/
/**
 * Incrementally register entities as jieba custom words,
 * skipping those already injected.
 * @param {string[]} entities
 */
function injectNewEntitiesToJieba(entities) {
    let added = 0;
    for (const word of entities) {
        if (injectedEntities.has(word)) continue;
        try {
            // Very high frequency keeps jieba from splitting the entity.
            jiebaAddWord(word, 99999);
            injectedEntities.add(word);
            added++;
        } catch (e) {
            xbLog.warn(MODULE_ID, `add_word 失败: ${word}`, e);
        }
    }
    if (added > 0) {
        xbLog.info(MODULE_ID, `注入 ${added} 个新实体到结巴`);
    }
}
/**
 * Re-inject the full entity list into jieba.
 * Called right after the WASM module finishes loading (see preload), since
 * the injected-words bookkeeping is reset first.
 */
function reInjectAllEntities() {
    injectedEntities.clear();
    injectNewEntitiesToJieba(entityList);
}
// ═══════════════════════════════════════════════════════════════════════════
// 公开接口tokenize
// ═══════════════════════════════════════════════════════════════════════════
/**
* 统一分词接口
*
* 流程:
* 1. 实体最长匹配 → 占位符保护
* 2. 按 Unicode 脚本分段(亚洲 vs 拉丁)
* 3. 亚洲段 → 结巴 cut()(或降级)
* 4. 拉丁段 → 空格/标点分割
* 5. 还原占位符
* 6. 过滤停用词 + 去重
*
* @param {string} text - 输入文本
* @returns {string[]} token 数组
*/
/**
 * Unified tokenizer entry point.
 *
 * Pipeline: entity masking → script segmentation → per-script tokenizing →
 * placeholder restore (all in tokenizeCore), then stop-word filtering,
 * case-insensitive dedup, and punctuation cleanup here.
 *
 * @param {string} text - input text
 * @returns {string[]} tokens (original casing preserved)
 */
export function tokenize(text) {
    const seenKeys = new Set();
    const output = [];
    for (const raw of tokenizeCore(text)) {
        const key = raw.trim().toLowerCase();
        if (key.length < 2) continue; // also skips empty strings
        if (STOP_WORDS.has(key)) continue;
        if (seenKeys.has(key)) continue;
        // Drop tokens made solely of whitespace/control/punctuation/symbols.
        if (/^[\s\x00-\x1F\p{P}\p{S}]+$/u.test(key)) continue;
        seenKeys.add(key);
        output.push(raw.trim()); // keep the original casing
    }
    return output;
}
/**
* 内核分词流程(不去重、不 lower、仅完成实体保护→分段→分词→还原
* @param {string} text
* @returns {string[]}
*/
/**
 * Core tokenizing pipeline: entity masking → script segmentation →
 * per-segment tokenizing → placeholder restore.
 * No dedup and no lowercasing — callers post-process.
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeCore(text) {
    if (!text) return [];
    const input = String(text).trim();
    if (!input) return [];
    // 1) protect entities behind placeholders
    const { masked, entities } = maskEntities(input);
    // 2) + 3) segment by script and tokenize each segment
    const tokens = [];
    for (const segment of segmentByScript(masked)) {
        if (segment.type === 'latin') {
            tokens.push(...tokenizeLatin(segment.text));
            continue;
        }
        if (segment.type !== 'asian') continue; // 'other' carries no tokens
        if (detectAsianLanguage(segment.text) === 'ja') {
            tokens.push(...tokenizeJapanese(segment.text));
        } else if (wasmState === WasmState.READY && jiebaCut) {
            tokens.push(...tokenizeAsianJieba(segment.text));
        } else {
            tokens.push(...tokenizeAsianFallback(segment.text));
        }
    }
    // 4) restore placeholders back into entity text
    return unmaskTokens(tokens, entities);
}
// ═══════════════════════════════════════════════════════════════════════════
// 公开接口tokenizeForIndex
// ═══════════════════════════════════════════════════════════════════════════
/**
* MiniSearch 索引专用分词
*
* 与 tokenize() 的区别:
* - 全部转小写MiniSearch 内部需要一致性)
* - 不去重MiniSearch 自己处理词频)
*
* @param {string} text
* @returns {string[]}
*/
/**
 * Tokenizer variant for the MiniSearch index.
 *
 * Unlike tokenize(): everything is lowercased (MiniSearch needs consistent
 * casing) and duplicates are kept (MiniSearch tracks term frequency itself).
 *
 * @param {string} text
 * @returns {string[]}
 */
export function tokenizeForIndex(text) {
    const cleaned = [];
    for (const raw of tokenizeCore(text)) {
        const token = raw.trim().toLowerCase();
        if (token.length < 2) continue;
        if (STOP_WORDS.has(token)) continue;
        if (/^[\s\x00-\x1F\p{P}\p{S}]+$/u.test(token)) continue;
        cleaned.push(token);
    }
    return cleaned;
}
// ═══════════════════════════════════════════════════════════════════════════
// 公开接口reset
// ═══════════════════════════════════════════════════════════════════════════
/**
* 重置分词器状态
* 用于测试或模块卸载
*/
export function reset() {
    // Drop the entity lexicon and the injected-words bookkeeping.
    entityList = [];
    injectedEntities.clear();
    // Deliberately leaves wasmState alone so the WASM is not re-downloaded.
}

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@
*/
const V3_URL = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
const FREE_V1_URL = 'https://hstts.velure.top';
const FREE_V1_URL = 'https://hstts.velure.codes';
export const FREE_VOICES = [
{ key: 'female_1', name: '桃夭', tag: '甜蜜仙子', gender: 'female' },

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,746 @@
import { getContext } from '../../../../../../extensions.js';
import { getLocalVariable, setLocalVariable } from '../../../../../../variables.js';
import { extractStateBlocks, computeStateSignature, parseStateBlock } from './parser.js';
import { generateSemantic } from './semantic.js';
import { validate, setRule, loadRulesFromMeta, saveRulesToMeta } from './guard.js';
/**
* =========================
* Path / JSON helpers
* =========================
*/
/**
 * Split a variable path like `a.b[2]["c d"]` into segments.
 * Purely-numeric segments become numbers; bracket contents may be quoted.
 * @param {string} path
 * @returns {Array<string|number>}
 */
function splitPath(path) {
    const text = String(path || '');
    const segments = [];
    // Numeric-looking tokens are converted to array indices.
    const pushToken = (token) => {
        segments.push(/^\d+$/.test(token) ? Number(token) : token);
    };
    let pending = '';
    const flushPending = () => {
        if (pending) {
            pushToken(pending);
            pending = '';
        }
    };
    let pos = 0;
    while (pos < text.length) {
        const ch = text[pos];
        if (ch === '.') {
            flushPending();
            pos++;
        } else if (ch === '[') {
            flushPending();
            pos++;
            let inner = '';
            if (text[pos] === '"' || text[pos] === "'") {
                // Quoted bracket key: read until the matching quote.
                const quote = text[pos++];
                while (pos < text.length && text[pos] !== quote) inner += text[pos++];
                pos++; // step over the closing quote
            } else {
                while (pos < text.length && text[pos] !== ']') inner += text[pos++];
            }
            if (text[pos] === ']') pos++;
            pushToken(inner.trim());
        } else {
            pending += ch;
            pos++;
        }
    }
    flushPending();
    return segments;
}
/**
 * Canonicalize a path to dot notation (e.g. `a[2].b` → `a.2.b`).
 * @param {string} path
 * @returns {string}
 */
function normalizePath(path) {
    const segments = splitPath(path);
    return segments.map(seg => String(seg)).join('.');
}
/**
 * JSON.stringify that never throws.
 * @param {*} v
 * @returns {string} serialized value, or '' on failure (e.g. circular refs)
 */
function safeJSON(v) {
    try {
        return JSON.stringify(v);
    } catch {
        return '';
    }
}
/**
 * Best-effort parse of a stored variable string.
 * JSON objects/arrays, numbers and booleans are decoded; anything else is
 * returned verbatim. Null/empty/blank input yields undefined; non-strings
 * pass through untouched.
 * @param {*} s
 * @returns {*}
 */
function safeParse(s) {
    if (s == null || s === '') return undefined;
    if (typeof s !== 'string') return s;
    const trimmed = s.trim();
    if (!trimmed) return undefined;
    const first = trimmed[0];
    if (first === '{' || first === '[') {
        try {
            return JSON.parse(trimmed);
        } catch {
            return s; // malformed JSON → hand back the raw string
        }
    }
    if (/^-?\d+(?:\.\d+)?$/.test(trimmed)) return Number(trimmed);
    if (trimmed === 'true') return true;
    if (trimmed === 'false') return false;
    return s;
}
/**
 * Deep-clone a value; never throws.
 * Tries structuredClone, then a JSON round-trip, and finally returns the
 * original reference when both fail.
 * @param {*} obj
 * @returns {*}
 */
function deepClone(obj) {
    try {
        return structuredClone(obj);
    } catch {
        try {
            return JSON.parse(JSON.stringify(obj));
        } catch {
            return obj;
        }
    }
}
/**
* =========================
* Variable getters/setters (local vars)
* =========================
*/
/**
 * Read a (possibly nested) local variable by path.
 * @param {string} path
 * @returns {*} the value, or undefined when the path cannot be resolved
 */
function getVar(path) {
    const segs = splitPath(path);
    if (segs.length === 0) return undefined;
    let node = safeParse(getLocalVariable(String(segs[0])));
    if (segs.length === 1) return node;
    // Nested read requires an object/array root.
    if (!node || typeof node !== 'object') return undefined;
    for (const key of segs.slice(1)) {
        node = node?.[key];
        if (node === undefined) return undefined;
    }
    return node;
}
/**
 * Write a (possibly nested) local variable by path, creating intermediate
 * containers as needed (array when the following key is numeric).
 * @param {string} path
 * @param {*} value
 */
function setVar(path, value) {
    const segs = splitPath(path);
    if (!segs.length) return;
    const rootName = String(segs[0]);
    // Whole-variable write: objects stored as JSON, scalars as strings.
    if (segs.length === 1) {
        const serialized = (value && typeof value === 'object') ? safeJSON(value) : String(value ?? '');
        setLocalVariable(rootName, serialized);
        return;
    }
    // Nested write: load (or create) the root container.
    let root = safeParse(getLocalVariable(rootName));
    if (!root || typeof root !== 'object') {
        root = typeof segs[1] === 'number' ? [] : {};
    }
    let node = root;
    for (let depth = 1; depth < segs.length - 1; depth++) {
        const key = segs[depth];
        const next = segs[depth + 1];
        if (node[key] == null || typeof node[key] !== 'object') {
            node[key] = typeof next === 'number' ? [] : {};
        }
        node = node[key];
    }
    node[segs[segs.length - 1]] = value;
    setLocalVariable(rootName, safeJSON(root));
}
/**
 * Delete a (possibly nested) local variable by path.
 * Top-level delete blanks the stored value; a numeric leaf on an array
 * is removed with splice (shifting later elements).
 * @param {string} path
 */
function delVar(path) {
    const segs = splitPath(path);
    if (!segs.length) return;
    const rootName = String(segs[0]);
    if (segs.length === 1) {
        setLocalVariable(rootName, '');
        return;
    }
    let root = safeParse(getLocalVariable(rootName));
    if (!root || typeof root !== 'object') return;
    // Walk to the parent of the leaf; bail out if the path breaks.
    let parent = root;
    for (let depth = 1; depth < segs.length - 1; depth++) {
        parent = parent?.[segs[depth]];
        if (!parent || typeof parent !== 'object') return;
    }
    const leaf = segs[segs.length - 1];
    if (Array.isArray(parent) && typeof leaf === 'number') {
        parent.splice(leaf, 1);
    } else {
        delete parent[leaf];
    }
    setLocalVariable(rootName, safeJSON(root));
}
/**
 * Append value(s) to an array variable at the given path.
 * Missing containers along the path are created; an existing non-array
 * target is rejected rather than overwritten.
 * @param {string} path
 * @param {*} value - a single item or an array of items to append
 * @returns {{ok: boolean, reason?: string}}
 */
function pushVar(path, value) {
    const segs = splitPath(path);
    if (!segs.length) return { ok: false, reason: 'invalid-path' };
    const rootName = String(segs[0]);
    if (segs.length === 1) {
        let arr = safeParse(getLocalVariable(rootName));
        // Type check: the target must be an array or absent.
        if (arr !== undefined && !Array.isArray(arr)) {
            return { ok: false, reason: 'not-array' };
        }
        if (!Array.isArray(arr)) arr = [];
        const items = Array.isArray(value) ? value : [value];
        arr.push(...items);
        setLocalVariable(rootName, safeJSON(arr));
        return { ok: true };
    }
    // Nested target: walk/create intermediate containers
    // (array when the next segment is numeric, object otherwise).
    let root = safeParse(getLocalVariable(rootName));
    if (!root || typeof root !== 'object') {
        root = typeof segs[1] === 'number' ? [] : {};
    }
    let cur = root;
    for (let i = 1; i < segs.length - 1; i++) {
        const key = segs[i];
        const nextKey = segs[i + 1];
        if (cur[key] == null || typeof cur[key] !== 'object') {
            cur[key] = typeof nextKey === 'number' ? [] : {};
        }
        cur = cur[key];
    }
    const lastKey = segs[segs.length - 1];
    let arr = cur[lastKey];
    // Type check: the target must be an array or absent.
    if (arr !== undefined && !Array.isArray(arr)) {
        return { ok: false, reason: 'not-array' };
    }
    if (!Array.isArray(arr)) arr = [];
    const items = Array.isArray(value) ? value : [value];
    arr.push(...items);
    cur[lastKey] = arr;
    setLocalVariable(rootName, safeJSON(root));
    return { ok: true };
}
/**
 * Remove value(s) from an array variable at the given path.
 * Each removal matches the first element whose JSON serialization equals
 * the requested value's; missing or non-array targets are rejected.
 * @param {string} path
 * @param {*} value - a single item or an array of items to remove
 * @returns {{ok: boolean, reason?: string}}
 */
function popVar(path, value) {
    const segs = splitPath(path);
    if (!segs.length) return { ok: false, reason: 'invalid-path' };
    const rootName = String(segs[0]);
    let root = safeParse(getLocalVariable(rootName));
    if (segs.length === 1) {
        if (!Array.isArray(root)) {
            return { ok: false, reason: 'not-array' };
        }
        const toRemove = Array.isArray(value) ? value : [value];
        for (const v of toRemove) {
            // Structural equality via JSON comparison.
            const vStr = safeJSON(v);
            const idx = root.findIndex(x => safeJSON(x) === vStr);
            if (idx !== -1) root.splice(idx, 1);
        }
        setLocalVariable(rootName, safeJSON(root));
        return { ok: true };
    }
    if (!root || typeof root !== 'object') {
        return { ok: false, reason: 'not-array' };
    }
    // Walk to the parent of the target array; bail out if the path breaks.
    let cur = root;
    for (let i = 1; i < segs.length - 1; i++) {
        cur = cur?.[segs[i]];
        if (!cur || typeof cur !== 'object') {
            return { ok: false, reason: 'path-not-found' };
        }
    }
    const lastKey = segs[segs.length - 1];
    let arr = cur[lastKey];
    if (!Array.isArray(arr)) {
        return { ok: false, reason: 'not-array' };
    }
    const toRemove = Array.isArray(value) ? value : [value];
    for (const v of toRemove) {
        const vStr = safeJSON(v);
        const idx = arr.findIndex(x => safeJSON(x) === vStr);
        if (idx !== -1) arr.splice(idx, 1);
    }
    setLocalVariable(rootName, safeJSON(root));
    return { ok: true };
}
/**
* =========================
* Storage (chat_metadata.extensions.LittleWhiteBox)
* =========================
*/
// Namespace key inside chat_metadata.extensions for this extension's data.
const EXT_ID = 'LittleWhiteBox';
// Local variable used to publish state errors (see writeStateErrorsToLocalVar).
const ERR_VAR_NAME = 'LWB_STATE_ERRORS';
// chat_metadata keys for the write-ahead log and the checkpoint store.
const LOG_KEY = 'stateLogV2';
const CKPT_KEY = 'stateCkptV2';
/**
* 写入状态错误到本地变量(覆盖写入)
*/
/**
 * Publish state errors to the LWB_STATE_ERRORS local variable as a bullet
 * list (overwrites any previous value; an empty list clears the variable).
 * @param {string[]} lines
 */
function writeStateErrorsToLocalVar(lines) {
    try {
        let text = '';
        if (Array.isArray(lines) && lines.length > 0) {
            text = lines.map(line => `- ${String(line)}`).join('\n');
        }
        setLocalVariable(ERR_VAR_NAME, text);
    } catch {}
}
/**
 * Get (creating on demand) this extension's private bag under
 * chat_metadata.extensions.
 * @returns {object} the LittleWhiteBox metadata object
 */
function getLwbExtMeta() {
    const ctx = getContext();
    // BUGFIX: the original optional-chained the read (`ctx?.chatMetadata`)
    // but then assigned `ctx.chatMetadata = {}` unconditionally, which throws
    // when getContext() yields null/undefined. Fall back to a detached object
    // in that case (nothing can be persisted anyway).
    const meta = ctx?.chatMetadata ?? (ctx ? (ctx.chatMetadata = {}) : {});
    meta.extensions ||= {};
    meta.extensions[EXT_ID] ||= {};
    return meta.extensions[EXT_ID];
}
/**
 * Get (creating on demand) the write-ahead log store.
 * @returns {{version: number, floors: object}}
 */
function getStateLog() {
    const ext = getLwbExtMeta();
    if (!ext[LOG_KEY]) {
        ext[LOG_KEY] = { version: 1, floors: {} };
    }
    return ext[LOG_KEY];
}
/**
 * Get (creating on demand) the checkpoint store.
 * `every` is the checkpoint interval in floors.
 * @returns {{version: number, every: number, points: object}}
 */
function getCheckpointStore() {
    const ext = getLwbExtMeta();
    if (!ext[CKPT_KEY]) {
        ext[CKPT_KEY] = { version: 1, every: 50, points: {} };
    }
    return ext[CKPT_KEY];
}
/**
 * Persist one WAL record (signature + cloned rules/ops) for a floor.
 * @param {number} floor
 * @param {string} signature
 * @param {Array} rules
 * @param {Array} ops
 */
function saveWalRecord(floor, signature, rules, ops) {
    const record = {
        signature: String(signature || ''),
        rules: Array.isArray(rules) ? deepClone(rules) : [],
        ops: Array.isArray(ops) ? deepClone(ops) : [],
        ts: Date.now(),
    };
    getStateLog().floors[String(floor)] = record;
    getContext()?.saveMetadataDebounced?.();
}
/**
* checkpoint = 执行完 floor 后的全量变量+规则
*/
/**
 * Store a full-state checkpoint (variables + rules) after the given floor,
 * when the floor is non-negative and lands on the checkpoint interval.
 * @param {number} floor
 */
function saveCheckpointIfNeeded(floor) {
    const store = getCheckpointStore();
    const interval = Number(store.every) || 50;
    if (floor < 0 || interval <= 0 || floor % interval !== 0) return;
    const ctx = getContext();
    const meta = ctx?.chatMetadata || {};
    store.points[String(floor)] = {
        vars: deepClone(meta.variables || {}),
        // 2.0 rules live in chatMetadata (written there by guard.js).
        rules: deepClone(meta.LWB_RULES_V2 || {}),
        ts: Date.now(),
    };
    ctx?.saveMetadataDebounced?.();
}
/**
* =========================
* Applied signature map (idempotent)
* =========================
*/
const LWB_STATE_APPLIED_KEY = 'LWB_STATE_APPLIED_KEY';
/**
 * Get (creating on demand) the floor → applied-signature map.
 * @returns {object}
 */
function getAppliedMap() {
    const meta = getContext()?.chatMetadata || {};
    if (!meta[LWB_STATE_APPLIED_KEY]) {
        meta[LWB_STATE_APPLIED_KEY] = {};
    }
    return meta[LWB_STATE_APPLIED_KEY];
}
/**
 * Forget the applied signature for one floor so it will be re-applied.
 * @param {number} floor
 */
export function clearStateAppliedFor(floor) {
    try {
        const applied = getAppliedMap();
        delete applied[floor];
        getContext()?.saveMetadataDebounced?.();
    } catch {}
}
/**
 * Forget applied signatures for every floor >= floorInclusive.
 * @param {number} floorInclusive
 */
export function clearStateAppliedFrom(floorInclusive) {
    try {
        const applied = getAppliedMap();
        Object.keys(applied)
            .filter(key => Number(key) >= floorInclusive)
            .forEach(key => { delete applied[key]; });
        getContext()?.saveMetadataDebounced?.();
    } catch {}
}
/**
 * True when an op is a 'del' whose final path segment is a numeric index
 * (i.e. an array-element delete).
 * @param {object} opItem
 * @returns {boolean}
 */
function isIndexDeleteOp(opItem) {
    if (opItem?.op !== 'del') return false;
    const segs = splitPath(opItem.path);
    if (segs.length === 0) return false;
    const leaf = segs[segs.length - 1];
    return typeof leaf === 'number' && Number.isFinite(leaf);
}
/**
 * Reorder ops so array-index deletes execute safely.
 *
 * Index-deletes are grouped per parent array (groups kept in
 * first-appearance order), sorted high-index-first within each group, and
 * the whole batch runs BEFORE all other ops — so every delete still refers
 * to the original indices (a push in the same round cannot shift them).
 *
 * @param {object[]} ops
 * @returns {object[]} execution-ordered ops
 */
function buildExecOpsWithIndexDeleteReorder(ops) {
    /** @type {Map<string, Array<{op: object, idx: number}>>} */
    const deleteGroups = new Map();
    const others = [];
    for (const opItem of ops) {
        if (!isIndexDeleteOp(opItem)) {
            others.push(opItem);
            continue;
        }
        const segs = splitPath(opItem.path);
        const idx = segs[segs.length - 1];
        // Canonical key for the parent array (e.g. `a.b[2].c`).
        let parentKey = '';
        for (const seg of segs.slice(0, -1)) {
            if (typeof seg === 'number') {
                parentKey += `[${seg}]`;
            } else {
                parentKey = parentKey ? `${parentKey}.${seg}` : String(seg);
            }
        }
        if (!deleteGroups.has(parentKey)) deleteGroups.set(parentKey, []);
        deleteGroups.get(parentKey).push({ op: opItem, idx });
    }
    // Map iteration preserves insertion order → first-appearance group order.
    const orderedDeletes = [];
    for (const items of deleteGroups.values()) {
        items.sort((a, b) => b.idx - a.idx); // descending index = safe deletes
        for (const item of items) orderedDeletes.push(item.op);
    }
    return [...orderedDeletes, ...others];
}
/**
* =========================
* Core: apply one message text (<state>...) => update vars + rules + wal + checkpoint
* =========================
*/
/**
 * Apply the <state> block(s) found in one message: register rules, execute
 * variable ops through the guard, write the WAL record, mark the floor's
 * signature as applied, and optionally store a checkpoint.
 *
 * Idempotent per floor: a floor whose signature is already applied is skipped.
 *
 * @param {number} messageId - floor index of the message
 * @param {string} messageContent - raw message text
 * @returns {{atoms: object[], errors: string[], skipped: boolean}}
 */
export function applyStateForMessage(messageId, messageContent) {
    const ctx = getContext();
    const chatId = ctx?.chatId || '';
    loadRulesFromMeta();
    const text = String(messageContent ?? '');
    const signature = computeStateSignature(text);
    const blocks = extractStateBlocks(text);
    // No executable blocks → treat this floor's state as removed:
    // forget its applied signature, clear errors, and drop its WAL record.
    if (!signature || blocks.length === 0) {
        clearStateAppliedFor(messageId);
        writeStateErrorsToLocalVar([]);
        try {
            const ext = getLwbExtMeta();
            const log = ext[LOG_KEY];
            if (log?.floors) delete log.floors[String(messageId)];
            getContext()?.saveMetadataDebounced?.();
        } catch {}
        return { atoms: [], errors: [], skipped: false };
    }
    // Idempotence: identical signature already applied for this floor.
    const appliedMap = getAppliedMap();
    if (appliedMap[messageId] === signature) {
        return { atoms: [], errors: [], skipped: true };
    }
    const atoms = [];
    const errors = [];
    let idx = 0;
    // Merge rules/ops from every block on this floor.
    const mergedRules = [];
    const mergedOps = [];
    for (const block of blocks) {
        const parsed = parseStateBlock(block);
        mergedRules.push(...(parsed?.rules || []));
        mergedOps.push(...(parsed?.ops || []));
    }
    if (blocks.length) {
        // WAL: one record with the complete rules/ops for this floor.
        saveWalRecord(messageId, signature, mergedRules, mergedOps);
        // Register all rules in one pass, persisting once at the end.
        let rulesTouched = false;
        for (const { path, rule } of mergedRules) {
            if (path && rule && Object.keys(rule).length) {
                setRule(normalizePath(path), rule);
                rulesTouched = true;
            }
        }
        if (rulesTouched) saveRulesToMeta();
        // Reorder so array-index deletes run first (by original index).
        const execOps = buildExecOpsWithIndexDeleteReorder(mergedOps);
        for (const opItem of execOps) {
            const { path, op, value, delta, warning } = opItem;
            if (!path) continue;
            if (warning) errors.push(`[${path}] ${warning}`);
            const absPath = normalizePath(path);
            const oldValue = getVar(path);
            // Guard validation; for 'inc' the payload is the delta.
            const guard = validate(op, absPath, op === 'inc' ? delta : value, oldValue);
            if (!guard.allow) {
                errors.push(`${path}: ${guard.reason || '\u88ab\u89c4\u5219\u62d2\u7edd'}`);
                continue;
            }
            // Surface guard corrections (e.g. clamping) as error-list notes.
            if (guard.note) {
                if (op === 'inc') {
                    const raw = Number(delta);
                    const rawTxt = Number.isFinite(raw) ? `${raw >= 0 ? '+' : ''}${raw}` : String(delta ?? '');
                    errors.push(`${path}: ${rawTxt} ${guard.note}`);
                } else {
                    errors.push(`${path}: ${guard.note}`);
                }
            }
            let execOk = true;
            let execReason = '';
            try {
                switch (op) {
                    case 'set':
                        setVar(path, guard.value);
                        break;
                    case 'inc':
                        // For 'inc', guard.value is the final next value.
                        setVar(path, guard.value);
                        break;
                    case 'push': {
                        const result = pushVar(path, guard.value);
                        if (!result.ok) { execOk = false; execReason = result.reason; }
                        break;
                    }
                    case 'pop': {
                        const result = popVar(path, guard.value);
                        if (!result.ok) { execOk = false; execReason = result.reason; }
                        break;
                    }
                    case 'del':
                        delVar(path);
                        break;
                    default:
                        execOk = false;
                        execReason = `未知 op=${op}`;
                }
            } catch (e) {
                execOk = false;
                execReason = e?.message || String(e);
            }
            if (!execOk) {
                errors.push(`[${path}] 失败: ${execReason}`);
                continue;
            }
            // Record a change atom for downstream consumers.
            const newValue = getVar(path);
            atoms.push({
                atomId: `sa-${messageId}-${idx}`,
                chatId,
                floor: messageId,
                idx,
                path,
                op,
                oldValue,
                newValue,
                delta: op === 'inc' ? delta : undefined,
                semantic: generateSemantic(path, op, oldValue, newValue, delta, value),
                timestamp: Date.now(),
            });
            idx++;
        }
    }
    // Mark this floor's signature as applied and persist.
    appliedMap[messageId] = signature;
    getContext()?.saveMetadataDebounced?.();
    // Optionally store a full checkpoint after executing this floor.
    saveCheckpointIfNeeded(messageId);
    // Publish the collected error list to the local variable.
    writeStateErrorsToLocalVar(errors);
    return { atoms, errors, skipped: false };
}
/**
* =========================
* Restore / Replay (for rollback & rebuild)
* =========================
*/
/**
* 恢复到 targetFloor 执行完成后的变量状态(含规则)
* - 使用最近 checkpoint然后 replay WAL
* - 不依赖消息文本 <state>(避免被正则清掉)
*/
/**
 * Restore variables and rules to the state after targetFloor finished.
 * Loads the nearest checkpoint at or below the target, then replays WAL
 * records forward — message text is never re-parsed (so a regex-stripped
 * <state> block cannot break a rollback).
 *
 * @param {number} targetFloor - floor to restore to; < 0 clears everything
 * @returns {Promise<{ok: boolean, usedCheckpoint: number|null}>}
 */
export async function restoreStateV2ToFloor(targetFloor) {
    const ctx = getContext();
    const meta = ctx?.chatMetadata || {};
    const floor = Number(targetFloor);
    if (!Number.isFinite(floor) || floor < 0) {
        // floor < 0 → wipe variables and rules entirely.
        meta.variables = {};
        meta.LWB_RULES_V2 = {};
        ctx?.saveMetadataDebounced?.();
        return { ok: true, usedCheckpoint: null };
    }
    const log = getStateLog();
    const ckpt = getCheckpointStore();
    const points = ckpt.points || {};
    // Nearest checkpoint at or below the target floor.
    const available = Object.keys(points)
        .map(Number)
        .filter(n => Number.isFinite(n) && n <= floor)
        .sort((a, b) => b - a);
    const ck = available.length ? available[0] : null;
    // 1) Restore the checkpoint snapshot, or start from an empty baseline.
    if (ck != null) {
        const snap = points[String(ck)];
        meta.variables = deepClone(snap?.vars || {});
        meta.LWB_RULES_V2 = deepClone(snap?.rules || {});
    } else {
        meta.variables = {};
        meta.LWB_RULES_V2 = {};
    }
    ctx?.saveMetadataDebounced?.();
    // 2) Load the restored rules into guard.js's in-memory table.
    loadRulesFromMeta();
    let rulesTouchedAny = false;
    // 3) Replay WAL records for floors (ck+1 .. floor).
    const start = ck == null ? 0 : (ck + 1);
    for (let f = start; f <= floor; f++) {
        const rec = log.floors?.[String(f)];
        if (!rec) continue;
        // Apply the floor's rules first.
        const rules = Array.isArray(rec.rules) ? rec.rules : [];
        let touched = false;
        for (const r of rules) {
            const p = r?.path;
            const rule = r?.rule;
            if (p && rule && typeof rule === 'object') {
                setRule(normalizePath(p), rule);
                touched = true;
            }
        }
        if (touched) rulesTouchedAny = true;
        // Then re-execute the ops (no atoms emitted, no WAL writes).
        const ops = Array.isArray(rec.ops) ? rec.ops : [];
        const execOps = buildExecOpsWithIndexDeleteReorder(ops);
        for (const opItem of execOps) {
            const path = opItem?.path;
            const op = opItem?.op;
            if (!path || !op) continue;
            const absPath = normalizePath(path);
            const oldValue = getVar(path);
            const payload = (op === 'inc') ? opItem.delta : opItem.value;
            const guard = validate(op, absPath, payload, oldValue);
            if (!guard.allow) continue;
            try {
                switch (op) {
                    case 'set':
                        setVar(path, guard.value);
                        break;
                    case 'inc':
                        setVar(path, guard.value);
                        break;
                    case 'push': {
                        const result = pushVar(path, guard.value);
                        if (!result.ok) {/* ignore */}
                        break;
                    }
                    case 'pop': {
                        const result = popVar(path, guard.value);
                        if (!result.ok) {/* ignore */}
                        break;
                    }
                    case 'del':
                        delVar(path);
                        break;
                }
            } catch {
                // ignore replay errors
            }
        }
    }
    if (rulesTouchedAny) {
        saveRulesToMeta();
    }
    // 4) Floors after the target must recompute their applied signatures.
    clearStateAppliedFrom(floor + 1);
    ctx?.saveMetadataDebounced?.();
    return { ok: true, usedCheckpoint: ck };
}
/**
* 删除 floor >= fromFloor 的 2.0 持久化数据:
* - WAL: stateLogV2.floors
* - checkpoint: stateCkptV2.points
* - applied signature: LWB_STATE_APPLIED_KEY
*
* 用于 MESSAGE_DELETED 等“物理删除消息”场景,避免 WAL/ckpt 无限膨胀。
*/
/**
 * Drop all 2.0 persisted data for floors >= fromFloor:
 * WAL records, checkpoints, and applied signatures.
 * Used when messages are physically deleted, so the WAL/checkpoint stores
 * cannot grow without bound.
 *
 * @param {number} fromFloor
 * @returns {Promise<{ok: boolean}>}
 */
export async function trimStateV2FromFloor(fromFloor) {
    const start = Number(fromFloor);
    if (!Number.isFinite(start)) return { ok: false };
    const ctx = getContext();
    const meta = ctx?.chatMetadata || {};
    meta.extensions ||= {};
    meta.extensions[EXT_ID] ||= {};
    const ext = meta.extensions[EXT_ID];
    // Delete every numeric key >= start from a floor-keyed bag.
    const dropFrom = (bag) => {
        if (!bag || typeof bag !== 'object') return;
        for (const key of Object.keys(bag)) {
            const floor = Number(key);
            if (Number.isFinite(floor) && floor >= start) delete bag[key];
        }
    };
    dropFrom(ext[LOG_KEY]?.floors);  // 1) WAL records
    dropFrom(ext[CKPT_KEY]?.points); // 2) checkpoints
    // 3) applied signatures — floors >= start must be recomputed.
    try {
        clearStateAppliedFrom(start);
    } catch {}
    ctx?.saveMetadataDebounced?.();
    return { ok: true };
}

View File

@@ -0,0 +1,249 @@
import { getContext } from '../../../../../../extensions.js';
// chat_metadata key under which the 2.0 rules table is persisted.
const LWB_RULES_V2_KEY = 'LWB_RULES_V2';
// In-memory rules table: path pattern (may contain wildcards) → rule object.
let rulesTable = {};
/**
 * (Re)load the in-memory rules table from chat metadata.
 * Any failure leaves an empty table.
 */
export function loadRulesFromMeta() {
    let loaded = {};
    try {
        loaded = getContext()?.chatMetadata?.[LWB_RULES_V2_KEY] || {};
    } catch {
        loaded = {};
    }
    rulesTable = loaded;
}
/**
 * Persist a shallow copy of the in-memory rules table into chat metadata
 * and schedule a debounced save. Failures are ignored.
 */
export function saveRulesToMeta() {
    try {
        const ctx = getContext();
        const meta = ctx?.chatMetadata || {};
        meta[LWB_RULES_V2_KEY] = { ...rulesTable };
        ctx?.saveMetadataDebounced?.();
    } catch {}
}
/**
 * Resolve the rule that applies to an absolute path, including wildcard
 * patterns (see matchRuleWithWildcard).
 * @param {string} absPath
 * @returns {object|null}
 */
export function getRuleNode(absPath) {
    return matchRuleWithWildcard(absPath);
}
/**
 * Merge a rule into the in-memory table for a path (or wildcard pattern).
 * Does NOT persist — callers batch changes and call saveRulesToMeta().
 * @param {string} path
 * @param {object} rule
 */
export function setRule(path, rule) {
    rulesTable[path] = { ...(rulesTable[path] || {}), ...rule };
}
/**
 * Remove one rule and persist immediately (unlike setRule).
 * @param {string} path
 */
export function clearRule(path) {
    delete rulesTable[path];
    saveRulesToMeta();
}
/**
 * Wipe the entire rules table and persist immediately.
 */
export function clearAllRules() {
    rulesTable = {};
    saveRulesToMeta();
}
/**
 * Get the dot-path of a path's parent ('' for a root or empty path).
 * @param {string} absPath
 * @returns {string}
 */
export function getParentPath(absPath) {
    const segments = String(absPath).split('.').filter(Boolean);
    return segments.length > 1 ? segments.slice(0, -1).join('.') : '';
}
/**
* 通配符路径匹配
* 例如data.同行者.张三.HP 可以匹配 data.同行者.*.HP
*/
/**
 * Wildcard-aware rule lookup.
 * e.g. `data.同行者.张三.HP` can match the pattern `data.同行者.*.HP`.
 * Order of preference: exact match, then patterns with fewer stars, then
 * `[*]` array-element templates on numeric segments.
 *
 * @param {string} absPath
 * @returns {object|null}
 */
function matchRuleWithWildcard(absPath) {
    // 1) exact hit
    const exact = rulesTable[absPath];
    if (exact) return exact;
    const segs = String(absPath).split('.').filter(Boolean);
    const segCount = segs.length;
    // 2) star substitutions, fewest stars first (most specific wins)
    for (let stars = 1; stars <= segCount; stars++) {
        for (const pattern of generateStarPatterns(segs, stars)) {
            if (rulesTable[pattern]) return rulesTable[pattern];
        }
    }
    // 3) numeric segments may match an "[*]" array-element template
    for (let i = 0; i < segCount; i++) {
        if (!/^\d+$/.test(segs[i])) continue;
        const candidate = segs.map((seg, j) => (j === i ? '[*]' : seg)).join('.');
        if (rulesTable[candidate]) return rulesTable[candidate];
    }
    return null;
}
/**
 * Enumerate every dotted pattern derived from `segs` containing exactly
 * `starCount` wildcard ('*') segments. Order is significant for the caller's
 * precedence rules and matches the original backtracking order: at each
 * position the literal segment is tried before '*', left to right.
 * @param {string[]} segs - Path segments.
 * @param {number} starCount - Exact number of '*' segments required.
 * @returns {string[]} Patterns joined with '.'.
 */
function generateStarPatterns(segs, starCount) {
    const total = segs.length;
    const out = [];
    const walk = (pos, used, acc) => {
        if (pos === total) {
            if (used === starCount) out.push(acc.join('.'));
            return;
        }
        // Keep the literal segment while enough positions remain for the stars.
        if (total - pos > starCount - used) {
            acc.push(segs[pos]);
            walk(pos + 1, used, acc);
            acc.pop();
        }
        // Substitute a wildcard.
        if (used < starCount) {
            acc.push('*');
            walk(pos + 1, used + 1, acc);
            acc.pop();
        }
    };
    walk(0, 0, []);
    return out;
}
/**
 * Classify a value for type-lock comparison.
 * Arrays are reported as 'array' and null as 'null' (instead of typeof's
 * misleading 'object'); everything else is the plain typeof result.
 * @param {*} v
 * @returns {string}
 */
function getValueType(v) {
    if (Array.isArray(v)) return 'array';
    return v === null ? 'null' : typeof v;
}
/**
 * Validate a mutation against the rule table before it is applied.
 *
 * Checks run in order: parent allowedKeys whitelist, parent structure lock,
 * type lock (with lenient numeric-string coercion), array growth, read-only
 * flag, then per-op value constraints (range/enum for 'set', step/range for
 * 'inc'). Range violations are clamped and allowed with a note; enum
 * violations are rejected outright.
 *
 * @param {string} op - 'set' | 'inc' | 'push' | 'pop' | 'del'.
 * @param {string} absPath - Absolute dotted path of the target.
 * @param {*} payload - New value ('set'/'push') or delta ('inc').
 * @param {*} currentValue - Existing value at absPath (undefined = new key).
 * @returns {{ allow: boolean, value?: any, reason?: string, note?: string }}
 */
export function validate(op, absPath, payload, currentValue) {
    const node = getRuleNode(absPath);
    const parentPath = getParentPath(absPath);
    const parentNode = parentPath ? getRuleNode(parentPath) : null;
    const isNewKey = currentValue === undefined;
    const lastSeg = String(absPath).split('.').pop() || '';
    // ===== 1. $schema whitelist check =====
    if (parentNode?.allowedKeys && Array.isArray(parentNode.allowedKeys)) {
        if (isNewKey && (op === 'set' || op === 'push')) {
            if (!parentNode.allowedKeys.includes(lastSeg)) {
                return { allow: false, reason: `字段不在结构模板中` };
            }
        }
        if (op === 'del') {
            if (parentNode.allowedKeys.includes(lastSeg)) {
                return { allow: false, reason: `模板定义的字段不能删除` };
            }
        }
    }
    // ===== 2. Parent structure lock (no objectExt / allowedKeys / wildcard) =====
    if (parentNode && parentNode.typeLock === 'object') {
        if (!parentNode.objectExt && !parentNode.allowedKeys && !parentNode.hasWildcard) {
            if (isNewKey && (op === 'set' || op === 'push')) {
                return { allow: false, reason: '父层结构已锁定,不允许新增字段' };
            }
        }
    }
    // ===== 3. Type lock =====
    if (node?.typeLock && op === 'set') {
        let finalPayload = payload;
        // Lenient: a numeric string is coerced to a number before comparing.
        if (node.typeLock === 'number' && typeof payload === 'string') {
            if (/^-?\d+(?:\.\d+)?$/.test(payload.trim())) {
                finalPayload = Number(payload);
            }
        }
        const finalType = getValueType(finalPayload);
        if (node.typeLock !== finalType) {
            return { allow: false, reason: `类型不匹配,期望 ${node.typeLock},实际 ${finalType}` };
        }
        payload = finalPayload;
    }
    // ===== 4. Array growth check =====
    if (op === 'push') {
        if (node && node.typeLock === 'array' && !node.arrayGrow) {
            return { allow: false, reason: '数组不允许扩展' };
        }
    }
    // ===== 5. $ro read-only =====
    if (node?.ro && (op === 'set' || op === 'inc')) {
        return { allow: false, reason: '只读字段' };
    }
    // ===== 6. 'set': numeric constraints =====
    if (op === 'set') {
        const num = Number(payload);
        // range clamp
        // NOTE(review): when min/max is configured and the payload is numeric,
        // this returns immediately, so a node carrying both range and enum
        // never reaches the enum check — confirm that precedence is intended.
        if (Number.isFinite(num) && (node?.min !== undefined || node?.max !== undefined)) {
            let v = num;
            const min = node?.min;
            const max = node?.max;
            if (min !== undefined) v = Math.max(v, min);
            if (max !== undefined) v = Math.min(v, max);
            const clamped = v !== num;
            return {
                allow: true,
                value: v,
                note: clamped ? `超出范围,已限制到 ${v}` : undefined,
            };
        }
        // enum (no auto-correction: mismatches are rejected)
        if (node?.enum?.length) {
            const s = String(payload ?? '');
            if (!node.enum.includes(s)) {
                return { allow: false, reason: `枚举不匹配,允许:${node.enum.join(' / ')}` };
            }
        }
        return { allow: true, value: payload };
    }
    // ===== 7. 'inc': step / range limits =====
    if (op === 'inc') {
        const delta = Number(payload);
        if (!Number.isFinite(delta)) return { allow: false, reason: 'delta 不是数字' };
        // Non-numeric current values count as 0 for the increment base.
        const cur = Number(currentValue) || 0;
        let d = delta;
        const noteParts = [];
        // step limit: clamp |delta| to node.step
        if (node?.step !== undefined && node.step >= 0) {
            const before = d;
            if (d > node.step) d = node.step;
            if (d < -node.step) d = -node.step;
            if (d !== before) {
                noteParts.push(`超出步长限制,已限制到 ${d >= 0 ? '+' : ''}${d}`);
            }
        }
        let next = cur + d;
        // range limit on the resulting value
        const beforeClamp = next;
        if (node?.min !== undefined) next = Math.max(next, node.min);
        if (node?.max !== undefined) next = Math.min(next, node.max);
        if (next !== beforeClamp) {
            noteParts.push(`超出范围,已限制到 ${next}`);
        }
        return {
            allow: true,
            value: next,
            note: noteParts.length ? noteParts.join('') : undefined,
        };
    }
    return { allow: true, value: payload };
}

View File

@@ -0,0 +1,21 @@
export {
applyStateForMessage,
clearStateAppliedFor,
clearStateAppliedFrom,
restoreStateV2ToFloor,
trimStateV2FromFloor,
} from './executor.js';
export { parseStateBlock, extractStateBlocks, computeStateSignature, parseInlineValue } from './parser.js';
export { generateSemantic } from './semantic.js';
export {
validate,
setRule,
clearRule,
clearAllRules,
loadRulesFromMeta,
saveRulesToMeta,
getRuleNode,
getParentPath,
} from './guard.js';

View File

@@ -0,0 +1,514 @@
import jsyaml from '../../../libs/js-yaml.mjs';
/**
* Robust <state> block matcher (no regex)
* - Pairs each </state> with the nearest preceding <state ...>
* - Ignores unclosed <state>
*/
/**
 * True when position `i` in `s` begins a plausible `<state ...>` opening tag.
 * Accepts `<state` (case-insensitive) followed by '>', '/', whitespace, or
 * the end of the string.
 * @param {string} s
 * @param {number} i
 * @returns {boolean}
 */
function isValidOpenTagAt(s, i) {
    if (s[i] !== '<') return false;
    if (s.slice(i, i + 6).toLowerCase() !== '<state') return false;
    const after = s[i + 6];
    return after === undefined || after === '>' || after === '/' || /\s/.test(after);
}
/**
 * True when position `i` in `s` begins a `</state>` closing tag
 * (case-insensitive, optional whitespace before the '>').
 * @param {string} s
 * @param {number} i
 * @returns {boolean}
 */
function isValidCloseTagAt(s, i) {
    if (s[i] !== '<' || s[i + 1] !== '/') return false;
    if (s.slice(i, i + 7).toLowerCase() !== '</state') return false;
    let j = i + 7;
    while (j < s.length && /\s/.test(s[j])) j += 1;
    return s[j] === '>';
}
/**
 * Index of the first '>' at or after `openIndex`, or -1 when none exists.
 * (indexOf already returns -1 on failure, so no extra mapping is needed.)
 * @param {string} s
 * @param {number} openIndex
 * @returns {number}
 */
function findTagEnd(s, openIndex) {
    return s.indexOf('>', openIndex);
}
/**
 * Locate every well-formed <state>...</state> block in `text` without regex.
 *
 * Strategy: collect every closing-tag position, then scan right-to-left,
 * pairing each </state> with the nearest preceding <state ...> whose open tag
 * terminates before the close tag starts. Unmatched or unterminated tags are
 * skipped, and `searchEnd` walls off already-consumed text so matched regions
 * never overlap.
 *
 * @param {string} text
 * @returns {Array<{openStart:number, openTagEnd:number, closeStart:number, closeEnd:number}>}
 *   Spans in document order; openTagEnd/closeEnd are exclusive offsets just
 *   past the '>' of the open/close tag respectively.
 */
function findStateBlockSpans(text) {
    const s = String(text ?? '');
    const closes = [];
    for (let i = 0; i < s.length; i++) {
        if (s[i] !== '<') continue;
        if (isValidCloseTagAt(s, i)) closes.push(i);
    }
    if (!closes.length) return [];
    const spans = [];
    let searchEnd = s.length;
    for (let cIdx = closes.length - 1; cIdx >= 0; cIdx--) {
        const closeStart = closes[cIdx];
        if (closeStart >= searchEnd) continue; // inside an already-matched block
        let closeEnd = closeStart + 7;
        while (closeEnd < s.length && s[closeEnd] !== '>') closeEnd++;
        if (s[closeEnd] !== '>') continue; // close tag never terminated
        closeEnd += 1;
        let openStart = -1;
        // find the nearest valid open tag that ends strictly before closeStart
        for (let i = closeStart - 1; i >= 0; i--) {
            if (s[i] !== '<') continue;
            if (!isValidOpenTagAt(s, i)) continue;
            const tagEnd = findTagEnd(s, i);
            if (tagEnd === -1) continue;
            if (tagEnd >= closeStart) continue;
            openStart = i;
            break;
        }
        if (openStart === -1) continue; // orphan </state>: ignore
        const openTagEnd = findTagEnd(s, openStart);
        if (openTagEnd === -1) continue;
        spans.push({
            openStart,
            openTagEnd: openTagEnd + 1,
            closeStart,
            closeEnd,
        });
        searchEnd = openStart;
    }
    spans.reverse(); // collected right-to-left; restore document order
    return spans;
}
/**
 * Return the inner text of every well-formed <state>...</state> block,
 * skipping blocks whose content is blank.
 * @param {string} text
 * @returns {string[]}
 */
export function extractStateBlocks(text) {
    const src = String(text ?? '');
    return findStateBlockSpans(src)
        .map(span => src.slice(span.openTagEnd, span.closeStart))
        .filter(inner => inner.trim().length > 0);
}
/**
 * Build a stable signature over all <state> blocks in `text` (tags included),
 * used to detect whether a message's state content has already been applied.
 * @param {string} text
 * @returns {string} Trimmed blocks joined by '\n---\n', or '' when none.
 */
export function computeStateSignature(text) {
    const src = String(text ?? '');
    const spans = findStateBlockSpans(src);
    if (spans.length === 0) return '';
    return spans
        .map(span => src.slice(span.openStart, span.closeEnd).trim())
        .join('\n---\n');
}
/**
 * Expand a $schema YAML block into a flat list of { path, rule } entries.
 *
 * The schema lines are de-indented to their minimum common indent and parsed
 * as YAML, then walked recursively:
 *   - array        -> { typeLock: 'array', arrayGrow: true }; the first
 *                     element (if any) becomes the '[*]' element template
 *   - scalar       -> { typeLock: typeof value }
 *   - empty object -> { typeLock: 'object', objectExt: true }
 *   - object with a '*' key -> extensible object; the '*' value is the
 *                     template for arbitrary child keys
 *   - plain object -> { typeLock: 'object', allowedKeys: [...] } (closed set)
 *
 * Returns [] (after logging a warning) when the YAML fails to parse.
 * @param {string} basePath - Path prefix the schema is anchored at ('' = root).
 * @param {string[]} schemaLines - Raw indented lines following "$schema".
 * @returns {Array<{path: string, rule: object}>}
 */
function parseSchemaBlock(basePath, schemaLines) {
    const rules = [];
    const nonEmpty = schemaLines.filter(l => l.trim());
    if (!nonEmpty.length) return rules;
    // de-indent by the minimum indent so jsyaml sees a top-level document
    const minIndent = Math.min(...nonEmpty.map(l => l.search(/\S/)));
    const yamlText = schemaLines
        .map(l => (l.trim() ? l.slice(minIndent) : ''))
        .join('\n');
    let schemaObj;
    try {
        schemaObj = jsyaml.load(yamlText);
    } catch (e) {
        console.warn('[parser] $schema YAML parse failed:', e.message);
        return rules;
    }
    if (!schemaObj || typeof schemaObj !== 'object') return rules;
    // recursive walk: emit one rule per path, then descend into children
    function walk(obj, curPath) {
        if (obj === null || obj === undefined) return;
        if (Array.isArray(obj)) {
            if (obj.length === 0) {
                rules.push({
                    path: curPath,
                    rule: { typeLock: 'array', arrayGrow: true },
                });
            } else {
                rules.push({
                    path: curPath,
                    rule: { typeLock: 'array', arrayGrow: true },
                });
                // first element acts as the template for every element
                walk(obj[0], curPath ? `${curPath}.[*]` : '[*]');
            }
            return;
        }
        if (typeof obj !== 'object') {
            const t = typeof obj;
            if (t === 'string' || t === 'number' || t === 'boolean') {
                rules.push({
                    path: curPath,
                    rule: { typeLock: t },
                });
            }
            return;
        }
        const keys = Object.keys(obj);
        if (keys.length === 0) {
            // {} means "object, any keys allowed"
            rules.push({
                path: curPath,
                rule: { typeLock: 'object', objectExt: true },
            });
            return;
        }
        const hasWildcard = keys.includes('*');
        if (hasWildcard) {
            rules.push({
                path: curPath,
                rule: { typeLock: 'object', objectExt: true, hasWildcard: true },
            });
            const wildcardTemplate = obj['*'];
            if (wildcardTemplate !== undefined) {
                walk(wildcardTemplate, curPath ? `${curPath}.*` : '*');
            }
            for (const k of keys) {
                if (k === '*') continue;
                const childPath = curPath ? `${curPath}.${k}` : k;
                walk(obj[k], childPath);
            }
            return;
        }
        // no wildcard: the key set is closed
        rules.push({
            path: curPath,
            rule: { typeLock: 'object', allowedKeys: keys },
        });
        for (const k of keys) {
            const childPath = curPath ? `${curPath}.${k}` : k;
            walk(obj[k], childPath);
        }
    }
    walk(schemaObj, basePath);
    return rules;
}
/**
 * Parse one rule line made of leading $-directives followed by a path.
 * Supported directives: $ro, $range=[min,max] (auto-ordered), $step=N
 * (absolute value), $enum={a,b,c} (split on comma / 、 / ;).
 * Unrecognized $-tokens are consumed but contribute nothing to the rule.
 * @param {string} line
 * @returns {{ path: string, rule: object } | null} null when the line has
 *   no directive or no path.
 */
function parseRuleLine(line) {
    const tokens = line.trim().split(/\s+/);
    let firstPathToken = 0;
    while (firstPathToken < tokens.length && tokens[firstPathToken].startsWith('$')) {
        firstPathToken += 1;
    }
    const directives = tokens.slice(0, firstPathToken);
    const path = tokens.slice(firstPathToken).join(' ').trim();
    if (!path || directives.length === 0) return null;
    const rule = {};
    for (const tok of directives) {
        if (tok === '$ro') {
            rule.ro = true;
            continue;
        }
        const range = tok.match(/^\$range=\[\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\]$/);
        if (range) {
            const a = Number(range[1]);
            const b = Number(range[2]);
            rule.min = Math.min(a, b);
            rule.max = Math.max(a, b);
            continue;
        }
        const step = tok.match(/^\$step=(\d+(?:\.\d+)?)$/);
        if (step) {
            rule.step = Math.abs(Number(step[1]));
            continue;
        }
        const en = tok.match(/^\$enum=\{([^}]+)\}$/);
        if (en) {
            rule.enum = en[1].split(/[,、;]/).map(part => part.trim()).filter(Boolean);
        }
    }
    return { path, rule };
}
/**
 * Parse the inner text of a <state> block into rules and data operations.
 *
 * Line classification:
 *   - blank / '#' comment lines: skipped (kept inside an open $schema body)
 *   - "$schema [path]": opens an indented schema section, expanded by
 *     parseSchemaBlock when it ends (dedent, next "$schema", or EOF)
 *   - other "$..." lines: single-path rules via parseRuleLine
 *   - everything else: data lines, handed to parseDataLines
 *
 * @param {string} content - Inner text of a <state> block.
 * @returns {{ rules: Array<{path:string, rule:object}>, ops: Array<object> }}
 */
export function parseStateBlock(content) {
    const lines = String(content ?? '').split(/\r?\n/);
    const rules = [];
    const dataLines = [];
    let inSchema = false;
    let schemaPath = '';
    let schemaLines = [];
    let schemaBaseIndent = -1;
    // finalize the currently open $schema section (no-op when empty)
    const flushSchema = () => {
        if (schemaLines.length) {
            const parsed = parseSchemaBlock(schemaPath, schemaLines);
            rules.push(...parsed);
        }
        inSchema = false;
        schemaPath = '';
        schemaLines = [];
        schemaBaseIndent = -1;
    };
    for (let i = 0; i < lines.length; i++) {
        const raw = lines[i];
        const trimmed = raw.trim();
        const indent = raw.search(/\S/);
        if (!trimmed || trimmed.startsWith('#')) {
            // blank/comment lines inside a schema body are preserved for YAML
            if (inSchema && schemaBaseIndent >= 0) schemaLines.push(raw);
            continue;
        }
        // "$schema [path]" opens a new schema section
        if (trimmed.startsWith('$schema')) {
            flushSchema();
            const rest = trimmed.slice(7).trim();
            schemaPath = rest || '';
            inSchema = true;
            schemaBaseIndent = -1;
            continue;
        }
        if (inSchema) {
            if (schemaBaseIndent < 0) {
                // first non-blank line fixes the schema body's base indent
                schemaBaseIndent = indent;
            }
            // dedent below the base indent ends the schema; reprocess the line
            if (indent < schemaBaseIndent && indent >= 0 && trimmed) {
                flushSchema();
                i--;
                continue;
            }
            schemaLines.push(raw);
            continue;
        }
        // plain $rule line ($ro, $range, $step, $enum)
        if (trimmed.startsWith('$')) {
            const parsed = parseRuleLine(trimmed);
            if (parsed) rules.push(parsed);
            continue;
        }
        dataLines.push(raw);
    }
    flushSchema();
    const ops = parseDataLines(dataLines);
    return { rules, ops };
}
/**
 * Strip a trailing YAML-style comment from a line, honoring single- and
 * double-quoted regions so '#' inside quotes is preserved. A '#' only starts
 * a comment at column 0 or when preceded by whitespace. Single quotes use
 * YAML's '' escape; double quotes use backslash escapes.
 * @param {string} s
 * @returns {string} The line with any comment removed, right-trimmed.
 */
function stripYamlInlineComment(s) {
    const line = String(s ?? '');
    if (!line) return '';
    let quote = '';            // active quote char, '' while outside quotes
    let pendingEscape = false; // inside double quotes: previous char was '\'
    for (let i = 0; i < line.length; i++) {
        const ch = line[i];
        if (quote === "'") {
            if (ch === "'") {
                if (line[i + 1] === "'") { i += 1; continue; } // '' = literal quote
                quote = '';
            }
            continue;
        }
        if (quote === '"') {
            if (pendingEscape) { pendingEscape = false; continue; }
            if (ch === '\\') { pendingEscape = true; continue; }
            if (ch === '"') quote = '';
            continue;
        }
        if (ch === "'" || ch === '"') { quote = ch; continue; }
        if (ch === '#' && (i === 0 || /\s/.test(line[i - 1]))) {
            return line.slice(0, i).trimEnd();
        }
    }
    return line.trimEnd();
}
/**
 * Turn raw data lines into operations.
 *
 * Each top-level (indent 0) line is "path: rhs". A non-empty rhs is decoded
 * by parseInlineValue; an empty rhs opens a pending multi-line YAML value
 * collected from the following indented lines and parsed with jsyaml when a
 * new top-level line (or EOF) flushes it. A pending path with no body lines
 * becomes { op: 'set', value: '' }; a YAML failure becomes value null plus a
 * warning string.
 * @param {string[]} lines - Raw data lines (comments/blank lines tolerated).
 * @returns {Array<{path:string, op:string, value?:any, delta?:number, warning?:string}>}
 */
function parseDataLines(lines) {
    const results = [];
    let pendingPath = null;
    let pendingLines = [];
    // finalize the pending multi-line YAML value, if any
    const flushPending = () => {
        if (!pendingPath) return;
        if (!pendingLines.length) {
            results.push({ path: pendingPath, op: 'set', value: '' });
            pendingPath = null;
            pendingLines = [];
            return;
        }
        try {
            // de-indent the collected body to its minimum common indent
            const nonEmpty = pendingLines.filter(l => l.trim());
            const minIndent = nonEmpty.length
                ? Math.min(...nonEmpty.map(l => l.search(/\S/)))
                : 0;
            const yamlText = pendingLines
                .map(l => (l.trim() ? l.slice(minIndent) : ''))
                .join('\n');
            const obj = jsyaml.load(yamlText);
            results.push({ path: pendingPath, op: 'set', value: obj });
        } catch (e) {
            results.push({ path: pendingPath, op: 'set', value: null, warning: `YAML 解析失败: ${e.message}` });
        } finally {
            pendingPath = null;
            pendingLines = [];
        }
    };
    for (const raw of lines) {
        const trimmed = raw.trim();
        if (!trimmed || trimmed.startsWith('#')) continue;
        const indent = raw.search(/\S/);
        if (indent === 0) {
            flushPending();
            const colonIdx = findTopLevelColon(trimmed);
            if (colonIdx === -1) continue; // not a "path: value" line
            const path = trimmed.slice(0, colonIdx).trim();
            let rhs = trimmed.slice(colonIdx + 1).trim();
            rhs = stripYamlInlineComment(rhs);
            if (!path) continue;
            if (!rhs) {
                // empty rhs: start collecting an indented multi-line value
                pendingPath = path;
                pendingLines = [];
            } else {
                results.push({ path, ...parseInlineValue(rhs) });
            }
        } else if (pendingPath) {
            pendingLines.push(raw);
        }
    }
    flushPending();
    return results;
}
/**
 * Find the first ':' in `line` that sits outside any quoted region.
 * A backslash escapes the following character (even outside quotes, matching
 * the original parser's behavior).
 * @param {string} line
 * @returns {number} Index of the colon, or -1 when none is found.
 */
function findTopLevelColon(line) {
    let quoteChar = null;
    let skipNext = false;
    for (let i = 0; i < line.length; i++) {
        const ch = line[i];
        if (skipNext) { skipNext = false; continue; }
        if (ch === '\\') { skipNext = true; continue; }
        if (quoteChar === null) {
            if (ch === '"' || ch === "'") { quoteChar = ch; continue; }
            if (ch === ':') return i;
        } else if (ch === quoteChar) {
            quoteChar = null;
        }
    }
    return -1;
}
/**
 * Decode backslash escapes (\n, \t, \r, \", \', \\) in a single pass.
 *
 * A single pass is required for correctness: the previous implementation
 * chained .replace() calls, so the input `\\n` (escaped backslash followed by
 * the letter n) was first rewritten into a real newline instead of the two
 * literal characters backslash + "n". Unknown escapes (e.g. `\x`) are left
 * untouched, matching the old behavior for sequences it did not recognize.
 * @param {string} s
 * @returns {string}
 */
function unescapeString(s) {
    const map = { n: '\n', t: '\t', r: '\r', '"': '"', "'": "'", '\\': '\\' };
    return String(s ?? '').replace(/\\([ntr"'\\])/g, (_, ch) => map[ch]);
}
/**
 * Interpret the right-hand side of a `path: value` line as an operation.
 *
 * DSL, checked in this order (order matters):
 *   null             -> del
 *   (N)              -> set literal number N (parentheses force 'set',
 *                       bypassing the +/- increment syntax below)
 *   +N / -N          -> inc by N; a bare negative number is a decrement by
 *                       design, never a 'set'
 *   +"s" / +'s'      -> push string (escapes decoded)
 *   +[...]           -> push array items (JSON); falls back to set + warning
 *   -"s" / -'s'      -> pop string
 *   -[...]           -> pop array items (JSON); falls back to set + warning
 *   digits           -> set number (only reachable for non-negative values)
 *   "s" / 's'        -> set string (escapes decoded)
 *   true / false     -> set boolean
 *   leading { or [   -> set parsed JSON; falls back to set raw + warning
 *   anything else    -> set raw string
 * @param {string} raw
 * @returns {{op: string, value?: any, delta?: number, warning?: string}}
 */
export function parseInlineValue(raw) {
    const t = String(raw ?? '').trim();
    if (t === 'null') return { op: 'del' };
    // parenthesized number: explicit 'set', never an increment
    const parenNum = t.match(/^\((-?\d+(?:\.\d+)?)\)$/);
    if (parenNum) return { op: 'set', value: Number(parenNum[1]) };
    // signed number => increment
    if (/^\+\d/.test(t) || /^-\d/.test(t)) {
        const n = Number(t);
        if (Number.isFinite(n)) return { op: 'inc', delta: n };
    }
    const pushD = t.match(/^\+"((?:[^"\\]|\\.)*)"\s*$/);
    if (pushD) return { op: 'push', value: unescapeString(pushD[1]) };
    const pushS = t.match(/^\+'((?:[^'\\]|\\.)*)'\s*$/);
    if (pushS) return { op: 'push', value: unescapeString(pushS[1]) };
    if (t.startsWith('+[')) {
        try {
            const arr = JSON.parse(t.slice(1));
            if (Array.isArray(arr)) return { op: 'push', value: arr };
        } catch {}
        return { op: 'set', value: t, warning: '+[] 解析失败' };
    }
    const popD = t.match(/^-"((?:[^"\\]|\\.)*)"\s*$/);
    if (popD) return { op: 'pop', value: unescapeString(popD[1]) };
    const popS = t.match(/^-'((?:[^'\\]|\\.)*)'\s*$/);
    if (popS) return { op: 'pop', value: unescapeString(popS[1]) };
    if (t.startsWith('-[')) {
        try {
            const arr = JSON.parse(t.slice(1));
            if (Array.isArray(arr)) return { op: 'pop', value: arr };
        } catch {}
        return { op: 'set', value: t, warning: '-[] 解析失败' };
    }
    if (/^-?\d+(?:\.\d+)?$/.test(t)) return { op: 'set', value: Number(t) };
    const strD = t.match(/^"((?:[^"\\]|\\.)*)"\s*$/);
    if (strD) return { op: 'set', value: unescapeString(strD[1]) };
    const strS = t.match(/^'((?:[^'\\]|\\.)*)'\s*$/);
    if (strS) return { op: 'set', value: unescapeString(strS[1]) };
    if (t === 'true') return { op: 'set', value: true };
    if (t === 'false') return { op: 'set', value: false };
    if (t.startsWith('{') || t.startsWith('[')) {
        try { return { op: 'set', value: JSON.parse(t) }; }
        catch { return { op: 'set', value: t, warning: 'JSON 解析失败' }; }
    }
    return { op: 'set', value: t };
}

View File

@@ -0,0 +1,41 @@
/**
 * Build a human-readable (Chinese) summary of a single state mutation, for
 * display/logging. The dotted path is rendered with ' > ' separators; values
 * are JSON-stringified, with undefined shown as 空 and null as 'null'.
 * @param {string} path - Dotted variable path.
 * @param {string} op - One of 'set' | 'inc' | 'push' | 'pop' | 'del'.
 * @param {*} oldValue - Value before the operation (undefined when absent).
 * @param {*} newValue - Value after the operation.
 * @param {number} [delta] - Increment amount, for 'inc'.
 * @param {*} [operandValue] - Item(s) added/removed, for 'push'/'pop'.
 * @returns {string}
 */
export function generateSemantic(path, op, oldValue, newValue, delta, operandValue) {
    const p = String(path ?? '').replace(/\./g, ' > ');
    // value formatter: undefined -> 空, null -> 'null', else JSON (best effort)
    const fmt = (v) => {
        if (v === undefined) return '空';
        if (v === null) return 'null';
        try {
            return JSON.stringify(v);
        } catch {
            return String(v);
        }
    };
    switch (op) {
        case 'set':
            return oldValue === undefined
                ? `${p} 设为 ${fmt(newValue)}`
                : `${p}${fmt(oldValue)} 变为 ${fmt(newValue)}`;
        case 'inc': {
            // explicit '+' prefix for non-negative deltas
            const sign = (delta ?? 0) >= 0 ? '+' : '';
            return `${p} ${sign}${delta}${fmt(oldValue)}${fmt(newValue)}`;
        }
        case 'push': {
            const items = Array.isArray(operandValue) ? operandValue : [operandValue];
            return `${p} 加入 ${items.map(fmt).join('、')}`;
        }
        case 'pop': {
            const items = Array.isArray(operandValue) ? operandValue : [operandValue];
            return `${p} 移除 ${items.map(fmt).join('、')}`;
        }
        case 'del':
            return `${p} 被删除(原值 ${fmt(oldValue)}`;
        default:
            return `${p} 操作 ${op}`;
    }
}

View File

@@ -6,6 +6,7 @@
import { getContext } from "../../../../../extensions.js";
import { getLocalVariable, setLocalVariable } from "../../../../../variables.js";
import { createModuleEvents, event_types } from "../../core/event-manager.js";
import jsYaml from "../../libs/js-yaml.mjs";
import {
lwbSplitPathWithBrackets,
lwbSplitPathAndValue,
@@ -19,6 +20,8 @@ import {
const MODULE_ID = 'varCommands';
const TAG_RE_XBGETVAR = /\{\{xbgetvar::([^}]+)\}\}/gi;
const TAG_RE_XBGETVAR_YAML = /\{\{xbgetvar_yaml::([^}]+)\}\}/gi;
const TAG_RE_XBGETVAR_YAML_IDX = /\{\{xbgetvar_yaml_idx::([^}]+)\}\}/gi;
let events = null;
let initialized = false;
@@ -94,12 +97,22 @@ function setDeepBySegments(target, segs, value) {
cur[key] = value;
} else {
const nxt = cur[key];
if (typeof nxt === 'object' && nxt && !Array.isArray(nxt)) {
const nextSeg = segs[i + 1];
const wantArray = (typeof nextSeg === 'number');
// 已存在且类型正确:继续深入
if (wantArray && Array.isArray(nxt)) {
cur = nxt;
} else {
cur[key] = {};
cur = cur[key];
continue;
}
if (!wantArray && (nxt && typeof nxt === 'object') && !Array.isArray(nxt)) {
cur = nxt;
continue;
}
// 不存在或类型不匹配:创建正确的容器
cur[key] = wantArray ? [] : {};
cur = cur[key];
}
}
}
@@ -139,6 +152,153 @@ export function replaceXbGetVarInString(s) {
return s.replace(TAG_RE_XBGETVAR, (_, p) => lwbResolveVarPath(p));
}
/**
 * Replace every {{xbgetvar_yaml::path}} macro in `s` with the variable's
 * value rendered as YAML.
 *
 * The resolved value is JSON-parsed first; objects/arrays are dumped as
 * 2-space-indented YAML (unbounded line width, no anchors/refs, double-quote
 * style), trimmed. Scalars — or anything that fails to JSON-parse — are
 * substituted verbatim; an empty/unresolvable value becomes ''.
 * @param {string} s
 * @returns {string}
 */
export function replaceXbGetVarYamlInString(s) {
    s = String(s ?? '');
    if (!s || s.indexOf('{{xbgetvar_yaml::') === -1) return s;
    TAG_RE_XBGETVAR_YAML.lastIndex = 0; // shared /g regex: reset stateful cursor
    return s.replace(TAG_RE_XBGETVAR_YAML, (_, p) => {
        const value = lwbResolveVarPath(p);
        if (!value) return '';
        // try to parse as object/array, then convert to YAML
        try {
            const parsed = JSON.parse(value);
            if (typeof parsed === 'object' && parsed !== null) {
                return jsYaml.dump(parsed, {
                    indent: 2,
                    lineWidth: -1,
                    noRefs: true,
                    quotingType: '"',
                }).trim();
            }
            return value;
        } catch {
            return value;
        }
    });
}
/**
 * Replace every {{xbgetvar_yaml_idx::path}} macro in `s` with the variable's
 * value rendered as YAML where each array element carries a `# [idx]` index
 * comment (see formatYamlWithIndex). Scalars and JSON-parse failures are
 * substituted verbatim; an empty/unresolvable value becomes ''.
 * @param {string} s
 * @returns {string}
 */
export function replaceXbGetVarYamlIdxInString(s) {
    s = String(s ?? '');
    if (!s || s.indexOf('{{xbgetvar_yaml_idx::') === -1) return s;
    TAG_RE_XBGETVAR_YAML_IDX.lastIndex = 0; // shared /g regex: reset stateful cursor
    return s.replace(TAG_RE_XBGETVAR_YAML_IDX, (_, p) => {
        const value = lwbResolveVarPath(p);
        if (!value) return '';
        try {
            const parsed = JSON.parse(value);
            if (typeof parsed === 'object' && parsed !== null) {
                return formatYamlWithIndex(parsed, 0).trim();
            }
            return value;
        } catch {
            return value;
        }
    });
}
/**
 * Render a value as YAML-like text in which every array element is annotated
 * with a trailing `# [idx]` index comment (so an LLM can reference elements
 * by index). `indent` is the number of leading spaces for this level; child
 * values of map entries are rendered at indent + 2.
 *
 * NOTE(review): an array nested directly inside an array recurses with
 * indent + 1 while all other children use indent + 2 — confirm the
 * asymmetry is intentional.
 * @param {object|Array|*} obj - Value to render.
 * @param {number} indent - Leading-space count for this level.
 * @returns {string} Multi-line YAML-like text (no trailing newline).
 */
function formatYamlWithIndex(obj, indent) {
    const pad = ' '.repeat(indent);
    if (Array.isArray(obj)) {
        if (obj.length === 0) return `${pad}[]`;
        const lines = [];
        obj.forEach((item, idx) => {
            if (item && typeof item === 'object' && !Array.isArray(item)) {
                const keys = Object.keys(item);
                if (keys.length === 0) {
                    lines.push(`${pad}- {} # [${idx}]`);
                } else {
                    // first key shares the "- " line so the index comment sits on it
                    const firstKey = keys[0];
                    const firstVal = item[firstKey];
                    const firstFormatted = formatValue(firstVal, indent + 2);
                    if (typeof firstVal === 'object' && firstVal !== null) {
                        lines.push(`${pad}- ${firstKey}: # [${idx}]`);
                        lines.push(firstFormatted);
                    } else {
                        lines.push(`${pad}- ${firstKey}: ${firstFormatted} # [${idx}]`);
                    }
                    // remaining keys are aligned under the first
                    for (let i = 1; i < keys.length; i++) {
                        const k = keys[i];
                        const v = item[k];
                        const vFormatted = formatValue(v, indent + 2);
                        if (typeof v === 'object' && v !== null) {
                            lines.push(`${pad}  ${k}:`);
                            lines.push(vFormatted);
                        } else {
                            lines.push(`${pad}  ${k}: ${vFormatted}`);
                        }
                    }
                }
            } else if (Array.isArray(item)) {
                lines.push(`${pad}- # [${idx}]`);
                lines.push(formatYamlWithIndex(item, indent + 1));
            } else {
                lines.push(`${pad}- ${formatScalar(item)} # [${idx}]`);
            }
        });
        return lines.join('\n');
    }
    if (obj && typeof obj === 'object') {
        if (Object.keys(obj).length === 0) return `${pad}{}`;
        const lines = [];
        for (const [key, val] of Object.entries(obj)) {
            const vFormatted = formatValue(val, indent + 1);
            if (typeof val === 'object' && val !== null) {
                lines.push(`${pad}${key}:`);
                lines.push(vFormatted);
            } else {
                lines.push(`${pad}${key}: ${vFormatted}`);
            }
        }
        return lines.join('\n');
    }
    return `${pad}${formatScalar(obj)}`;
}
/**
 * Format a nested value: arrays and objects recurse through
 * formatYamlWithIndex; everything else (including null) is a scalar.
 * @param {*} val
 * @param {number} indent - Leading-space count for container rendering.
 * @returns {string}
 */
function formatValue(val, indent) {
    const isContainer = val !== null && typeof val === 'object';
    return isContainer ? formatYamlWithIndex(val, indent) : formatScalar(val);
}
/**
 * Render a scalar for YAML-like output, double-quoting strings that YAML
 * would misinterpret unquoted.
 *
 * Fixes a regex bug in the previous version: the character class was written
 * as `[:[]\]{}...]`, which JavaScript parses as the two-character class
 * {':','['} followed by a literal `]{}&*!|>'"%@`#,]` tail — so strings
 * containing `{ } # & * , |` etc. were emitted unquoted, producing invalid
 * or misparsed YAML. The class below escapes the brackets so every intended
 * ambiguous character triggers quoting.
 * @param {*} v - Scalar value (null/undefined/boolean/number/string/other).
 * @returns {string} YAML-safe textual form ('' for undefined).
 */
function formatScalar(v) {
    if (v === null) return 'null';
    if (v === undefined) return '';
    if (typeof v === 'boolean' || typeof v === 'number') return String(v);
    if (typeof v === 'string') {
        const needsQuote =
            v === '' ||
            /^\s|\s$/.test(v) ||                    // leading/trailing whitespace
            /[:\[\]{}&*!|>'"%@`#,]/.test(v) ||      // YAML-ambiguous characters
            /^(?:true|false|null)$/i.test(v) ||     // YAML keywords
            /^-?(?:\d+(?:\.\d+)?|\.\d+)$/.test(v);  // numeric-looking strings
        if (needsQuote) {
            return `"${v.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
        }
        return v;
    }
    return String(v);
}
export function replaceXbGetVarInChat(chat) {
if (!Array.isArray(chat)) return;
@@ -148,9 +308,15 @@ export function replaceXbGetVarInChat(chat) {
if (!key) continue;
const old = String(msg[key] ?? '');
if (old.indexOf('{{xbgetvar::') === -1) continue;
const hasJson = old.indexOf('{{xbgetvar::') !== -1;
const hasYaml = old.indexOf('{{xbgetvar_yaml::') !== -1;
const hasYamlIdx = old.indexOf('{{xbgetvar_yaml_idx::') !== -1;
if (!hasJson && !hasYaml && !hasYamlIdx) continue;
msg[key] = replaceXbGetVarInString(old);
let result = hasJson ? replaceXbGetVarInString(old) : old;
result = hasYaml ? replaceXbGetVarYamlInString(result) : result;
result = hasYamlIdx ? replaceXbGetVarYamlIdxInString(result) : result;
msg[key] = result;
} catch {}
}
}
@@ -165,9 +331,14 @@ export function applyXbGetVarForMessage(messageId, writeback = true) {
if (!key) return;
const old = String(msg[key] ?? '');
if (old.indexOf('{{xbgetvar::') === -1) return;
const hasJson = old.indexOf('{{xbgetvar::') !== -1;
const hasYaml = old.indexOf('{{xbgetvar_yaml::') !== -1;
const hasYamlIdx = old.indexOf('{{xbgetvar_yaml_idx::') !== -1;
if (!hasJson && !hasYaml && !hasYamlIdx) return;
const out = replaceXbGetVarInString(old);
let out = hasJson ? replaceXbGetVarInString(old) : old;
out = hasYaml ? replaceXbGetVarYamlInString(out) : out;
out = hasYamlIdx ? replaceXbGetVarYamlIdxInString(out) : out;
if (writeback && out !== old) {
msg[key] = out;
}
@@ -616,6 +787,62 @@ export function lwbPushVarPath(path, value) {
}
}
/**
 * Remove items from an array variable by value (2.0 "pop" semantics).
 *
 * Resolves the root local variable named by the first path segment, walks to
 * the target array, and removes at most one occurrence per requested value
 * (values are matched via JSON stringification). Each removal is vetted
 * through the global LWB_Guard 'delNode' check using an index path; vetoed
 * indices are skipped. Returns '' in all cases (slash-command convention);
 * failures are silently swallowed.
 *
 * NOTE(review): duplicate entries in `valuesToRemove` yield the same index
 * twice; after the first splice the second splice at that index removes
 * whatever element shifted into it — confirm callers never pass duplicates.
 * @param {string} path - Variable path whose target is an array.
 * @param {*|Array} valuesToRemove - Value, or list of values, to remove.
 * @returns {string} Always ''.
 */
export function lwbRemoveArrayItemByValue(path, valuesToRemove) {
    try {
        const segs = lwbSplitPathWithBrackets(path);
        if (!segs.length) return '';
        const rootName = String(segs[0]);
        const rootRaw = getLocalVariable(rootName);
        const rootObj = maybeParseObject(rootRaw);
        if (!rootObj) return '';
        // walk down to the target array
        let cur = rootObj;
        for (let i = 1; i < segs.length; i++) {
            cur = cur?.[segs[i]];
            if (cur == null) return '';
        }
        if (!Array.isArray(cur)) return '';
        const toRemove = Array.isArray(valuesToRemove) ? valuesToRemove : [valuesToRemove];
        if (!toRemove.length) return '';
        // locate indices (each value removes at most one matching element)
        const indices = [];
        for (const v of toRemove) {
            const vStr = safeJSONStringify(v);
            if (!vStr) continue;
            const idx = cur.findIndex(x => safeJSONStringify(x) === vStr);
            if (idx !== -1) indices.push(idx);
        }
        if (!indices.length) return '';
        // delete in descending order, vetting each via the guardian's
        // 'delNode' check (built on an index path)
        indices.sort((a, b) => b - a);
        for (const idx of indices) {
            const absIndexPath = normalizePath(`${path}[${idx}]`);
            try {
                if (globalThis.LWB_Guard?.validate) {
                    const g = globalThis.LWB_Guard.validate('delNode', absIndexPath);
                    if (!g?.allow) continue;
                }
            } catch {}
            if (idx >= 0 && idx < cur.length) {
                cur.splice(idx, 1);
            }
        }
        setLocalVariable(rootName, safeJSONStringify(rootObj));
        return '';
    } catch {
        return '';
    }
}
function registerXbGetVarSlashCommand() {
try {
const ctx = getContext();
@@ -1004,7 +1231,9 @@ export function cleanupVarCommands() {
initialized = false;
}
/**
* 按值从数组中删除元素2.0 pop 操作)
*/
export {
MODULE_ID,
};

View File

@@ -6,7 +6,7 @@
import { getContext } from "../../../../../extensions.js";
import { getLocalVariable } from "../../../../../variables.js";
import { createModuleEvents } from "../../core/event-manager.js";
import { replaceXbGetVarInString } from "./var-commands.js";
import { replaceXbGetVarInString, replaceXbGetVarYamlInString, replaceXbGetVarYamlIdxInString } from "./var-commands.js";
const MODULE_ID = 'vareventEditor';
const LWB_EXT_ID = 'LittleWhiteBox';
@@ -297,12 +297,18 @@ function installWIHiddenTagStripper() {
}
msg.content = await replaceVareventInString(msg.content, false, false);
}
if (msg.content.indexOf('{{xbgetvar::') !== -1) {
msg.content = replaceXbGetVarInString(msg.content);
}
}
if (Array.isArray(msg?.content)) {
for (const part of msg.content) {
if (msg.content.indexOf('{{xbgetvar::') !== -1) {
msg.content = replaceXbGetVarInString(msg.content);
}
if (msg.content.indexOf('{{xbgetvar_yaml::') !== -1) {
msg.content = replaceXbGetVarYamlInString(msg.content);
}
if (msg.content.indexOf('{{xbgetvar_yaml_idx::') !== -1) {
msg.content = replaceXbGetVarYamlIdxInString(msg.content);
}
}
if (Array.isArray(msg?.content)) {
for (const part of msg.content) {
if (part?.type === 'text' && typeof part.text === 'string') {
if (part.text.includes('<varevent')) {
TAG_RE_VAREVENT.lastIndex = 0;
@@ -312,12 +318,18 @@ function installWIHiddenTagStripper() {
}
part.text = await replaceVareventInString(part.text, false, false);
}
if (part.text.indexOf('{{xbgetvar::') !== -1) {
part.text = replaceXbGetVarInString(part.text);
if (part.text.indexOf('{{xbgetvar::') !== -1) {
part.text = replaceXbGetVarInString(part.text);
}
if (part.text.indexOf('{{xbgetvar_yaml::') !== -1) {
part.text = replaceXbGetVarYamlInString(part.text);
}
if (part.text.indexOf('{{xbgetvar_yaml_idx::') !== -1) {
part.text = replaceXbGetVarYamlIdxInString(part.text);
}
}
}
}
}
}
if (typeof msg?.mes === 'string') {
if (msg.mes.includes('<varevent')) {
TAG_RE_VAREVENT.lastIndex = 0;
@@ -327,12 +339,18 @@ function installWIHiddenTagStripper() {
}
msg.mes = await replaceVareventInString(msg.mes, false, false);
}
if (msg.mes.indexOf('{{xbgetvar::') !== -1) {
msg.mes = replaceXbGetVarInString(msg.mes);
if (msg.mes.indexOf('{{xbgetvar::') !== -1) {
msg.mes = replaceXbGetVarInString(msg.mes);
}
if (msg.mes.indexOf('{{xbgetvar_yaml::') !== -1) {
msg.mes = replaceXbGetVarYamlInString(msg.mes);
}
if (msg.mes.indexOf('{{xbgetvar_yaml_idx::') !== -1) {
msg.mes = replaceXbGetVarYamlIdxInString(msg.mes);
}
}
}
}
}
} catch {}
} catch {}
};
try {
if (eventSource && typeof eventSource.makeLast === 'function') {
@@ -361,6 +379,12 @@ function installWIHiddenTagStripper() {
if (data.prompt.indexOf('{{xbgetvar::') !== -1) {
data.prompt = replaceXbGetVarInString(data.prompt);
}
if (data.prompt.indexOf('{{xbgetvar_yaml::') !== -1) {
data.prompt = replaceXbGetVarYamlInString(data.prompt);
}
if (data.prompt.indexOf('{{xbgetvar_yaml_idx::') !== -1) {
data.prompt = replaceXbGetVarYamlIdxInString(data.prompt);
}
}
} catch {}
});

View File

@@ -1,10 +1,10 @@
/**
* @file modules/variables/variables-core.js
* @description 变量管理核心(受开关控制)
* @description 包含 plot-log 解析、快照回滚、变量守护
* @description Variables core (feature-flag controlled)
* @description Includes plot-log parsing, snapshot rollback, and variable guard
*/
import { getContext } from "../../../../../extensions.js";
import { extension_settings, getContext } from "../../../../../extensions.js";
import { updateMessageBlock } from "../../../../../../script.js";
import { getLocalVariable, setLocalVariable } from "../../../../../variables.js";
import { createModuleEvents, event_types } from "../../core/event-manager.js";
@@ -28,6 +28,7 @@ import {
applyXbGetVarForMessage,
parseValueForSet,
} from "./var-commands.js";
import { applyStateForMessage } from "./state2/index.js";
import {
preprocessBumpAliases,
executeQueuedVareventJsAfterTurn,
@@ -36,17 +37,18 @@ import {
TOP_OP_RE,
} from "./varevent-editor.js";
/* ============= 模块常量 ============= */
/* ============ Module Constants ============= */
const MODULE_ID = 'variablesCore';
const EXT_ID = 'LittleWhiteBox';
const LWB_RULES_KEY = 'LWB_RULES';
const LWB_SNAP_KEY = 'LWB_SNAP';
const LWB_PLOT_APPLIED_KEY = 'LWB_PLOT_APPLIED_KEY';
// plot-log 标签正则
// plot-log tag regex
const TAG_RE_PLOTLOG = /<\s*plot-log[^>]*>([\s\S]*?)<\s*\/\s*plot-log\s*>/gi;
// 守护状态
// guardian state
const guardianState = {
table: {},
regexCache: {},
@@ -55,7 +57,8 @@ const guardianState = {
lastMetaSyncAt: 0
};
// 事件管理器
// note
let events = null;
let initialized = false;
let pendingSwipeApply = new Map();
@@ -76,7 +79,7 @@ CacheRegistry.register(MODULE_ID, {
return 0;
}
},
// 新增:估算字节大小(用于 debug-panel 缓存统计)
// estimate bytes for debug panel
getBytes: () => {
try {
let total = 0;
@@ -137,7 +140,7 @@ CacheRegistry.register(MODULE_ID, {
},
});
/* ============= 内部辅助函数 ============= */
/* ============ Internal Helpers ============= */
function getMsgKey(msg) {
return (typeof msg?.mes === 'string') ? 'mes'
@@ -160,7 +163,7 @@ function normalizeOpName(k) {
return OP_MAP[String(k).toLowerCase().trim()] || null;
}
/* ============= 应用签名追踪 ============= */
/* ============ Applied Signature Tracking ============= */
function getAppliedMap() {
const meta = getContext()?.chatMetadata || {};
@@ -206,10 +209,10 @@ function computePlotSignatureFromText(text) {
return chunks.join('\n---\n');
}
/* ============= Plot-Log 解析 ============= */
/* ============ Plot-Log Parsing ============= */
/**
* 提取 plot-log
* Extract plot-log blocks
*/
function extractPlotLogBlocks(text) {
if (!text || typeof text !== 'string') return [];
@@ -224,10 +227,10 @@ function extractPlotLogBlocks(text) {
}
/**
* 解析 plot-log 块内容
* Parse plot-log block content
*/
function parseBlock(innerText) {
// 预处理 bump 别名
// preprocess bump aliases
innerText = preprocessBumpAliases(innerText);
const textForJsonToml = stripLeadingHtmlComments(innerText);
@@ -243,7 +246,7 @@ function parseBlock(innerText) {
};
const norm = (p) => String(p || '').replace(/\[(\d+)\]/g, '.$1');
// 守护指令记录
// guard directive tracking
const guardMap = new Map();
const recordGuardDirective = (path, directives) => {
@@ -292,7 +295,7 @@ function parseBlock(innerText) {
return { directives, curPathRaw, guardTargetRaw, segment: segTrim };
};
// 操作记录函数
// operation record helpers
const putSet = (top, path, value) => {
ops.set[top] ||= {};
ops.set[top][path] = value;
@@ -348,7 +351,7 @@ function parseBlock(innerText) {
return results;
};
// 解码键
// decode key
const decodeKey = (rawKey) => {
const { directives, remainder, original } = extractDirectiveInfo(rawKey);
const path = (remainder || original || String(rawKey)).trim();
@@ -356,7 +359,7 @@ function parseBlock(innerText) {
return path;
};
// 遍历节点
// walk nodes
const walkNode = (op, top, node, basePath = '') => {
if (op === 'set') {
if (node === null || node === undefined) return;
@@ -441,7 +444,7 @@ function parseBlock(innerText) {
}
};
// 处理结构化数据JSON/TOML
// process structured data (json/toml)
const processStructuredData = (data) => {
const process = (d) => {
if (!d || typeof d !== 'object') return;
@@ -507,7 +510,7 @@ function parseBlock(innerText) {
return true;
};
// 尝试 JSON 解析
// try JSON parsing
const tryParseJson = (text) => {
const s = String(text || '').trim();
if (!s || (s[0] !== '{' && s[0] !== '[')) return false;
@@ -563,7 +566,7 @@ function parseBlock(innerText) {
return relaxed !== s && attempt(relaxed);
};
// 尝试 TOML 解析
// try TOML parsing
const tryParseToml = (text) => {
const src = String(text || '').trim();
if (!src || !src.includes('[') || !src.includes('=')) return false;
@@ -638,11 +641,11 @@ function parseBlock(innerText) {
}
};
// 尝试 JSON/TOML
// try JSON/TOML
if (tryParseJson(textForJsonToml)) return finalizeResults();
if (tryParseToml(textForJsonToml)) return finalizeResults();
// YAML 解析
// YAML parsing
let curOp = '';
const stack = [];
@@ -729,7 +732,8 @@ function parseBlock(innerText) {
const curPath = norm(curPathRaw);
if (!curPath) continue;
// 块标量
// note
if (rhs && (rhs[0] === '|' || rhs[0] === '>')) {
const { text, next } = readBlockScalar(i + 1, ind, rhs[0]);
i = next;
@@ -741,7 +745,7 @@ function parseBlock(innerText) {
continue;
}
// 空值(嵌套对象或列表)
// empty value (nested object or list)
if (rhs === '') {
stack.push({
indent: ind,
@@ -791,7 +795,8 @@ function parseBlock(innerText) {
continue;
}
// 普通值
// note
const [top, ...rest] = curPath.split('.');
const rel = rest.join('.');
if (curOp === 'set') {
@@ -817,7 +822,8 @@ function parseBlock(innerText) {
continue;
}
// 顶层列表项del 操作)
// note
const mArr = t.match(/^-+\s*(.+)$/);
if (mArr && stack.length === 0 && curOp === 'del') {
const rawItem = stripQ(stripYamlInlineComment(mArr[1]));
@@ -830,7 +836,8 @@ function parseBlock(innerText) {
continue;
}
// 嵌套列表项
// note
if (mArr && stack.length) {
const curPath = stack[stack.length - 1].path;
const [top, ...rest] = curPath.split('.');
@@ -856,7 +863,7 @@ function parseBlock(innerText) {
return finalizeResults();
}
/* ============= 变量守护与规则集 ============= */
/* ============ Variable Guard & Rules ============= */
function rulesGetTable() {
return guardianState.table || {};
@@ -877,7 +884,7 @@ function rulesLoadFromMeta() {
const raw = meta[LWB_RULES_KEY];
if (raw && typeof raw === 'object') {
rulesSetTable(deepClone(raw));
// 重建正则缓存
// rebuild regex cache
for (const [p, node] of Object.entries(guardianState.table)) {
if (node?.constraints?.regex?.source) {
const src = node.constraints.regex.source;
@@ -1043,7 +1050,7 @@ function getEffectiveParentNode(p) {
}
/**
* 守护验证
* guard validation
*/
export function guardValidate(op, absPath, payload) {
if (guardianState.bypass) return { allow: true, value: payload };
@@ -1057,14 +1064,15 @@ export function guardValidate(op, absPath, payload) {
constraints: {}
};
// 只读检查
// note
if (node.ro) return { allow: false, reason: 'ro' };
const parentPath = getParentPath(p);
const parentNode = parentPath ? (getEffectiveParentNode(p) || { objectPolicy: 'none', arrayPolicy: 'lock' }) : null;
const currentValue = getValueAtPath(p);
// 删除操作
// delete op
if (op === 'delNode') {
if (!parentPath) return { allow: false, reason: 'no-parent' };
@@ -1087,7 +1095,7 @@ export function guardValidate(op, absPath, payload) {
}
}
// 推入操作
// push op
if (op === 'push') {
const arr = getValueAtPath(p);
if (arr === undefined) {
@@ -1124,7 +1132,7 @@ export function guardValidate(op, absPath, payload) {
return { allow: true, value: payload };
}
// 增量操作
// bump op
if (op === 'bump') {
let d = Number(payload);
if (!Number.isFinite(d)) return { allow: false, reason: 'delta-nan' };
@@ -1167,7 +1175,7 @@ export function guardValidate(op, absPath, payload) {
return { allow: true, value: clamped.value };
}
// 设置操作
// set op
if (op === 'set') {
const exists = currentValue !== undefined;
if (!exists) {
@@ -1229,7 +1237,7 @@ export function guardValidate(op, absPath, payload) {
}
/**
* 应用规则增量
* apply rules delta
*/
export function applyRuleDelta(path, delta) {
const p = normalizePath(path);
@@ -1284,7 +1292,7 @@ export function applyRuleDelta(path, delta) {
}
/**
* 从树加载规则
* load rules from tree
*/
export function rulesLoadFromTree(valueTree, basePath) {
const isObj = v => v && typeof v === 'object' && !Array.isArray(v);
@@ -1351,7 +1359,7 @@ export function rulesLoadFromTree(valueTree, basePath) {
}
/**
* 应用规则增量表
* apply rules delta table
*/
export function applyRulesDeltaToTable(delta) {
if (!delta || typeof delta !== 'object') return;
@@ -1362,7 +1370,7 @@ export function applyRulesDeltaToTable(delta) {
}
/**
* 安装变量 API 补丁
* install variable API patch
*/
function installVariableApiPatch() {
try {
@@ -1449,7 +1457,7 @@ function installVariableApiPatch() {
}
/**
* 卸载变量 API 补丁
* uninstall variable API patch
*/
function uninstallVariableApiPatch() {
try {
@@ -1467,7 +1475,7 @@ function uninstallVariableApiPatch() {
} catch {}
}
/* ============= 快照/回滚 ============= */
/* ============ Snapshots / Rollback ============= */
function getSnapMap() {
const meta = getContext()?.chatMetadata || {};
@@ -1488,7 +1496,7 @@ function setVarDict(dict) {
const current = meta.variables || {};
const next = dict || {};
// 清除不存在的变量
// remove missing variables
for (const k of Object.keys(current)) {
if (!(k in next)) {
try { delete current[k]; } catch {}
@@ -1496,7 +1504,8 @@ function setVarDict(dict) {
}
}
// 设置新值
// note
for (const [k, v] of Object.entries(next)) {
let toStore = v;
if (v && typeof v === 'object') {
@@ -1618,6 +1627,7 @@ function rollbackToPreviousOf(messageId) {
const prevId = id - 1;
if (prevId < 0) return;
// 1.0: restore from snapshot if available
const snap = getSnapshot(prevId);
if (snap) {
const normalized = normalizeSnapshotRecord(snap);
@@ -1631,20 +1641,60 @@ function rollbackToPreviousOf(messageId) {
}
}
function rebuildVariablesFromScratch() {
/**
 * Roll variables back to the state preceding the given floor, honoring the
 * active variables mode ('1.0' snapshot rollback vs '2.0' state restore).
 * The optional L0 rollback hook is always notified first (best-effort).
 * @param {number|string} messageId - Floor id whose previous state should be restored.
 */
async function rollbackToPreviousOfAsync(messageId) {
    const floor = Number(messageId);
    if (Number.isNaN(floor)) return;

    // Let the L0 rollback hook react first; its failure must not block us.
    if (typeof globalThis.LWB_StateRollbackHook === 'function') {
        try {
            await globalThis.LWB_StateRollbackHook(floor);
        } catch (e) {
            console.error('[variablesCore] LWB_StateRollbackHook failed:', e);
        }
    }

    if (getVariablesMode() !== '2.0') {
        // 1.0: legacy snapshot-based rollback.
        rollbackToPreviousOf(floor);
        return;
    }

    // 2.0: delegate to the state engine (negative target floors handled inside).
    try {
        const stateV2 = await import('./state2/index.js');
        await stateV2.restoreStateV2ToFloor(floor - 1);
    } catch (e) {
        console.error('[variablesCore][2.0] restoreStateV2ToFloor failed:', e);
    }
}
/**
 * Rebuild all chat variables from scratch by replaying the whole chat.
 * In 2.0 mode this delegates to the state engine's floor restore; in 1.0
 * mode it wipes the variable dict and re-applies every message in order.
 * Best-effort: failures are logged but never thrown to callers.
 */
async function rebuildVariablesFromScratch() {
    try {
        const mode = getVariablesMode();
        if (mode === '2.0') {
            const mod = await import('./state2/index.js');
            const chat = getContext()?.chat || [];
            const lastId = chat.length ? chat.length - 1 : -1;
            await mod.restoreStateV2ToFloor(lastId);
            return;
        }
        // 1.0 legacy logic: reset, then replay sequentially (order matters).
        setVarDict({});
        const chat = getContext()?.chat || [];
        for (let i = 0; i < chat.length; i++) {
            await applyVariablesForMessage(i);
        }
    } catch (e) {
        // Previously swallowed silently; log for diagnosability, matching the
        // error-reporting style used elsewhere in this module.
        console.error('[variablesCore] rebuildVariablesFromScratch failed:', e);
    }
}
/* ============= 应用变量到消息 ============= */
/* ============ Apply Variables To Message ============= */
/**
* 将对象模式转换
* switch to object mode
*/
function asObject(rec) {
if (rec.mode !== 'object') {
@@ -1658,7 +1708,7 @@ function asObject(rec) {
}
/**
* 增量操作辅助
* bump helper
*/
function bumpAtPath(rec, path, delta) {
const numDelta = Number(delta);
@@ -1715,7 +1765,7 @@ function bumpAtPath(rec, path, delta) {
}
/**
* 解析标量数组
* parse scalar array
*/
function parseScalarArrayMaybe(str) {
try {
@@ -1727,8 +1777,55 @@ function parseScalarArrayMaybe(str) {
}
/**
* 应用变量到消息
* apply variables for message
*/
/**
 * Extract the plain text of a chat message, supporting both the classic
 * `mes` string field and OpenAI-style `content` (string or parts array).
 * @param {object|null|undefined} msg - Chat message record.
 * @returns {string} Message text, or '' when none can be derived.
 */
function readMessageText(msg) {
    if (!msg) return '';
    const { mes, content } = msg;
    if (typeof mes === 'string') return mes;
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';
    // Multi-part content: keep only well-formed text parts, newline-joined.
    const textParts = [];
    for (const part of content) {
        if (part?.type === 'text' && typeof part.text === 'string') {
            textParts.push(part.text);
        }
    }
    return textParts.join('\n');
}
/**
 * Read the configured variables engine version ('1.0' plot-log or '2.0' state).
 * Falls back to '1.0' when settings are missing or unreadable.
 * @returns {string} The configured mode, defaulting to '1.0'.
 */
function getVariablesMode() {
    try {
        const configured = extension_settings?.[EXT_ID]?.variablesMode;
        return configured || '1.0';
    } catch {
        return '1.0';
    }
}
/**
 * Mode-aware dispatcher: apply a message's variable commands through the
 * 2.0 state engine or the legacy 1.0 plot-log pipeline.
 * @param {number} messageId - Floor index within the current chat.
 */
async function applyVarsForMessage(messageId) {
    const message = getContext()?.chat?.[messageId];
    if (!message) return;

    if (getVariablesMode() !== '2.0') {
        // 1.0: legacy plot-log processing.
        await applyVariablesForMessage(messageId);
        return;
    }

    // 2.0: synchronous state application; surface non-fatal issues as warnings.
    const result = applyStateForMessage(messageId, readMessageText(message));
    if (result.errors?.length) {
        console.warn('[variablesCore][2.0] warnings:', result.errors);
    }
    if (result.atoms?.length) {
        // Broadcast the generated state atoms to listeners (e.g. var events).
        $(document).trigger('xiaobaix:variables:stateAtomsGenerated', {
            messageId,
            atoms: result.atoms,
        });
    }
}
async function applyVariablesForMessage(messageId) {
try {
const ctx = getContext();
@@ -1739,7 +1836,7 @@ async function applyVariablesForMessage(messageId) {
const preview = (text, max = 220) => {
try {
const s = String(text ?? '').replace(/\s+/g, ' ').trim();
return s.length > max ? s.slice(0, max) + '' : s;
return s.length > max ? s.slice(0, max) + '...' : s;
} catch {
return '';
}
@@ -1779,7 +1876,7 @@ async function applyVariablesForMessage(messageId) {
} catch (e) {
parseErrors++;
if (debugOn) {
try { xbLog.error(MODULE_ID, `plot-log 解析失败:楼层=${messageId}#${idx + 1} 预览=${preview(b)}`, e); } catch {}
try { xbLog.error(MODULE_ID, `plot-log 解析失败:楼层${messageId}${idx + 1} 预览=${preview(b)}`, e); } catch {}
}
return;
}
@@ -1810,7 +1907,7 @@ async function applyVariablesForMessage(messageId) {
try {
xbLog.warn(
MODULE_ID,
`plot-log 未产生可执行指令:楼层=${messageId} 块数=${blocks.length} 解析条目=${parsedPartsTotal} 解析失败=${parseErrors} 预览=${preview(blocks[0])}`
`plot-log 未产生可执行指令:楼层${messageId} 块数=${blocks.length} 解析条目=${parsedPartsTotal} 解析失败=${parseErrors} 预览=${preview(blocks[0])}`
);
} catch {}
}
@@ -1818,7 +1915,7 @@ async function applyVariablesForMessage(messageId) {
return;
}
// 构建变量记录
// build variable records
const byName = new Map();
for (const { name } of ops) {
@@ -1838,9 +1935,9 @@ async function applyVariablesForMessage(messageId) {
const norm = (p) => String(p || '').replace(/\[(\d+)\]/g, '.$1');
// 执行操作
// execute operations
for (const op of ops) {
// 守护指令
// guard directives
if (op.operation === 'guard') {
for (const entry of op.data) {
const path = typeof entry?.path === 'string' ? entry.path.trim() : '';
@@ -1865,7 +1962,7 @@ async function applyVariablesForMessage(messageId) {
const rec = byName.get(root);
if (!rec) continue;
// SET 操作
// set op
if (op.operation === 'setObject') {
for (const [k, v] of Object.entries(op.data)) {
const localPath = joinPath(subPath, k);
@@ -1903,7 +2000,7 @@ async function applyVariablesForMessage(messageId) {
}
}
// DEL 操作
// delete op
else if (op.operation === 'del') {
const obj = asObject(rec);
const pending = [];
@@ -1951,7 +2048,8 @@ async function applyVariablesForMessage(messageId) {
});
}
// 按索引分组(倒序删除)
// note
const arrGroups = new Map();
const objDeletes = [];
@@ -1977,7 +2075,7 @@ async function applyVariablesForMessage(messageId) {
}
}
// PUSH 操作
// push op
else if (op.operation === 'push') {
for (const [k, vals] of Object.entries(op.data)) {
const localPath = joinPath(subPath, k);
@@ -2033,7 +2131,7 @@ async function applyVariablesForMessage(messageId) {
}
}
// BUMP 操作
// bump op
else if (op.operation === 'bump') {
for (const [k, delta] of Object.entries(op.data)) {
const num = Number(delta);
@@ -2077,7 +2175,7 @@ async function applyVariablesForMessage(messageId) {
}
}
// 检查是否有变化
// check for changes
const hasChanges = Array.from(byName.values()).some(rec => rec?.changed === true);
if (!hasChanges && delVarNames.size === 0) {
if (debugOn) {
@@ -2085,7 +2183,7 @@ async function applyVariablesForMessage(messageId) {
const denied = guardDenied ? `,被规则拦截=${guardDenied}` : '';
xbLog.warn(
MODULE_ID,
`plot-log 指令执行后无变化:楼层=${messageId} 指令数=${ops.length}${denied} 示例=${preview(JSON.stringify(guardDeniedSamples))}`
`plot-log 指令执行后无变化:楼层${messageId} 指令数${ops.length}${denied} 示例=${preview(JSON.stringify(guardDeniedSamples))}`
);
} catch {}
}
@@ -2093,7 +2191,7 @@ async function applyVariablesForMessage(messageId) {
return;
}
// 保存变量
// save variables
for (const [name, rec] of byName.entries()) {
if (!rec.changed) continue;
try {
@@ -2105,7 +2203,7 @@ async function applyVariablesForMessage(messageId) {
} catch {}
}
// 删除变量
// delete variables
if (delVarNames.size > 0) {
try {
for (const v of delVarNames) {
@@ -2124,7 +2222,7 @@ async function applyVariablesForMessage(messageId) {
} catch {}
}
/* ============= 事件处理 ============= */
/* ============ Event Handling ============= */
function getMsgIdLoose(payload) {
if (payload && typeof payload === 'object') {
@@ -2150,56 +2248,57 @@ function bindEvents() {
let lastSwipedId;
suppressUpdatedOnce = new Set();
// 消息发送
// note
events?.on(event_types.MESSAGE_SENT, async () => {
try {
snapshotCurrentLastFloor();
if (getVariablesMode() !== '2.0') snapshotCurrentLastFloor();
const chat = getContext()?.chat || [];
const id = chat.length ? chat.length - 1 : undefined;
if (typeof id === 'number') {
await applyVariablesForMessage(id);
await applyVarsForMessage(id);
applyXbGetVarForMessage(id, true);
}
} catch {}
});
// 消息接收
// message received
events?.on(event_types.MESSAGE_RECEIVED, async (data) => {
try {
const id = getMsgIdLoose(data);
if (typeof id === 'number') {
await applyVariablesForMessage(id);
await applyVarsForMessage(id);
applyXbGetVarForMessage(id, true);
await executeQueuedVareventJsAfterTurn();
}
} catch {}
});
// 用户消息渲染
// user message rendered
events?.on(event_types.USER_MESSAGE_RENDERED, async (data) => {
try {
const id = getMsgIdLoose(data);
if (typeof id === 'number') {
await applyVariablesForMessage(id);
await applyVarsForMessage(id);
applyXbGetVarForMessage(id, true);
snapshotForMessageId(id);
if (getVariablesMode() !== '2.0') snapshotForMessageId(id);
}
} catch {}
});
// 角色消息渲染
// character message rendered
events?.on(event_types.CHARACTER_MESSAGE_RENDERED, async (data) => {
try {
const id = getMsgIdLoose(data);
if (typeof id === 'number') {
await applyVariablesForMessage(id);
await applyVarsForMessage(id);
applyXbGetVarForMessage(id, true);
snapshotForMessageId(id);
if (getVariablesMode() !== '2.0') snapshotForMessageId(id);
}
} catch {}
});
// 消息更新
// message updated
events?.on(event_types.MESSAGE_UPDATED, async (data) => {
try {
const id = getMsgIdLoose(data);
@@ -2208,84 +2307,103 @@ function bindEvents() {
suppressUpdatedOnce.delete(id);
return;
}
await applyVariablesForMessage(id);
await applyVarsForMessage(id);
applyXbGetVarForMessage(id, true);
}
} catch {}
});
// 消息编辑
// message edited
events?.on(event_types.MESSAGE_EDITED, async (data) => {
try {
const id = getMsgIdLoose(data);
if (typeof id === 'number') {
clearAppliedFor(id);
rollbackToPreviousOf(id);
if (typeof id !== 'number') return;
setTimeout(async () => {
await applyVariablesForMessage(id);
applyXbGetVarForMessage(id, true);
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
try {
const ctx = getContext();
const msg = ctx?.chat?.[id];
if (msg) updateMessageBlock(id, msg, { rerenderMessage: true });
} catch {}
// Roll back first so re-apply uses the edited message
await rollbackToPreviousOfAsync(id);
try {
const ctx = getContext();
const es = ctx?.eventSource;
const et = ctx?.event_types;
if (es?.emit && et?.MESSAGE_UPDATED) {
suppressUpdatedOnce.add(id);
await es.emit(et.MESSAGE_UPDATED, id);
}
} catch {}
setTimeout(async () => {
await applyVarsForMessage(id);
applyXbGetVarForMessage(id, true);
await executeQueuedVareventJsAfterTurn();
}, 10);
}
try {
const ctx = getContext();
const msg = ctx?.chat?.[id];
if (msg) updateMessageBlock(id, msg, { rerenderMessage: true });
} catch {}
try {
const ctx = getContext();
const es = ctx?.eventSource;
const et = ctx?.event_types;
if (es?.emit && et?.MESSAGE_UPDATED) {
suppressUpdatedOnce.add(id);
await es.emit(et.MESSAGE_UPDATED, id);
}
} catch {}
await executeQueuedVareventJsAfterTurn();
}, 10);
} catch {}
});
// 消息滑动
// message swiped
events?.on(event_types.MESSAGE_SWIPED, async (data) => {
try {
const id = getMsgIdLoose(data);
if (typeof id === 'number') {
lastSwipedId = id;
clearAppliedFor(id);
rollbackToPreviousOf(id);
if (typeof id !== 'number') return;
const tId = setTimeout(async () => {
pendingSwipeApply.delete(id);
await applyVariablesForMessage(id);
await executeQueuedVareventJsAfterTurn();
}, 10);
lastSwipedId = id;
if (getVariablesMode() !== '2.0') clearAppliedFor(id);
pendingSwipeApply.set(id, tId);
}
// Roll back first so swipe applies cleanly
await rollbackToPreviousOfAsync(id);
const tId = setTimeout(async () => {
pendingSwipeApply.delete(id);
await applyVarsForMessage(id);
await executeQueuedVareventJsAfterTurn();
}, 10);
pendingSwipeApply.set(id, tId);
} catch {}
});
// 消息删除
events?.on(event_types.MESSAGE_DELETED, (data) => {
// message deleted
events?.on(event_types.MESSAGE_DELETED, async (data) => {
try {
const id = getMsgIdStrict(data);
if (typeof id === 'number') {
rollbackToPreviousOf(id);
if (typeof id !== 'number') return;
// Roll back first before delete handling
await rollbackToPreviousOfAsync(id);
// 2.0: physical delete -> trim WAL/ckpt to avoid bloat
if (getVariablesMode() === '2.0') {
try {
const mod = await import('./state2/index.js');
await mod.trimStateV2FromFloor(id);
} catch (e) {
console.error('[variablesCore][2.0] trimStateV2FromFloor failed:', e);
}
}
if (getVariablesMode() !== '2.0') {
clearSnapshotsFrom(id);
clearAppliedFrom(id);
}
} catch {}
});
// 生成开始
// note
events?.on(event_types.GENERATION_STARTED, (data) => {
try {
snapshotPreviousFloor();
if (getVariablesMode() !== '2.0') snapshotPreviousFloor();
// 取消滑动延迟
// cancel swipe delay
const t = (typeof data === 'string' ? data : (data?.type || '')).toLowerCase();
if (t === 'swipe' && lastSwipedId != null) {
const tId = pendingSwipeApply.get(lastSwipedId);
@@ -2297,8 +2415,8 @@ function bindEvents() {
} catch {}
});
// 聊天切换
events?.on(event_types.CHAT_CHANGED, () => {
// chat changed
events?.on(event_types.CHAT_CHANGED, async () => {
try {
rulesClearCache();
rulesLoadFromMeta();
@@ -2306,33 +2424,42 @@ function bindEvents() {
const meta = getContext()?.chatMetadata || {};
meta[LWB_PLOT_APPLIED_KEY] = {};
getContext()?.saveMetadataDebounced?.();
if (getVariablesMode() === '2.0') {
try {
const mod = await import('./state2/index.js');
mod.clearStateAppliedFrom(0);
} catch {}
}
} catch {}
});
}
/* ============= 初始化与清理 ============= */
/* ============ Init & Cleanup ============= */
/**
* 初始化模块
* init module
*/
export function initVariablesCore() {
try { xbLog.info('variablesCore', '变量系统启动'); } catch {}
if (initialized) return;
initialized = true;
// 创建事件管理器
// init events
events = createModuleEvents(MODULE_ID);
// 加载规则
// load rules
rulesLoadFromMeta();
// 安装 API 补丁
// install API patch
installVariableApiPatch();
// 绑定事件
// bind events
bindEvents();
// 挂载全局函数(供 var-commands.js 使用)
// note
globalThis.LWB_Guard = {
validate: guardValidate,
loadRules: rulesLoadFromTree,
@@ -2340,48 +2467,76 @@ export function initVariablesCore() {
applyDeltaTable: applyRulesDeltaToTable,
save: rulesSaveToMeta,
};
globalThis.LWB_StateV2 = {
/**
* @param {string} text - 包含 <state>...</state> 的文本
* @param {{ floor?: number, silent?: boolean }} [options]
* - floor: 指定写入/记录用楼层(默认:最后一楼)
* - silent: true 时不触发 stateAtomsGenerated初始化用
*/
applyText: async (text, options = {}) => {
const { applyStateForMessage } = await import('./state2/index.js');
const ctx = getContext();
const floor =
Number.isFinite(options.floor)
? Number(options.floor)
: Math.max(0, (ctx?.chat?.length || 1) - 1);
const result = applyStateForMessage(floor, String(text || ''));
// ✅ 默认会触发(当作事件)
// ✅ 初始化时 silent=true不触发当作基线写入
if (!options.silent && result?.atoms?.length) {
$(document).trigger('xiaobaix:variables:stateAtomsGenerated', {
messageId: floor,
atoms: result.atoms,
});
}
return result;
},
};
}
/**
 * Tear down the variables module.
 * Reverses initVariablesCore: unbinds module events, restores the patched
 * variable APIs, clears cached rules, removes the global hooks, and resets
 * guard state. No-op when the module was never initialized.
 */
export function cleanupVariablesCore() {
try { xbLog.info('variablesCore', '变量系统清理'); } catch {} // best-effort log; logger may be unavailable
if (!initialized) return;
// Unbind all module-scoped event handlers.
events?.cleanup();
events = null;
// Restore the original (unpatched) variable APIs.
uninstallVariableApiPatch();
// Drop the cached guard rules.
rulesClearCache();
// Remove globals exposed for var-commands.js.
delete globalThis.LWB_Guard;
delete globalThis.LWB_StateV2;
// Lift any active guard bypass and mark the module as uninitialized.
guardBypass(false);
initialized = false;
}
/* ============= 导出 ============= */
/* ============ Exports ============= */
export {
MODULE_ID,
// 解析
// parsing
parseBlock,
applyVariablesForMessage,
extractPlotLogBlocks,
// 快照
// snapshots
snapshotCurrentLastFloor,
snapshotForMessageId,
rollbackToPreviousOf,
rebuildVariablesFromScratch,
// 规则
// rules
rulesGetTable,
rulesSetTable,
rulesLoadFromMeta,

View File

@@ -117,38 +117,64 @@ const VT = {
global: { getter: getGlobalVariable, setter: setGlobalVariable, storage: ()=> extension_settings.variables?.global || ((extension_settings.variables = { global: {} }).global), save: saveSettingsDebounced },
};
const LWB_RULES_KEY='LWB_RULES';
const getRulesTable = () => { try { return getContext()?.chatMetadata?.[LWB_RULES_KEY] || {}; } catch { return {}; } };
const EXT_ID = 'LittleWhiteBox';
const LWB_RULES_V1_KEY = 'LWB_RULES';
const LWB_RULES_V2_KEY = 'LWB_RULES_V2';
/**
 * Resolve the guard-rules table for the active variables mode from chat
 * metadata: 1.0 reads LWB_RULES, 2.0 reads LWB_RULES_V2.
 * @returns {object} Rules table keyed by dotted variable path ({} on failure).
 */
const getRulesTable = () => {
    try {
        const meta = getContext()?.chatMetadata || {};
        const isV2 = (extension_settings?.[EXT_ID]?.variablesMode || '1.0') === '2.0';
        const metaKey = isV2 ? LWB_RULES_V2_KEY : LWB_RULES_V1_KEY;
        return meta[metaKey] || {};
    } catch {
        return {};
    }
};
/** Join a path-segment array into a dotted key string ('' on any failure). */
const pathKey = (arr) => {
    try {
        const segments = arr || [];
        return segments.map((seg) => String(seg)).join('.');
    } catch {
        return '';
    }
};
/**
 * Look up the rule node for a path-segment array.
 * @param {Array<string|number>} arr - Path segments.
 * @returns {object|undefined} Rule node, or undefined for empty/unknown paths.
 */
const getRuleNodeByPath = (arr) => {
    const key = pathKey(arr); // compute once instead of twice
    return key ? (getRulesTable() || {})[key] : undefined;
};
const hasAnyRule = (n)=>{
if(!n) return false;
if(n.ro) return true;
if(n.objectPolicy && n.objectPolicy!=='none') return true;
if(n.arrayPolicy && n.arrayPolicy!=='lock') return true;
const c=n.constraints||{};
return ('min'in c)||('max'in c)||('step'in c)||(Array.isArray(c.enum)&&c.enum.length)||(c.regex&&c.regex.source);
/** True when a rule node carries at least one effective constraint. */
const hasAnyRule = (node) => {
    if (!node) return false;
    const hasRange = node.min !== undefined || node.max !== undefined;
    const hasStep = node.step !== undefined;
    const hasEnum = Array.isArray(node.enum) && node.enum.length > 0;
    return Boolean(node.ro || node.lock || hasRange || hasStep || hasEnum);
};
const ruleTip = (n)=>{
if(!n) return '';
const lines=[], c=n.constraints||{};
if(n.ro) lines.push('只读:$ro');
if(n.objectPolicy){ const m={none:'(默认:不可增删键)',ext:'$ext可增键',prune:'$prune可删键',free:'$free可增删键'}; lines.push(`对象策略:${m[n.objectPolicy]||n.objectPolicy}`); }
if(n.arrayPolicy){ const m={lock:'(默认:不可增删项)',grow:'$grow可增项',shrink:'$shrink可删项',list:'$list可增删项'}; lines.push(`数组策略:${m[n.arrayPolicy]||n.arrayPolicy}`); }
if('min'in c||'max'in c){ if('min'in c&&'max'in c) lines.push(`范围:$range=[${c.min},${c.max}]`); else if('min'in c) lines.push(`下限:$min=${c.min}`); else lines.push(`上限:$max=${c.max}`); }
if('step'in c) lines.push(`步长:$step=${c.step}`);
if(Array.isArray(c.enum)&&c.enum.length) lines.push(`枚举:$enum={${c.enum.join(';')}}`);
if(c.regex&&c.regex.source) lines.push(`正则:$match=/${c.regex.source}/${c.regex.flags||''}`);
/** Build a human-readable tooltip describing a rule node's constraints. */
const ruleTip = (node) => {
    if (!node) return '';
    const out = [];
    if (node.ro) out.push('只读:$ro');
    if (node.lock) out.push('结构锁:$lock(禁止增删该层 key/项)');
    const hasMin = node.min !== undefined;
    const hasMax = node.max !== undefined;
    if (hasMin || hasMax) {
        const lo = hasMin ? node.min : '-∞';
        const hi = hasMax ? node.max : '+∞';
        out.push(`范围:$range=[${lo},${hi}]`);
    }
    if (node.step !== undefined) out.push(`步长:$step=${node.step}`);
    if (Array.isArray(node.enum) && node.enum.length) out.push(`枚举:$enum={${node.enum.join(';')}}`);
    return out.join('\n');
};
const badgesHtml = (n)=>{
if(!hasAnyRule(n)) return '';
const tip=ruleTip(n).replace(/"/g,'&quot;'), out=[];
if(n.ro) out.push(`<span class="vm-badge" data-type="ro" title="${tip}"><i class="fa-solid fa-shield-halved"></i></span>`);
if((n.objectPolicy&&n.objectPolicy!=='none')||(n.arrayPolicy&&n.arrayPolicy!=='lock')) out.push(`<span class="vm-badge" data-type="struct" title="${tip}"><i class="fa-solid fa-diagram-project"></i></span>`);
const c=n.constraints||{}; if(('min'in c)||('max'in c)||('step'in c)||(Array.isArray(c.enum)&&c.enum.length)||(c.regex&&c.regex.source)) out.push(`<span class="vm-badge" data-type="cons" title="${tip}"><i class="fa-solid fa-ruler-vertical"></i></span>`);
return out.length?`<span class="vm-badges">${out.join('')}</span>`:'';
/**
 * Render the badge icons for a rule node: read-only, structural lock,
 * and value constraints. Returns '' when the node has no rules.
 */
const badgesHtml = (node) => {
    if (!hasAnyRule(node)) return '';
    const tip = ruleTip(node).replace(/"/g, '&quot;'); // escape for the title attribute
    const badge = (type, icon) =>
        `<span class="vm-badge" data-type="${type}" title="${tip}"><i class="fa-solid ${icon}"></i></span>`;
    const parts = [];
    if (node.ro) parts.push(badge('ro', 'fa-shield-halved'));
    if (node.lock) parts.push(badge('struct', 'fa-diagram-project'));
    const hasConstraint =
        node.min !== undefined ||
        node.max !== undefined ||
        node.step !== undefined ||
        (Array.isArray(node.enum) && node.enum.length);
    if (hasConstraint) parts.push(badge('cons', 'fa-ruler-vertical'));
    return parts.length ? `<span class="vm-badges">${parts.join('')}</span>` : '';
};
/**
 * Return a debounced wrapper that delays `fn` until `ms` milliseconds have
 * elapsed since the most recent invocation; only the final call's arguments
 * reach `fn`.
 */
const debounce = (fn, ms = 200) => {
    let timer;
    return (...args) => {
        clearTimeout(timer);
        timer = setTimeout(() => fn(...args), ms);
    };
};
class VariablesPanel {

View File

@@ -3,7 +3,7 @@
"private": true,
"type": "module",
"scripts": {
"lint": "eslint \"**/*.js\"",
"lint": "node scripts/check-garbled.js && eslint \"**/*.js\"",
"lint:fix": "eslint \"**/*.js\" --fix"
},
"devDependencies": {

80
scripts/check-garbled.js Normal file
View File

@@ -0,0 +1,80 @@
/* eslint-env node */
import fs from 'fs';
import path from 'path';
// Scan configuration: repository root, file types to check, directories to skip.
const root = process.cwd();
const includeExts = new Set(['.js', '.html', '.css']);
const ignoreDirs = new Set(['node_modules', '.git']);
// Signatures of mojibake / encoding damage: runs of "???" left by lossy
// transcoding, and U+FFFD replacement characters.
const patterns = [
{ name: 'question-marks', regex: /\?\?\?/g },
{ name: 'replacement-char', regex: /\uFFFD/g },
];
/** True when a directory entry should be excluded from the scan. */
function isIgnoredDir(dirName) {
    const skip = ignoreDirs.has(dirName);
    return skip;
}
/**
 * Recursively collect files under `dir` whose extension is in `includeExts`,
 * skipping directories listed in `ignoreDirs`.
 * @param {string} dir - Directory to scan.
 * @param {string[]} [files] - Accumulator shared across recursive calls.
 * @returns {string[]} Matching file paths (joined onto `dir`).
 */
function walk(dir, files = []) {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        const fullPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
            if (!isIgnoredDir(entry.name)) walk(fullPath, files);
        } else if (entry.isFile() && includeExts.has(path.extname(entry.name))) {
            files.push(fullPath);
        }
    }
    return files;
}
/**
 * Scan one file line-by-line against the module-level `patterns` list.
 * @param {string} filePath - File to read as utf8.
 * @returns {Array<{line:number,name:string,preview:string}>} One hit per
 *   (line, pattern) match; [] when the file cannot be read.
 */
function scanFile(filePath) {
    let content;
    try {
        content = fs.readFileSync(filePath, 'utf8');
    } catch {
        return []; // unreadable file: nothing to report
    }
    const hits = [];
    content.split(/\r?\n/).forEach((line, index) => {
        for (const { name, regex } of patterns) {
            regex.lastIndex = 0; // shared /g regex carries state; reset before each test
            if (regex.test(line)) {
                hits.push({
                    line: index + 1,
                    name,
                    preview: line.replace(/\t/g, '\\t').slice(0, 200),
                });
            }
        }
    });
    return hits;
}
// Entry point: walk the whole repository and collect garbled-text hits per file.
const files = walk(root);
const issues = [];
for (const file of files) {
const hits = scanFile(file);
if (hits.length) {
issues.push({ file, hits });
}
}
// Report one line per hit and fail the lint run (exit code 1) when anything
// matched; otherwise confirm a clean pass.
if (issues.length) {
console.error('Garbled text check failed:');
for (const issue of issues) {
const rel = path.relative(root, issue.file);
for (const hit of issue.hits) {
console.error(`- ${rel}:${hit.line} [${hit.name}] ${hit.preview}`);
}
}
process.exit(1);
} else {
console.log('Garbled text check passed.');
}

View File

@@ -213,10 +213,16 @@
<br>
<div class="section-divider">变量控制</div>
<hr class="sysHR" />
<div class="flex-container">
<div class="flex-container" style="gap:8px;flex-wrap:wrap;align-items:center;">
<input type="checkbox" id="xiaobaix_variables_core_enabled" />
<label for="xiaobaix_variables_core_enabled">变量管理</label>
<select id="xiaobaix_variables_mode" class="text_pole" style="width:auto;margin-left:8px;padding:2px 6px;">
<option value="1.0">1.0 (plot-log)</option>
<option value="2.0">2.0 (state)</option>
</select>
</div>
<div class="flex-container">
<input type="checkbox" id="xiaobaix_variables_panel_enabled" />
<label for="xiaobaix_variables_panel_enabled">变量面板</label>