Add vector IO and text filtering

This commit is contained in:
2026-01-29 17:02:51 +08:00
parent fc23781e17
commit ee5f02fff9
10 changed files with 3368 additions and 42 deletions

View File

@@ -18,6 +18,7 @@ import {
} from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { filterText } from './text-filter.js';
const MODULE_ID = 'chunk-builder';
@@ -47,9 +48,9 @@ export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
const speaker = message.name || (message.is_user ? '用户' : '角色');
const isUser = !!message.is_user;
const cleanText = text
.replace(/<think>[\s\S]*?<\/think>/gi, '')
.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '')
// 1. Apply the user-defined filter rules
// 2. Strip TTS markers (hard-coded)
const cleanText = filterText(text)
.replace(/\[tts:[^\]]*\]/gi, '')
.trim();