Files
LittleWhiteBox/modules/tts/tts-text.js
2026-01-17 16:34:39 +08:00

318 lines
11 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// tts-text.js
/**
* TTS 文本提取与情绪处理
*/
// ============ 文本提取 ============
export function extractSpeakText(rawText, rules = {}) {
if (!rawText || typeof rawText !== 'string') return '';
let text = rawText;
const ttsPlaceholders = [];
text = text.replace(/\[tts:[^\]]*\]/gi, (match) => {
const placeholder = `__TTS_TAG_${ttsPlaceholders.length}__`;
ttsPlaceholders.push(match);
return placeholder;
});
const ranges = Array.isArray(rules.skipRanges) ? rules.skipRanges : [];
for (const range of ranges) {
const start = String(range?.start ?? '').trim();
const end = String(range?.end ?? '').trim();
if (!start && !end) continue;
if (!start && end) {
const endIdx = text.indexOf(end);
if (endIdx !== -1) text = text.slice(endIdx + end.length);
continue;
}
if (start && !end) {
const startIdx = text.indexOf(start);
if (startIdx !== -1) text = text.slice(0, startIdx);
continue;
}
let out = '';
let i = 0;
while (true) {
const sIdx = text.indexOf(start, i);
if (sIdx === -1) {
out += text.slice(i);
break;
}
out += text.slice(i, sIdx);
const eIdx = text.indexOf(end, sIdx + start.length);
if (eIdx === -1) break;
i = eIdx + end.length;
}
text = out;
}
const readRanges = Array.isArray(rules.readRanges) ? rules.readRanges : [];
if (rules.readRangesEnabled && readRanges.length) {
const keepSpans = [];
for (const range of readRanges) {
const start = String(range?.start ?? '').trim();
const end = String(range?.end ?? '').trim();
if (!start && !end) {
keepSpans.push({ start: 0, end: text.length });
continue;
}
if (!start && end) {
const endIdx = text.indexOf(end);
if (endIdx !== -1) keepSpans.push({ start: 0, end: endIdx });
continue;
}
if (start && !end) {
const startIdx = text.indexOf(start);
if (startIdx !== -1) keepSpans.push({ start: startIdx + start.length, end: text.length });
continue;
}
let i = 0;
while (true) {
const sIdx = text.indexOf(start, i);
if (sIdx === -1) break;
const eIdx = text.indexOf(end, sIdx + start.length);
if (eIdx === -1) {
keepSpans.push({ start: sIdx + start.length, end: text.length });
break;
}
keepSpans.push({ start: sIdx + start.length, end: eIdx });
i = eIdx + end.length;
}
}
if (keepSpans.length) {
keepSpans.sort((a, b) => a.start - b.start || a.end - b.end);
const merged = [];
for (const span of keepSpans) {
if (!merged.length || span.start > merged[merged.length - 1].end) {
merged.push({ start: span.start, end: span.end });
} else {
merged[merged.length - 1].end = Math.max(merged[merged.length - 1].end, span.end);
}
}
text = merged.map(span => text.slice(span.start, span.end)).join('');
} else {
text = '';
}
}
text = text.replace(/<script[\s\S]*?<\/script>/gi, '');
text = text.replace(/<style[\s\S]*?<\/style>/gi, '');
text = text.replace(/\n{3,}/g, '\n\n').trim();
for (let i = 0; i < ttsPlaceholders.length; i++) {
text = text.replace(`__TTS_TAG_${i}__`, ttsPlaceholders[i]);
}
return text;
}
// ============ 分段解析 ============
export function parseTtsSegments(text) {
if (!text || typeof text !== 'string') return [];
const segments = [];
const re = /\[tts:([^\]]*)\]/gi;
let lastIndex = 0;
let match = null;
// 当前块的配置,每遇到新 [tts:] 块都重置
let current = { emotion: '', context: '', speaker: '' };
const pushSegment = (segmentText) => {
const t = String(segmentText || '').trim();
if (!t) return;
segments.push({
text: t,
emotion: current.emotion || '',
context: current.context || '',
speaker: current.speaker || '', // 空字符串表示使用 UI 默认
});
};
const parseDirective = (raw) => {
// ★ 关键修改:每个新块都重置为空,不继承上一个块的 speaker
const next = { emotion: '', context: '', speaker: '' };
const parts = String(raw || '').split(';').map(s => s.trim()).filter(Boolean);
for (const part of parts) {
const idx = part.indexOf('=');
if (idx === -1) continue;
const key = part.slice(0, idx).trim().toLowerCase();
let val = part.slice(idx + 1).trim();
if ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith('\'') && val.endsWith('\''))) {
val = val.slice(1, -1).trim();
}
if (key === 'emotion') next.emotion = val;
if (key === 'context') next.context = val;
if (key === 'speaker') next.speaker = val;
}
current = next;
};
while ((match = re.exec(text)) !== null) {
pushSegment(text.slice(lastIndex, match.index));
parseDirective(match[1]);
lastIndex = match.index + match[0].length;
}
pushSegment(text.slice(lastIndex));
return segments;
}
// ============ 非鉴权分段切割 ============
const FREE_MAX_TEXT = 200;
const FREE_MIN_TEXT = 50;
const FREE_SENTENCE_DELIMS = new Set(['。', '', '', '!', '?', ';', '', '…', '.', '', ',', '、', ':', '']);
function splitLongTextBySentence(text, maxLength) {
const sentences = [];
let buf = '';
for (const ch of String(text || '')) {
buf += ch;
if (FREE_SENTENCE_DELIMS.has(ch)) {
sentences.push(buf);
buf = '';
}
}
if (buf) sentences.push(buf);
const chunks = [];
let current = '';
for (const sentence of sentences) {
if (!sentence) continue;
if (sentence.length > maxLength) {
if (current) {
chunks.push(current);
current = '';
}
for (let i = 0; i < sentence.length; i += maxLength) {
chunks.push(sentence.slice(i, i + maxLength));
}
continue;
}
if (!current) {
current = sentence;
continue;
}
if (current.length + sentence.length > maxLength) {
chunks.push(current);
current = sentence;
continue;
}
current += sentence;
}
if (current) chunks.push(current);
return chunks;
}
function splitTextForFree(text, maxLength = FREE_MAX_TEXT) {
const chunks = [];
const paragraphs = String(text || '').split(/\n\s*\n/).map(s => s.replace(/\n+/g, '\n').trim()).filter(Boolean);
for (const para of paragraphs) {
if (para.length <= maxLength) {
chunks.push(para);
continue;
}
chunks.push(...splitLongTextBySentence(para, maxLength));
}
return chunks;
}
export function splitTtsSegmentsForFree(segments, maxLength = FREE_MAX_TEXT) {
if (!Array.isArray(segments) || !segments.length) return [];
const out = [];
for (const seg of segments) {
const parts = splitTextForFree(seg.text, maxLength);
if (!parts.length) continue;
let buffer = '';
for (const part of parts) {
const t = String(part || '').trim();
if (!t) continue;
if (!buffer) {
buffer = t;
continue;
}
if (buffer.length < FREE_MIN_TEXT && buffer.length + t.length <= maxLength) {
buffer += `\n${t}`;
continue;
}
out.push({
text: buffer,
emotion: seg.emotion || '',
context: seg.context || '',
speaker: seg.speaker || '',
resolvedSpeaker: seg.resolvedSpeaker || '',
resolvedSource: seg.resolvedSource || '',
});
buffer = t;
}
if (buffer) {
out.push({
text: buffer,
emotion: seg.emotion || '',
context: seg.context || '',
speaker: seg.speaker || '',
resolvedSpeaker: seg.resolvedSpeaker || '',
resolvedSource: seg.resolvedSource || '',
});
}
}
return out;
}
// ============ 默认跳过标签 ============
export const DEFAULT_SKIP_TAGS = ['状态栏'];
// ============ 情绪处理 ============
export const TTS_EMOTIONS = new Set([
'happy', 'sad', 'angry', 'surprised', 'fear', 'hate', 'excited', 'coldness', 'neutral',
'depressed', 'lovey-dovey', 'shy', 'comfort', 'tension', 'tender', 'storytelling', 'radio',
'magnetic', 'advertising', 'vocal-fry', 'asmr', 'news', 'entertainment', 'dialect',
'chat', 'warm', 'affectionate', 'authoritative',
]);
export const EMOTION_CN_MAP = {
'开心': 'happy', '高兴': 'happy', '愉悦': 'happy',
'悲伤': 'sad', '难过': 'sad',
'生气': 'angry', '愤怒': 'angry',
'惊讶': 'surprised',
'恐惧': 'fear', '害怕': 'fear',
'厌恶': 'hate',
'激动': 'excited', '兴奋': 'excited',
'冷漠': 'coldness', '中性': 'neutral', '沮丧': 'depressed',
'撒娇': 'lovey-dovey', '害羞': 'shy',
'安慰': 'comfort', '鼓励': 'comfort',
'咆哮': 'tension', '焦急': 'tension',
'温柔': 'tender',
'讲故事': 'storytelling', '自然讲述': 'storytelling',
'情感电台': 'radio', '磁性': 'magnetic',
'广告营销': 'advertising', '气泡音': 'vocal-fry',
'低语': 'asmr', '新闻播报': 'news',
'娱乐八卦': 'entertainment', '方言': 'dialect',
'对话': 'chat', '闲聊': 'chat',
'温暖': 'warm', '深情': 'affectionate', '权威': 'authoritative',
};
export function normalizeEmotion(raw) {
if (!raw) return '';
let val = String(raw).trim();
if (!val) return '';
val = EMOTION_CN_MAP[val] || EMOTION_CN_MAP[val.toLowerCase()] || val.toLowerCase();
if (val === 'vocal - fry' || val === 'vocal_fry') val = 'vocal-fry';
if (val === 'surprise') val = 'surprised';
if (val === 'scare') val = 'fear';
return TTS_EMOTIONS.has(val) ? val : '';
}