// tts-text.js

/**
 * TTS 文本提取与情绪处理
 */

// ============ 文本提取 ============

/**
 * Reduce a raw message to the text that should be spoken.
 *
 * Processing order:
 *  1. Every `[tts:...]` tag is swapped for an opaque placeholder, so the
 *     following steps operate on placeholders instead of the tag text.
 *  2. `rules.skipRanges` — for each `{ start, end }` marker pair (trimmed):
 *       - both empty: ignored;
 *       - only `end`: drop everything up to and including its first occurrence;
 *       - only `start`: drop everything from its first occurrence onwards;
 *       - both: drop every `start … end` span, markers included. A `start`
 *         without a following `end` drops the rest of the text.
 *  3. `rules.readRanges` (only when `rules.readRangesEnabled` is truthy and the
 *     list is non-empty) — collect the spans to keep:
 *       - both empty: the whole text;
 *       - only `end`: the text before its first occurrence;
 *       - only `start`: the text after its first occurrence;
 *       - both: the inside of every `start … end` span. A `start` without a
 *         following `end` keeps everything after it.
 *     Spans are sorted, overlapping or touching spans are merged, and the kept
 *     pieces are concatenated. If nothing matched, the text becomes empty.
 *  4. `<script>` and `<style>` elements are removed, runs of three or more
 *     newlines are collapsed to two, and the result is trimmed.
 *  5. Placeholders that survived are replaced by the original tags.
 *
 * @param {string} rawText - Raw message text.
 * @param {object} [rules] - Optional `skipRanges`, `readRanges` and `readRangesEnabled`.
 * @returns {string} The extracted text, or `''` for empty / non-string input.
 */
export function extractSpeakText(rawText, rules = {}) {
    if (!rawText || typeof rawText !== 'string') return '';

    // The default parameter only covers `undefined`; tolerate an explicit `null` too.
    const options = rules ?? {};
    let text = rawText;

    // Step 1: park every [tts:...] tag behind a placeholder.
    const ttsPlaceholders = [];
    text = text.replace(/\[tts:[^\]]*\]/gi, (match) => {
        const placeholder = `__TTS_TAG_${ttsPlaceholders.length}__`;
        ttsPlaceholders.push(match);
        return placeholder;
    });

    // Step 2: remove skip ranges.
    const skipRanges = Array.isArray(options.skipRanges) ? options.skipRanges : [];
    for (const range of skipRanges) {
        const start = String(range?.start ?? '').trim();
        const end = String(range?.end ?? '').trim();
        if (!start && !end) continue;

        if (!start) {
            const endIdx = text.indexOf(end);
            if (endIdx !== -1) text = text.slice(endIdx + end.length);
            continue;
        }

        if (!end) {
            const startIdx = text.indexOf(start);
            if (startIdx !== -1) text = text.slice(0, startIdx);
            continue;
        }

        let kept = '';
        let cursor = 0;
        while (true) {
            const sIdx = text.indexOf(start, cursor);
            if (sIdx === -1) {
                kept += text.slice(cursor);
                break;
            }
            kept += text.slice(cursor, sIdx);
            const eIdx = text.indexOf(end, sIdx + start.length);
            // Unclosed range: everything after the opening marker is dropped.
            if (eIdx === -1) break;
            cursor = eIdx + end.length;
        }
        text = kept;
    }

    // Step 3: restrict to read ranges when that mode is enabled.
    const readRanges = Array.isArray(options.readRanges) ? options.readRanges : [];
    if (options.readRangesEnabled && readRanges.length) {
        const keepSpans = [];
        for (const range of readRanges) {
            const start = String(range?.start ?? '').trim();
            const end = String(range?.end ?? '').trim();

            if (!start && !end) {
                keepSpans.push({ start: 0, end: text.length });
                continue;
            }

            if (!start) {
                const endIdx = text.indexOf(end);
                if (endIdx !== -1) keepSpans.push({ start: 0, end: endIdx });
                continue;
            }

            if (!end) {
                const startIdx = text.indexOf(start);
                if (startIdx !== -1) keepSpans.push({ start: startIdx + start.length, end: text.length });
                continue;
            }

            let cursor = 0;
            while (true) {
                const sIdx = text.indexOf(start, cursor);
                if (sIdx === -1) break;
                const eIdx = text.indexOf(end, sIdx + start.length);
                if (eIdx === -1) {
                    // Unclosed range: keep everything after the opening marker.
                    keepSpans.push({ start: sIdx + start.length, end: text.length });
                    break;
                }
                keepSpans.push({ start: sIdx + start.length, end: eIdx });
                cursor = eIdx + end.length;
            }
        }

        if (keepSpans.length) {
            keepSpans.sort((a, b) => a.start - b.start || a.end - b.end);
            const merged = [];
            for (const span of keepSpans) {
                const last = merged[merged.length - 1];
                if (!last || span.start > last.end) {
                    merged.push({ start: span.start, end: span.end });
                } else {
                    last.end = Math.max(last.end, span.end);
                }
            }
            text = merged.map((span) => text.slice(span.start, span.end)).join('');
        } else {
            text = '';
        }
    }

    // Step 4: strip script/style elements and normalise blank lines.
    text = text.replace(/<script[\s\S]*?<\/script>/gi, '');
    text = text.replace(/<style[\s\S]*?<\/style>/gi, '');
    text = text.replace(/\n{3,}/g, '\n\n').trim();

    // Step 5: restore the tags. A replacer function is used on purpose: with a
    // plain replacement string, sequences such as "$&" or "$$" inside a tag
    // would be interpreted as replacement patterns and corrupt the tag.
    for (let i = 0; i < ttsPlaceholders.length; i++) {
        text = text.replace(`__TTS_TAG_${i}__`, () => ttsPlaceholders[i]);
    }

    return text;
}

// ============ 分段解析 ============

/**
 * Split text into spoken segments driven by inline `[tts:...]` directives.
 *
 * A directive has the form `[tts:key=value;key=value]`. Recognised keys
 * (case-insensitive) are `emotion`, `context` and `speaker`; a value may be
 * wrapped in matching single or double quotes. Each directive applies to the
 * text that follows it, up to the next directive. Settings are NOT inherited:
 * every directive starts again from empty values. Text before the first
 * directive gets empty settings.
 *
 * @param {string} text - Text possibly containing `[tts:...]` directives.
 * @returns {Array<{text: string, emotion: string, context: string, speaker: string}>}
 *   Non-empty, trimmed segments in document order. An empty `speaker` means
 *   "use the UI default".
 */
export function parseTtsSegments(text) {
    if (typeof text !== 'string' || text === '') return [];

    const result = [];

    // Turn the inside of one directive into a fresh settings object.
    const readDirective = (raw) => {
        const fields = { emotion: '', context: '', speaker: '' };
        for (const piece of String(raw || '').split(';')) {
            const entry = piece.trim();
            if (!entry) continue;

            const eq = entry.indexOf('=');
            if (eq === -1) continue;

            const name = entry.slice(0, eq).trim().toLowerCase();
            let value = entry.slice(eq + 1).trim();
            const quote = value[0];
            if ((quote === '"' || quote === '\'') && value.endsWith(quote)) {
                value = value.slice(1, -1).trim();
            }

            switch (name) {
                case 'emotion':
                case 'context':
                case 'speaker':
                    fields[name] = value;
                    break;
                default:
                    break;
            }
        }
        return fields;
    };

    // Settings that apply to the text currently being collected.
    let active = { emotion: '', context: '', speaker: '' };

    const emit = (chunk) => {
        const body = String(chunk || '').trim();
        if (body) {
            result.push({
                text: body,
                emotion: active.emotion || '',
                context: active.context || '',
                speaker: active.speaker || '',
            });
        }
    };

    let cursor = 0;
    for (const hit of text.matchAll(/\[tts:([^\]]*)\]/gi)) {
        emit(text.slice(cursor, hit.index));
        active = readDirective(hit[1]);
        cursor = hit.index + hit[0].length;
    }
    emit(text.slice(cursor));

    return result;
}

// ============ 非鉴权分段切割 ============

// Default upper bound for the length (String#length) of one chunk.
const FREE_MAX_TEXT = 200;
// A pending chunk shorter than this may be merged with the chunk that follows it.
const FREE_MIN_TEXT = 50;
// Characters that end a "sentence" when long text has to be split.
// Each punctuation mark is listed in both its full-width (CJK) and ASCII form.
// The full-width forms are written as escapes so they cannot be silently
// folded to ASCII by an editor or a text-normalising tool.
const FREE_SENTENCE_DELIMS = new Set([
    '。',
    '\uFF01', // full-width !
    '\uFF1F', // full-width ?
    '!',
    '?',
    '\uFF1B', // full-width ;
    ';',
    '…',
    '.',
    '\uFF0C', // full-width ,
    ',',
    '、',
    '\uFF1A', // full-width :
    ':',
]);

/**
 * Split text into chunks of at most `maxLength` UTF-16 code units, preferring
 * to break right after a sentence delimiter (see FREE_SENTENCE_DELIMS).
 *
 * Sentences are packed greedily into a chunk while they fit. A single sentence
 * longer than the limit is cut into fixed-size pieces; such a cut never falls
 * between the two halves of a surrogate pair.
 *
 * @param {string} text - Text to split.
 * @param {number} maxLength - Maximum chunk length. Values below 1 are treated
 *   as 1 and fractions are floored, so the loop always advances. A non-numeric
 *   or infinite value means "no limit".
 * @returns {string[]} The chunks in order; empty for empty input. With a limit
 *   of 1, a chunk holding one surrogate pair has length 2.
 */
function splitLongTextBySentence(text, maxLength) {
    // Sanitise the limit: 0, negative or fractional values would make the
    // hard-split loop below stall or emit overlapping slices.
    const requested = Number(maxLength);
    const limit = Number.isFinite(requested) ? Math.max(1, Math.floor(requested)) : Infinity;

    // Pass 1: cut the text right after every delimiter character.
    const sentences = [];
    let pending = '';
    for (const ch of String(text || '')) {
        pending += ch;
        if (FREE_SENTENCE_DELIMS.has(ch)) {
            sentences.push(pending);
            pending = '';
        }
    }
    if (pending) sentences.push(pending);

    const isHighSurrogate = (code) => code >= 0xD800 && code <= 0xDBFF;
    const isLowSurrogate = (code) => code >= 0xDC00 && code <= 0xDFFF;

    // Pass 2: pack sentences into chunks that respect the limit.
    const chunks = [];
    let current = '';
    for (const sentence of sentences) {
        if (sentence.length > limit) {
            // Over-long sentence: flush what we have, then hard-split it.
            if (current) {
                chunks.push(current);
                current = '';
            }
            let pos = 0;
            while (pos < sentence.length) {
                let cut = Math.min(pos + limit, sentence.length);
                const splitsPair = cut < sentence.length
                    && isHighSurrogate(sentence.charCodeAt(cut - 1))
                    && isLowSurrogate(sentence.charCodeAt(cut));
                if (splitsPair) {
                    // Back off by one unit, or take the whole pair when
                    // backing off would produce an empty piece.
                    cut = cut - 1 > pos ? cut - 1 : cut + 1;
                }
                chunks.push(sentence.slice(pos, cut));
                pos = cut;
            }
            continue;
        }

        if (current && current.length + sentence.length > limit) {
            chunks.push(current);
            current = sentence;
        } else {
            current += sentence;
        }
    }
    if (current) chunks.push(current);

    return chunks;
}

/**
 * Break text into chunks no longer than `maxLength`.
 *
 * The text is first divided into paragraphs at blank lines. Inside a paragraph,
 * consecutive newlines are collapsed to one and the paragraph is trimmed; empty
 * paragraphs are discarded. A paragraph that fits is emitted unchanged, a longer
 * one is handed to `splitLongTextBySentence`.
 *
 * @param {string} text - Text to split.
 * @param {number} [maxLength=FREE_MAX_TEXT] - Maximum chunk length.
 * @returns {string[]} The chunks in document order.
 */
function splitTextForFree(text, maxLength = FREE_MAX_TEXT) {
    const blocks = [];
    for (const rawBlock of String(text || '').split(/\n\s*\n/)) {
        const tidy = rawBlock.replace(/\n+/g, '\n').trim();
        if (tidy) blocks.push(tidy);
    }

    return blocks.flatMap((block) => (
        block.length <= maxLength ? [block] : splitLongTextBySentence(block, maxLength)
    ));
}

/**
 * Re-chunk parsed TTS segments so that no chunk is longer than `maxLength`.
 *
 * Each segment's text is split with `splitTextForFree`. While the pending chunk
 * is shorter than FREE_MIN_TEXT, the next piece is appended to it on a new line,
 * provided the joined text (newline included) still fits within `maxLength`.
 * Pieces are never merged across segments. Every output chunk copies the source
 * segment's `emotion`, `context`, `speaker`, `resolvedSpeaker` and
 * `resolvedSource` (missing values become `''`).
 *
 * @param {Array<object>} segments - Segments carrying a `text` property.
 * @param {number} [maxLength=FREE_MAX_TEXT] - Maximum chunk length.
 * @returns {Array<object>} The chunks in order; `[]` for empty or non-array input.
 */
export function splitTtsSegmentsForFree(segments, maxLength = FREE_MAX_TEXT) {
    if (!Array.isArray(segments) || !segments.length) return [];

    const out = [];
    for (const seg of segments) {
        // Skip holes / null entries instead of throwing on `seg.text`.
        if (!seg) continue;

        const parts = splitTextForFree(seg.text, maxLength);
        if (!parts.length) continue;

        // Emit one chunk that inherits the segment's metadata.
        const emit = (chunkText) => {
            out.push({
                text: chunkText,
                emotion: seg.emotion || '',
                context: seg.context || '',
                speaker: seg.speaker || '',
                resolvedSpeaker: seg.resolvedSpeaker || '',
                resolvedSource: seg.resolvedSource || '',
            });
        };

        let buffer = '';
        for (const part of parts) {
            const piece = String(part || '').trim();
            if (!piece) continue;

            if (!buffer) {
                buffer = piece;
                continue;
            }

            // "+ 1" accounts for the newline joiner, so a merged chunk can
            // never end up one character over the limit.
            const mergedLength = buffer.length + 1 + piece.length;
            if (buffer.length < FREE_MIN_TEXT && mergedLength <= maxLength) {
                buffer += `\n${piece}`;
                continue;
            }

            emit(buffer);
            buffer = piece;
        }
        if (buffer) emit(buffer);
    }
    return out;
}

// ============ 默认跳过标签 ============

/**
 * Tag names that are skipped by default.
 * NOTE(review): nothing in this module reads this list — presumably importers
 * turn it into skip rules; confirm against the callers.
 */
export const DEFAULT_SKIP_TAGS = [
    '状态栏',
];

// ============ 情绪处理 ============

/**
 * Canonical emotion identifiers. `normalizeEmotion` only ever returns a member
 * of this set (or an empty string).
 */
export const TTS_EMOTIONS = new Set([
    'happy',
    'sad',
    'angry',
    'surprised',
    'fear',
    'hate',
    'excited',
    'coldness',
    'neutral',
    'depressed',
    'lovey-dovey',
    'shy',
    'comfort',
    'tension',
    'tender',
    'storytelling',
    'radio',
    'magnetic',
    'advertising',
    'vocal-fry',
    'asmr',
    'news',
    'entertainment',
    'dialect',
    'chat',
    'warm',
    'affectionate',
    'authoritative',
]);

/**
 * Chinese emotion words mapped to their canonical English identifier.
 * Several words may map to the same identifier.
 */
export const EMOTION_CN_MAP = {
    // happy
    '开心': 'happy',
    '高兴': 'happy',
    '愉悦': 'happy',
    // sad
    '悲伤': 'sad',
    '难过': 'sad',
    // angry
    '生气': 'angry',
    '愤怒': 'angry',
    // surprised
    '惊讶': 'surprised',
    // fear
    '恐惧': 'fear',
    '害怕': 'fear',
    // hate
    '厌恶': 'hate',
    // excited
    '激动': 'excited',
    '兴奋': 'excited',
    // coldness / neutral / depressed
    '冷漠': 'coldness',
    '中性': 'neutral',
    '沮丧': 'depressed',
    // lovey-dovey / shy
    '撒娇': 'lovey-dovey',
    '害羞': 'shy',
    // comfort
    '安慰': 'comfort',
    '鼓励': 'comfort',
    // tension
    '咆哮': 'tension',
    '焦急': 'tension',
    // tender
    '温柔': 'tender',
    // storytelling
    '讲故事': 'storytelling',
    '自然讲述': 'storytelling',
    // speaking styles
    '情感电台': 'radio',
    '磁性': 'magnetic',
    '广告营销': 'advertising',
    '气泡音': 'vocal-fry',
    '低语': 'asmr',
    '新闻播报': 'news',
    '娱乐八卦': 'entertainment',
    '方言': 'dialect',
    // chat
    '对话': 'chat',
    '闲聊': 'chat',
    // warm / affectionate / authoritative
    '温暖': 'warm',
    '深情': 'affectionate',
    '权威': 'authoritative',
};

/**
 * Normalise a user- or model-supplied emotion label to a canonical identifier.
 *
 * Steps: trim; translate through EMOTION_CN_MAP (exact key first, then the
 * lower-cased key); otherwise lower-case the label; collapse any run of spaces,
 * underscores and hyphens into a single hyphen (so "vocal - fry", "vocal_fry",
 * "Vocal Fry" and "lovey_dovey" all resolve); apply the aliases
 * `surprise` → `surprised` and `scare` → `fear`; finally accept the result
 * only if it is a member of TTS_EMOTIONS.
 *
 * @param {*} raw - Emotion label in any casing, English or Chinese.
 * @returns {string} A member of TTS_EMOTIONS, or `''` when the label is
 *   empty or not recognised.
 */
export function normalizeEmotion(raw) {
    if (!raw) return '';
    const label = String(raw).trim();
    if (!label) return '';

    const lowered = label.toLowerCase();
    // Own-property lookup only, so labels such as "constructor" never pick up
    // members inherited from Object.prototype.
    const lookup = (key) => (
        Object.prototype.hasOwnProperty.call(EMOTION_CN_MAP, key) ? EMOTION_CN_MAP[key] : ''
    );

    let val = String(lookup(label) || lookup(lowered) || lowered);

    // Accept space / underscore / hyphen interchangeably as the word separator.
    val = val.replace(/[\s_-]+/g, '-');

    if (val === 'surprise') val = 'surprised';
    if (val === 'scare') val = 'fear';

    return TTS_EMOTIONS.has(val) ? val : '';
}