Files
LittleWhiteBox/modules/tts/tts-auth-provider.js
2026-01-26 01:16:35 +08:00

315 lines
9.8 KiB
JavaScript

// tts-auth-provider.js
/**
* TTS 鉴权模式播放服务
* 负责火山引擎 V3 API 的调用与流式播放
*/
import { synthesizeV3, synthesizeV3Stream } from './tts-api.js';
import { normalizeEmotion } from './tts-text.js';
import { getRequestHeaders } from "../../../../../../script.js";
// ============ 工具函数(内部) ============
function normalizeSpeed(value) {
const num = Number.isFinite(value) ? value : 1.0;
if (num >= 0.5 && num <= 2.0) return num;
return Math.min(2.0, Math.max(0.5, 1 + num / 100));
}
function estimateDuration(text) {
return Math.max(2, Math.ceil(String(text || '').length / 4));
}
function supportsStreaming() {
try {
return typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg');
} catch {
return false;
}
}
function resolveContextTexts(context, resourceId) {
const text = String(context || '').trim();
if (!text || resourceId !== 'seed-tts-2.0') return [];
return [text];
}
// ============ 导出的工具函数 ============
export function speedToV3SpeechRate(speed) {
return Math.round((normalizeSpeed(speed) - 1) * 100);
}
export function inferResourceIdBySpeaker(value, explicitResourceId = null) {
if (explicitResourceId) {
return explicitResourceId;
}
const v = (value || '').trim();
const lower = v.toLowerCase();
if (lower.startsWith('icl_') || lower.startsWith('s_')) {
return 'seed-icl-2.0';
}
if (v.includes('_uranus_') || v.includes('_saturn_') || v.includes('_moon_')) {
return 'seed-tts-2.0';
}
return 'seed-tts-1.0';
}
export function buildV3Headers(resourceId, config) {
const stHeaders = getRequestHeaders() || {};
const headers = {
...stHeaders,
'Content-Type': 'application/json',
'X-Api-App-Id': config.volc.appId,
'X-Api-Access-Key': config.volc.accessKey,
'X-Api-Resource-Id': resourceId,
};
if (config.volc.usageReturn) {
headers['X-Control-Require-Usage-Tokens-Return'] = 'text_words';
}
return headers;
}
// ============ 参数构建 ============
function buildSynthesizeParams({ text, speaker, resourceId }, config) {
const params = {
providerMode: 'auth',
appId: config.volc.appId,
accessKey: config.volc.accessKey,
resourceId,
speaker,
text,
format: 'mp3',
sampleRate: 24000,
speechRate: speedToV3SpeechRate(config.volc.speechRate),
loudnessRate: 0,
emotionScale: config.volc.emotionScale,
explicitLanguage: config.volc.explicitLanguage,
disableMarkdownFilter: config.volc.disableMarkdownFilter,
disableEmojiFilter: config.volc.disableEmojiFilter,
enableLanguageDetector: config.volc.enableLanguageDetector,
maxLengthToFilterParenthesis: config.volc.maxLengthToFilterParenthesis,
postProcessPitch: config.volc.postProcessPitch,
};
if (resourceId === 'seed-tts-1.0' && config.volc.useTts11 !== false) {
params.model = 'seed-tts-1.1';
}
if (config.volc.serverCacheEnabled) {
params.cacheConfig = { text_type: 1, use_cache: true };
}
return params;
}
// ============ 单段播放(导出供混合模式使用) ============
export async function speakSegmentAuth(messageId, segment, segmentIndex, batchId, ctx) {
const {
isFirst,
config,
player,
tryLoadLocalCache,
updateState
} = ctx;
const speaker = segment.resolvedSpeaker;
const resourceId = segment.resolvedResourceId || inferResourceIdBySpeaker(speaker);
const params = buildSynthesizeParams({ text: segment.text, speaker, resourceId }, config);
const emotion = normalizeEmotion(segment.emotion);
const contextTexts = resolveContextTexts(segment.context, resourceId);
if (emotion) params.emotion = emotion;
if (contextTexts.length) params.contextTexts = contextTexts;
// 首段初始化状态
if (isFirst) {
updateState({
status: 'sending',
text: segment.text,
textLength: segment.text.length,
cached: false,
usage: null,
error: '',
duration: estimateDuration(segment.text),
});
}
updateState({ currentSegment: segmentIndex + 1 });
// 尝试缓存
const cacheHit = await tryLoadLocalCache(params);
if (cacheHit?.entry?.blob) {
updateState({
cached: true,
status: 'cached',
audioBlob: cacheHit.entry.blob,
cacheKey: cacheHit.key
});
player.enqueue({
id: `msg-${messageId}-batch-${batchId}-seg-${segmentIndex}`,
messageId,
segmentIndex,
batchId,
audioBlob: cacheHit.entry.blob,
text: segment.text,
});
return;
}
const headers = buildV3Headers(resourceId, config);
try {
if (supportsStreaming()) {
await playWithStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx);
} else {
await playWithoutStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx);
}
} catch (err) {
updateState({ status: 'error', error: err?.message || '请求失败' });
}
}
// ============ 流式播放 ============
async function playWithStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) {
const { player, storeLocalCache, buildCacheKey, updateState } = ctx;
const speaker = segment.resolvedSpeaker;
const resourceId = params.resourceId;
const controller = new AbortController();
const chunks = [];
let resolved = false;
const donePromise = new Promise((resolve, reject) => {
const streamItem = {
id: `msg-${messageId}-batch-${batchId}-seg-${segmentIndex}`,
messageId,
segmentIndex,
batchId,
text: segment.text,
streamFactory: () => ({
mimeType: 'audio/mpeg',
abort: () => controller.abort(),
start: async (append, end, fail) => {
try {
const result = await synthesizeV3Stream(params, headers, {
signal: controller.signal,
onChunk: (bytes) => {
chunks.push(bytes);
append(bytes);
},
});
end();
if (!resolved) {
resolved = true;
resolve({
audioBlob: new Blob(chunks, { type: 'audio/mpeg' }),
usage: result.usage || null,
logid: result.logid
});
}
} catch (err) {
if (!resolved) {
resolved = true;
fail(err);
reject(err);
}
}
},
}),
};
const ok = player.enqueue(streamItem);
if (!ok && !resolved) {
resolved = true;
reject(new Error('播放队列已存在相同任务'));
}
});
donePromise.then(async (result) => {
if (!result?.audioBlob) return;
updateState({ audioBlob: result.audioBlob, usage: result.usage || null });
const cacheKey = buildCacheKey(params);
updateState({ cacheKey });
await storeLocalCache(cacheKey, result.audioBlob, {
text: segment.text.slice(0, 200),
textLength: segment.text.length,
speaker,
resourceId,
usage: result.usage || null,
});
}).catch((err) => {
if (err?.name === 'AbortError' || /aborted/i.test(err?.message || '')) return;
updateState({ status: 'error', error: err?.message || '请求失败' });
});
updateState({ status: 'queued' });
}
// ============ 非流式播放 ============
async function playWithoutStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) {
const { player, storeLocalCache, buildCacheKey, updateState } = ctx;
const speaker = segment.resolvedSpeaker;
const resourceId = params.resourceId;
const result = await synthesizeV3(params, headers);
updateState({ audioBlob: result.audioBlob, usage: result.usage, status: 'queued' });
const cacheKey = buildCacheKey(params);
updateState({ cacheKey });
await storeLocalCache(cacheKey, result.audioBlob, {
text: segment.text.slice(0, 200),
textLength: segment.text.length,
speaker,
resourceId,
usage: result.usage || null,
});
player.enqueue({
id: `msg-${messageId}-batch-${batchId}-seg-${segmentIndex}`,
messageId,
segmentIndex,
batchId,
audioBlob: result.audioBlob,
text: segment.text,
});
}
// ============ 主入口 ============
export async function speakMessageAuth(options) {
const {
messageId,
segments,
batchId,
config,
player,
tryLoadLocalCache,
storeLocalCache,
buildCacheKey,
updateState,
isModuleEnabled,
} = options;
const ctx = {
config,
player,
tryLoadLocalCache,
storeLocalCache,
buildCacheKey,
updateState
};
for (let i = 0; i < segments.length; i++) {
if (isModuleEnabled && !isModuleEnabled()) return;
await speakSegmentAuth(messageId, segments[i], i, batchId, {
isFirst: i === 0,
...ctx
});
}
}