// tts-auth-provider.js /** * TTS 鉴权模式播放服务 * 负责火山引擎 V3 API 的调用与流式播放 */ import { synthesizeV3, synthesizeV3Stream } from './tts-api.js'; import { normalizeEmotion } from './tts-text.js'; import { getRequestHeaders } from "../../../../../../script.js"; // ============ 工具函数(内部) ============ function normalizeSpeed(value) { const num = Number.isFinite(value) ? value : 1.0; if (num >= 0.5 && num <= 2.0) return num; return Math.min(2.0, Math.max(0.5, 1 + num / 100)); } function estimateDuration(text) { return Math.max(2, Math.ceil(String(text || '').length / 4)); } function supportsStreaming() { try { return typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'); } catch { return false; } } function resolveContextTexts(context, resourceId) { const text = String(context || '').trim(); if (!text || resourceId !== 'seed-tts-2.0') return []; return [text]; } // ============ 导出的工具函数 ============ export function speedToV3SpeechRate(speed) { return Math.round((normalizeSpeed(speed) - 1) * 100); } export function inferResourceIdBySpeaker(value) { const v = (value || '').trim(); const lower = v.toLowerCase(); if (lower.startsWith('icl_') || lower.startsWith('s_')) { return 'seed-icl-2.0'; } if (v.includes('_uranus_') || v.includes('_saturn_') || v.includes('_moon_')) { return 'seed-tts-2.0'; } return 'seed-tts-1.0'; } export function buildV3Headers(resourceId, config) { const stHeaders = getRequestHeaders() || {}; const headers = { ...stHeaders, 'Content-Type': 'application/json', 'X-Api-App-Id': config.volc.appId, 'X-Api-Access-Key': config.volc.accessKey, 'X-Api-Resource-Id': resourceId, }; if (config.volc.usageReturn) { headers['X-Control-Require-Usage-Tokens-Return'] = 'text_words'; } return headers; } // ============ 参数构建 ============ function buildSynthesizeParams({ text, speaker, resourceId }, config) { const params = { providerMode: 'auth', appId: config.volc.appId, accessKey: config.volc.accessKey, resourceId, speaker, text, format: 'mp3', sampleRate: 24000, speechRate: speedToV3SpeechRate(config.volc.speechRate), loudnessRate: 0, emotionScale: config.volc.emotionScale, explicitLanguage: config.volc.explicitLanguage, disableMarkdownFilter: config.volc.disableMarkdownFilter, disableEmojiFilter: config.volc.disableEmojiFilter, enableLanguageDetector: config.volc.enableLanguageDetector, maxLengthToFilterParenthesis: config.volc.maxLengthToFilterParenthesis, postProcessPitch: config.volc.postProcessPitch, }; if (resourceId === 'seed-tts-1.0' && config.volc.useTts11 !== false) { params.model = 'seed-tts-1.1'; } if (config.volc.serverCacheEnabled) { params.cacheConfig = { text_type: 1, use_cache: true }; } return params; } // ============ 单段播放(导出供混合模式使用) ============ export async function speakSegmentAuth(messageId, segment, segmentIndex, batchId, ctx) { const { isFirst, config, player, tryLoadLocalCache, updateState } = ctx; const speaker = segment.resolvedSpeaker; const resourceId = inferResourceIdBySpeaker(speaker); const params = buildSynthesizeParams({ text: segment.text, speaker, resourceId }, config); const emotion = normalizeEmotion(segment.emotion); const contextTexts = resolveContextTexts(segment.context, resourceId); if (emotion) params.emotion = emotion; if (contextTexts.length) params.contextTexts = contextTexts; // 首段初始化状态 if (isFirst) { updateState({ status: 'sending', text: segment.text, textLength: segment.text.length, cached: false, usage: null, error: '', duration: estimateDuration(segment.text), }); } updateState({ currentSegment: segmentIndex + 1 }); // 尝试缓存 const cacheHit = await tryLoadLocalCache(params); if (cacheHit?.entry?.blob) { updateState({ cached: true, status: 'cached', audioBlob: cacheHit.entry.blob, cacheKey: cacheHit.key }); player.enqueue({ id: `msg-${messageId}-batch-${batchId}-seg-${segmentIndex}`, messageId, segmentIndex, batchId, audioBlob: cacheHit.entry.blob, text: segment.text, }); return; } const headers = buildV3Headers(resourceId, config); try { if (supportsStreaming()) { await playWithStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx); } else { await playWithoutStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx); } } catch (err) { updateState({ status: 'error', error: err?.message || '请求失败' }); } } // ============ 流式播放 ============ async function playWithStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) { const { player, storeLocalCache, buildCacheKey, updateState } = ctx; const speaker = segment.resolvedSpeaker; const resourceId = inferResourceIdBySpeaker(speaker); const controller = new AbortController(); const chunks = []; let resolved = false; const donePromise = new Promise((resolve, reject) => { const streamItem = { id: `msg-${messageId}-batch-${batchId}-seg-${segmentIndex}`, messageId, segmentIndex, batchId, text: segment.text, streamFactory: () => ({ mimeType: 'audio/mpeg', abort: () => controller.abort(), start: async (append, end, fail) => { try { const result = await synthesizeV3Stream(params, headers, { signal: controller.signal, onChunk: (bytes) => { chunks.push(bytes); append(bytes); }, }); end(); if (!resolved) { resolved = true; resolve({ audioBlob: new Blob(chunks, { type: 'audio/mpeg' }), usage: result.usage || null, logid: result.logid }); } } catch (err) { if (!resolved) { resolved = true; fail(err); reject(err); } } }, }), }; const ok = player.enqueue(streamItem); if (!ok && !resolved) { resolved = true; reject(new Error('播放队列已存在相同任务')); } }); donePromise.then(async (result) => { if (!result?.audioBlob) return; updateState({ audioBlob: result.audioBlob, usage: result.usage || null }); const cacheKey = buildCacheKey(params); updateState({ cacheKey }); await storeLocalCache(cacheKey, result.audioBlob, { text: segment.text.slice(0, 200), textLength: segment.text.length, speaker, resourceId, usage: result.usage || null, }); }).catch((err) => { if (err?.name === 'AbortError' || /aborted/i.test(err?.message || '')) return; updateState({ status: 'error', error: err?.message || '请求失败' }); }); updateState({ status: 'queued' }); } // ============ 非流式播放 ============ async function playWithoutStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) { const { player, storeLocalCache, buildCacheKey, updateState } = ctx; const speaker = segment.resolvedSpeaker; const resourceId = inferResourceIdBySpeaker(speaker); const result = await synthesizeV3(params, headers); updateState({ audioBlob: result.audioBlob, usage: result.usage, status: 'queued' }); const cacheKey = buildCacheKey(params); updateState({ cacheKey }); await storeLocalCache(cacheKey, result.audioBlob, { text: segment.text.slice(0, 200), textLength: segment.text.length, speaker, resourceId, usage: result.usage || null, }); player.enqueue({ id: `msg-${messageId}-batch-${batchId}-seg-${segmentIndex}`, messageId, segmentIndex, batchId, audioBlob: result.audioBlob, text: segment.text, }); } // ============ 主入口 ============ export async function speakMessageAuth(options) { const { messageId, segments, batchId, config, player, tryLoadLocalCache, storeLocalCache, buildCacheKey, updateState, isModuleEnabled, } = options; const ctx = { config, player, tryLoadLocalCache, storeLocalCache, buildCacheKey, updateState }; for (let i = 0; i < segments.length; i++) { if (isModuleEnabled && !isModuleEnabled()) return; await speakSegmentAuth(messageId, segments[i], i, batchId, { isFirst: i === 0, ...ctx }); } }