From d1a4aca02595bca00f6d3f3a5fc2e2d138e42622 Mon Sep 17 00:00:00 2001 From: RT15548 Date: Tue, 20 Jan 2026 10:36:35 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B41.0=E9=89=B4=E6=9D=83?= =?UTF-8?q?=E9=9F=B3=E8=89=B2=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/tts/tts-auth-provider.js | 11 +++-- modules/tts/tts-overlay.html | 72 +++++++++++++++------------ modules/tts/tts.js | 83 +++++++++++++++++--------------- 3 files changed, 93 insertions(+), 73 deletions(-) diff --git a/modules/tts/tts-auth-provider.js b/modules/tts/tts-auth-provider.js index 084c3a8..d7e304c 100644 --- a/modules/tts/tts-auth-provider.js +++ b/modules/tts/tts-auth-provider.js @@ -40,7 +40,10 @@ export function speedToV3SpeechRate(speed) { return Math.round((normalizeSpeed(speed) - 1) * 100); } -export function inferResourceIdBySpeaker(value) { +export function inferResourceIdBySpeaker(value, explicitResourceId = null) { + if (explicitResourceId) { + return explicitResourceId; + } const v = (value || '').trim(); const lower = v.toLowerCase(); if (lower.startsWith('icl_') || lower.startsWith('s_')) { @@ -110,7 +113,7 @@ export async function speakSegmentAuth(messageId, segment, segmentIndex, batchId } = ctx; const speaker = segment.resolvedSpeaker; - const resourceId = inferResourceIdBySpeaker(speaker); + const resourceId = segment.resolvedResourceId || inferResourceIdBySpeaker(speaker); const params = buildSynthesizeParams({ text: segment.text, speaker, resourceId }, config); const emotion = normalizeEmotion(segment.emotion); const contextTexts = resolveContextTexts(segment.context, resourceId); @@ -171,7 +174,7 @@ export async function speakSegmentAuth(messageId, segment, segmentIndex, batchId async function playWithStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) { const { player, storeLocalCache, buildCacheKey, updateState } = ctx; const speaker = segment.resolvedSpeaker; - const resourceId = inferResourceIdBySpeaker(speaker); + const resourceId = params.resourceId; const controller = new AbortController(); const chunks = []; @@ -250,7 +253,7 @@ async function playWithStreaming(messageId, segment, segmentIndex, batchId, para async function playWithoutStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) { const { player, storeLocalCache, buildCacheKey, updateState } = ctx; const speaker = segment.resolvedSpeaker; - const resourceId = inferResourceIdBySpeaker(speaker); + const resourceId = params.resourceId; const result = await synthesizeV3(params, headers); updateState({ audioBlob: result.audioBlob, usage: result.usage, status: 'queued' }); diff --git a/modules/tts/tts-overlay.html b/modules/tts/tts-overlay.html index 6f18542..332b232 100644 --- a/modules/tts/tts-overlay.html +++ b/modules/tts/tts-overlay.html @@ -1408,13 +1408,20 @@ select.input { cursor: pointer; } -
- - -
- - - +
+ + +
+
+ + +
+ + + @@ -2149,14 +2156,15 @@ function renderAuthVoiceList() { // 数据处理 // ═══════════════════════════════════════════════════════════════════════════ -function normalizeMySpeakers(list) { - if (!Array.isArray(list)) return []; - return list.map(item => ({ - name: String(item?.name || '').trim(), - value: String(item?.value || '').trim(), - source: item?.source || getVoiceSource(item?.value || ''), - })).filter(item => item.value); -} +function normalizeMySpeakers(list) { + if (!Array.isArray(list)) return []; + return list.map(item => ({ + name: String(item?.name || '').trim(), + value: String(item?.value || '').trim(), + source: item?.source || getVoiceSource(item?.value || ''), + resourceId: item?.resourceId || null, + })).filter(item => item.value); +} function applyCacheStats(stats = {}) { $('cacheCount').textContent = stats.count ?? 0; @@ -2265,13 +2273,16 @@ function doTestVoice(speaker, source, textElId, statusElId) { setTestStatus(statusElId, 'playing', '正在合成...'); - post('xb-tts:test-speak', { + const speakerItem = mySpeakers.find(s => s.value === speaker); + const resolvedResourceId = speakerItem?.resourceId; + + post('xb-tts:test-speak', { text, speaker, source, - resourceId: source === 'auth' ? inferResourceIdBySpeaker(speaker) : '', - }); -} + resourceId: source === 'auth' ? (resolvedResourceId || inferResourceIdBySpeaker(speaker)) : '', + }); +} // ═══════════════════════════════════════════════════════════════════════════ // 消息处理 @@ -2434,17 +2445,18 @@ document.addEventListener('DOMContentLoaded', () => { post('xb-tts:toast', { type: 'success', message: `已添加:${name}` }); }); - $('addMySpeakerBtn').addEventListener('click', () => { - const id = $('newVoiceId').value.trim(); - const name = $('newVoiceName').value.trim(); - if (!id) { post('xb-tts:toast', { type: 'error', message: '请输入音色ID' }); return; } - - if (!isInMyList(id)) { - mySpeakers.push({ name: name || id, value: id, source: 'auth' }); - } - selectedVoiceValue = id; - $('newVoiceId').value = ''; - $('newVoiceName').value = ''; + $('addMySpeakerBtn').addEventListener('click', () => { + const id = $('newVoiceId').value.trim(); + const name = $('newVoiceName').value.trim(); + const resourceId = $('newVoiceResourceId').value; + if (!id) { post('xb-tts:toast', { type: 'error', message: '请输入音色ID' }); return; } + + if (!isInMyList(id)) { + mySpeakers.push({ name: name || id, value: id, source: 'auth', resourceId }); + } + selectedVoiceValue = id; + $('newVoiceId').value = ''; + $('newVoiceName').value = ''; renderMyVoiceList(); updateCurrentVoiceDisplay(); diff --git a/modules/tts/tts.js b/modules/tts/tts.js index 7c6be7c..f5e72c7 100644 --- a/modules/tts/tts.js +++ b/modules/tts/tts.js @@ -250,47 +250,51 @@ function resolveSpeakerWithSource(speakerName, mySpeakers, defaultSpeaker) { }); } - if (!speakerName) { - const defaultItem = list.find(s => s.value === defaultSpeaker); - return { - value: defaultSpeaker, - source: defaultItem?.source || getVoiceSource(defaultSpeaker) - }; - } + if (!speakerName) { + const defaultItem = list.find(s => s.value === defaultSpeaker); + return { + value: defaultSpeaker, + source: defaultItem?.source || getVoiceSource(defaultSpeaker), + resourceId: defaultItem?.resourceId || null + }; + } const byName = list.find(s => s.name === speakerName); console.log('[TTS Debug] byName 查找结果:', byName); // ★ 调试 - if (byName?.value) { - return { - value: byName.value, - source: byName.source || getVoiceSource(byName.value) - }; - } + if (byName?.value) { + return { + value: byName.value, + source: byName.source || getVoiceSource(byName.value), + resourceId: byName.resourceId || null + }; + } const byValue = list.find(s => s.value === speakerName); console.log('[TTS Debug] byValue 查找结果:', byValue); // ★ 调试 - if (byValue?.value) { - return { - value: byValue.value, - source: byValue.source || getVoiceSource(byValue.value) - }; - } + if (byValue?.value) { + return { + value: byValue.value, + source: byValue.source || getVoiceSource(byValue.value), + resourceId: byValue.resourceId || null + }; + } - if (FREE_VOICE_KEYS.has(speakerName)) { - return { value: speakerName, source: 'free' }; - } + if (FREE_VOICE_KEYS.has(speakerName)) { + return { value: speakerName, source: 'free', resourceId: null }; + } // ★ 回退到默认,这是问题发生的地方 console.warn('[TTS Debug] 未找到匹配音色,回退到默认:', defaultSpeaker); - const defaultItem = list.find(s => s.value === defaultSpeaker); - return { - value: defaultSpeaker, - source: defaultItem?.source || getVoiceSource(defaultSpeaker) - }; -} + const defaultItem = list.find(s => s.value === defaultSpeaker); + return { + value: defaultSpeaker, + source: defaultItem?.source || getVoiceSource(defaultSpeaker), + resourceId: defaultItem?.resourceId || null + }; +} // ============ 缓存管理 ============ @@ -616,16 +620,17 @@ async function speakMessage(messageId, { mode = 'manual' } = {}) { return; } - const resolvedSegments = segments.map(seg => { - const resolved = seg.speaker - ? resolveSpeakerWithSource(seg.speaker, mySpeakers, defaultSpeaker) - : defaultResolved; - return { - ...seg, - resolvedSpeaker: resolved.value, - resolvedSource: resolved.source - }; - }); + const resolvedSegments = segments.map(seg => { + const resolved = seg.speaker + ? resolveSpeakerWithSource(seg.speaker, mySpeakers, defaultSpeaker) + : defaultResolved; + return { + ...seg, + resolvedSpeaker: resolved.value, + resolvedSource: resolved.source, + resolvedResourceId: resolved.resourceId + }; + }); const needsAuth = resolvedSegments.some(s => s.resolvedSource === 'auth'); if (needsAuth && !isAuthConfigured()) { @@ -1325,7 +1330,7 @@ export async function initTts() { return; } - const resourceId = options.resourceId || inferResourceIdBySpeaker(resolved.value); + const resourceId = options.resourceId || resolved.resourceId || inferResourceIdBySpeaker(resolved.value); const result = await synthesizeV3({ appId: config.volc.appId, accessKey: config.volc.accessKey,