上傳檔案到「modules/tts」

2026-01-20 02:33:21 +00:00
parent 215ef201ee
commit 3197e245bb
3 changed files with 3754 additions and 3734 deletions
--- a/modules/tts/tts-auth-provider.js
+++ b/modules/tts/tts-auth-provider.js
@@ -40,7 +40,10 @@ export function speedToV3SpeechRate(speed) {
    return Math.round((normalizeSpeed(speed) - 1) * 100);
 }
-export function inferResourceIdBySpeaker(value) {
+export function inferResourceIdBySpeaker(value, explicitResourceId = null) {
+    if (explicitResourceId) {
+        return explicitResourceId;
+    }
    const v = (value || '').trim();
    const lower = v.toLowerCase();
    if (lower.startsWith('icl_') || lower.startsWith('s_')) {
@@ -110,7 +113,7 @@ export async function speakSegmentAuth(messageId, segment, segmentIndex, batchId
    } = ctx;
    const speaker = segment.resolvedSpeaker;
-    const resourceId = inferResourceIdBySpeaker(speaker);
+    const resourceId = segment.resolvedResourceId || inferResourceIdBySpeaker(speaker);
    const params = buildSynthesizeParams({ text: segment.text, speaker, resourceId }, config);
    const emotion = normalizeEmotion(segment.emotion);
    const contextTexts = resolveContextTexts(segment.context, resourceId);
@@ -171,7 +174,7 @@ export async function speakSegmentAuth(messageId, segment, segmentIndex, batchId
 async function playWithStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) {
    const { player, storeLocalCache, buildCacheKey, updateState } = ctx;
    const speaker = segment.resolvedSpeaker;
-    const resourceId = inferResourceIdBySpeaker(speaker);
+    const resourceId = params.resourceId;
    const controller = new AbortController();
    const chunks = [];
@@ -250,7 +253,7 @@ async function playWithStreaming(messageId, segment, segmentIndex, batchId, para
 async function playWithoutStreaming(messageId, segment, segmentIndex, batchId, params, headers, ctx) {
    const { player, storeLocalCache, buildCacheKey, updateState } = ctx;
    const speaker = segment.resolvedSpeaker;
-    const resourceId = inferResourceIdBySpeaker(speaker);
+    const resourceId = params.resourceId;
    const result = await synthesizeV3(params, headers);
    updateState({ audioBlob: result.audioBlob, usage: result.usage, status: 'queued' });
--- a/modules/tts/tts-overlay.html
+++ b/modules/tts/tts-overlay.html
@@ -1412,6 +1412,13 @@ select.input { cursor: pointer; }
                                    <label class="form-label">名称</label>
                                    <input type="text" id="newVoiceName" class="input" placeholder="显示名称">
                                </div>
+                                <div class="form-group">
+                                    <label class="form-label">复刻版本</label>
+                                    <select id="newVoiceResourceId" class="input">
+                                        <option value="seed-icl-2.0">复刻 2.0</option>
+                                        <option value="seed-icl-1.0">复刻 1.0</option>
+                                    </select>
+                                </div>
                                <button class="btn btn-primary" id="addMySpeakerBtn" style="margin-top: 18px;"><i class="fa-solid fa-plus"></i></button>
                            </div>
                        </div>
@@ -2155,6 +2162,7 @@ function normalizeMySpeakers(list) {
        name: String(item?.name || '').trim(),
        value: String(item?.value || '').trim(),
        source: item?.source || getVoiceSource(item?.value || ''),
+        resourceId: item?.resourceId || null,
    })).filter(item => item.value);
 }
@@ -2265,11 +2273,14 @@ function doTestVoice(speaker, source, textElId, statusElId) {
    setTestStatus(statusElId, 'playing', '正在合成...');
+    const speakerItem = mySpeakers.find(s => s.value === speaker);
+    const resolvedResourceId = speakerItem?.resourceId;
    post('xb-tts:test-speak', {
        text,
        speaker,
        source,
-        resourceId: source === 'auth' ? inferResourceIdBySpeaker(speaker) : '',
+        resourceId: source === 'auth' ? (resolvedResourceId || inferResourceIdBySpeaker(speaker)) : '',
    });
 }
@@ -2437,10 +2448,11 @@ document.addEventListener('DOMContentLoaded', () => {
    $('addMySpeakerBtn').addEventListener('click', () => {
        const id = $('newVoiceId').value.trim();
        const name = $('newVoiceName').value.trim();
+        const resourceId = $('newVoiceResourceId').value;
        if (!id) { post('xb-tts:toast', { type: 'error', message: '请输入音色ID' }); return; }
        if (!isInMyList(id)) {
-            mySpeakers.push({ name: name || id, value: id, source: 'auth' });
+            mySpeakers.push({ name: name || id, value: id, source: 'auth', resourceId });
        }
        selectedVoiceValue = id;
        $('newVoiceId').value = '';
--- a/modules/tts/tts.js
+++ b/modules/tts/tts.js
@@ -254,7 +254,8 @@ function resolveSpeakerWithSource(speakerName, mySpeakers, defaultSpeaker) {
        const defaultItem = list.find(s => s.value === defaultSpeaker);
        return {
            value: defaultSpeaker,
-            source: defaultItem?.source || getVoiceSource(defaultSpeaker)
+            source: defaultItem?.source || getVoiceSource(defaultSpeaker),
+            resourceId: defaultItem?.resourceId || null
        };
    }
@@ -264,7 +265,8 @@ function resolveSpeakerWithSource(speakerName, mySpeakers, defaultSpeaker) {
    if (byName?.value) {
        return {
            value: byName.value,
-            source: byName.source || getVoiceSource(byName.value)
+            source: byName.source || getVoiceSource(byName.value),
+            resourceId: byName.resourceId || null
        };
    }
@@ -274,12 +276,13 @@ function resolveSpeakerWithSource(speakerName, mySpeakers, defaultSpeaker) {
    if (byValue?.value) {
        return {
            value: byValue.value,
-            source: byValue.source || getVoiceSource(byValue.value)
+            source: byValue.source || getVoiceSource(byValue.value),
+            resourceId: byValue.resourceId || null
        };
    }
    if (FREE_VOICE_KEYS.has(speakerName)) {
-        return { value: speakerName, source: 'free' };
+        return { value: speakerName, source: 'free', resourceId: null };
    }
    // ★ 回退到默认，这是问题发生的地方
@@ -288,7 +291,8 @@ function resolveSpeakerWithSource(speakerName, mySpeakers, defaultSpeaker) {
    const defaultItem = list.find(s => s.value === defaultSpeaker);
    return {
        value: defaultSpeaker,
-        source: defaultItem?.source || getVoiceSource(defaultSpeaker)
+        source: defaultItem?.source || getVoiceSource(defaultSpeaker),
+        resourceId: defaultItem?.resourceId || null
    };
 }
@@ -623,7 +627,8 @@ async function speakMessage(messageId, { mode = 'manual' } = {}) {
        return { 
            ...seg, 
            resolvedSpeaker: resolved.value, 
-            resolvedSource: resolved.source 
+            resolvedSource: resolved.source,
+            resolvedResourceId: resolved.resourceId
        };
    });
@@ -1325,7 +1330,7 @@ export async function initTts() {
                    return;
                }
-                const resourceId = options.resourceId || inferResourceIdBySpeaker(resolved.value);
+                const resourceId = options.resourceId || resolved.resourceId || inferResourceIdBySpeaker(resolved.value);
                const result = await synthesizeV3({
                    appId: config.volc.appId,
                    accessKey: config.volc.accessKey,