Files
LittleWhiteBox/modules/tts/tts-api.js

336 lines
10 KiB
JavaScript
Raw Normal View History

2026-01-17 16:34:39 +08:00
/**
* 火山引擎 TTS API 封装
* V3 单向流式 + V1试用
*/
const V3_URL = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
const FREE_V1_URL = 'https://hstts.velure.top';
export const FREE_VOICES = [
{ key: 'female_1', name: '桃夭', tag: '甜蜜仙子', gender: 'female' },
{ key: 'female_2', name: '霜华', tag: '清冷仙子', gender: 'female' },
{ key: 'female_3', name: '顾姐', tag: '御姐烟嗓', gender: 'female' },
{ key: 'female_4', name: '苏菲', tag: '优雅知性', gender: 'female' },
{ key: 'female_5', name: '嘉欣', tag: '港风甜心', gender: 'female' },
{ key: 'female_6', name: '青梅', tag: '清秀少年音', gender: 'female' },
{ key: 'female_7', name: '可莉', tag: '奶音萝莉', gender: 'female' },
{ key: 'male_1', name: '夜枭', tag: '磁性低音', gender: 'male' },
{ key: 'male_2', name: '君泽', tag: '温润公子', gender: 'male' },
{ key: 'male_3', name: '沐阳', tag: '沉稳暖男', gender: 'male' },
{ key: 'male_4', name: '梓辛', tag: '青春少年', gender: 'male' },
];
export const FREE_DEFAULT_VOICE = 'female_1';
// ============ 内部工具 ============
async function proxyFetch(url, options = {}) {
const proxyUrl = '/proxy/' + encodeURIComponent(url);
return fetch(proxyUrl, options);
}
function safeTail(value) {
return value ? String(value).slice(-4) : '';
}
// ============ V3 鉴权模式 ============
/**
* V3 单向流式合成完整下载
*/
export async function synthesizeV3(params, authHeaders = {}) {
const {
appId,
accessKey,
resourceId = 'seed-tts-2.0',
uid = 'st_user',
text,
speaker,
model,
format = 'mp3',
sampleRate = 24000,
speechRate = 0,
loudnessRate = 0,
emotion,
emotionScale,
contextTexts,
explicitLanguage,
disableMarkdownFilter = true,
disableEmojiFilter,
enableLanguageDetector,
maxLengthToFilterParenthesis,
postProcessPitch,
cacheConfig,
} = params;
if (!appId || !accessKey || !text || !speaker) {
throw new Error('缺少必要参数: appId/accessKey/text/speaker');
}
console.log('[TTS API] V3 request:', {
appIdTail: safeTail(appId),
accessKeyTail: safeTail(accessKey),
resourceId,
speaker,
textLength: text.length,
hasContextTexts: !!contextTexts?.length,
hasEmotion: !!emotion,
});
const additions = {};
if (contextTexts?.length) additions.context_texts = contextTexts;
if (explicitLanguage) additions.explicit_language = explicitLanguage;
if (disableMarkdownFilter) additions.disable_markdown_filter = true;
if (disableEmojiFilter) additions.disable_emoji_filter = true;
if (enableLanguageDetector) additions.enable_language_detector = true;
if (Number.isFinite(maxLengthToFilterParenthesis)) {
additions.max_length_to_filter_parenthesis = maxLengthToFilterParenthesis;
}
if (Number.isFinite(postProcessPitch) && postProcessPitch !== 0) {
additions.post_process = { pitch: postProcessPitch };
}
if (cacheConfig && typeof cacheConfig === 'object') {
additions.cache_config = cacheConfig;
}
const body = {
user: { uid },
req_params: {
text,
speaker,
audio_params: {
format,
sample_rate: sampleRate,
speech_rate: speechRate,
loudness_rate: loudnessRate,
},
},
};
if (model) body.req_params.model = model;
if (emotion) {
body.req_params.audio_params.emotion = emotion;
body.req_params.audio_params.emotion_scale = emotionScale || 4;
}
if (Object.keys(additions).length > 0) {
body.req_params.additions = JSON.stringify(additions);
}
const resp = await proxyFetch(V3_URL, {
method: 'POST',
headers: authHeaders,
body: JSON.stringify(body),
});
const logid = resp.headers.get('X-Tt-Logid') || '';
if (!resp.ok) {
const errText = await resp.text().catch(() => '');
throw new Error(`V3 请求失败: ${resp.status} ${errText}${logid ? ` (logid: ${logid})` : ''}`);
}
const reader = resp.body.getReader();
const decoder = new TextDecoder();
const audioChunks = [];
let usage = null;
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
if (json.data) {
const binary = atob(json.data);
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++) {
bytes[i] = binary.charCodeAt(i);
}
audioChunks.push(bytes);
}
if (json.code === 20000000 && json.usage) {
usage = json.usage;
}
} catch {}
}
}
if (audioChunks.length === 0) {
throw new Error(`未收到音频数据${logid ? ` (logid: ${logid})` : ''}`);
}
return {
audioBlob: new Blob(audioChunks, { type: 'audio/mpeg' }),
usage,
logid,
};
}
/**
* V3 单向流式合成边生成边回调
*/
export async function synthesizeV3Stream(params, authHeaders = {}, options = {}) {
const {
appId,
accessKey,
uid = 'st_user',
text,
speaker,
model,
format = 'mp3',
sampleRate = 24000,
speechRate = 0,
loudnessRate = 0,
emotion,
emotionScale,
contextTexts,
explicitLanguage,
disableMarkdownFilter = true,
disableEmojiFilter,
enableLanguageDetector,
maxLengthToFilterParenthesis,
postProcessPitch,
cacheConfig,
} = params;
if (!appId || !accessKey || !text || !speaker) {
throw new Error('缺少必要参数: appId/accessKey/text/speaker');
}
const additions = {};
if (contextTexts?.length) additions.context_texts = contextTexts;
if (explicitLanguage) additions.explicit_language = explicitLanguage;
if (disableMarkdownFilter) additions.disable_markdown_filter = true;
if (disableEmojiFilter) additions.disable_emoji_filter = true;
if (enableLanguageDetector) additions.enable_language_detector = true;
if (Number.isFinite(maxLengthToFilterParenthesis)) {
additions.max_length_to_filter_parenthesis = maxLengthToFilterParenthesis;
}
if (Number.isFinite(postProcessPitch) && postProcessPitch !== 0) {
additions.post_process = { pitch: postProcessPitch };
}
if (cacheConfig && typeof cacheConfig === 'object') {
additions.cache_config = cacheConfig;
}
const body = {
user: { uid },
req_params: {
text,
speaker,
audio_params: {
format,
sample_rate: sampleRate,
speech_rate: speechRate,
loudness_rate: loudnessRate,
},
},
};
if (model) body.req_params.model = model;
if (emotion) {
body.req_params.audio_params.emotion = emotion;
body.req_params.audio_params.emotion_scale = emotionScale || 4;
}
if (Object.keys(additions).length > 0) {
body.req_params.additions = JSON.stringify(additions);
}
const resp = await proxyFetch(V3_URL, {
method: 'POST',
headers: authHeaders,
body: JSON.stringify(body),
signal: options.signal,
});
const logid = resp.headers.get('X-Tt-Logid') || '';
if (!resp.ok) {
const errText = await resp.text().catch(() => '');
throw new Error(`V3 请求失败: ${resp.status} ${errText}${logid ? ` (logid: ${logid})` : ''}`);
}
const reader = resp.body?.getReader();
if (!reader) throw new Error('V3 响应流不可用');
const decoder = new TextDecoder();
let usage = null;
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
if (json.data) {
const binary = atob(json.data);
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++) {
bytes[i] = binary.charCodeAt(i);
}
options.onChunk?.(bytes);
}
if (json.code === 20000000 && json.usage) {
usage = json.usage;
}
} catch {}
}
}
return { usage, logid };
}
// ============ 试用模式 ============
export async function synthesizeFreeV1(params, options = {}) {
const {
voiceKey = FREE_DEFAULT_VOICE,
text,
speed = 1.0,
emotion = null,
} = params || {};
if (!text) {
throw new Error('缺少必要参数: text');
}
const requestBody = {
voiceKey,
text: String(text || ''),
speed: Number(speed) || 1.0,
uid: 'xb_' + Date.now(),
reqid: crypto.randomUUID?.() || `${Date.now()}_${Math.random().toString(36).slice(2)}`,
};
if (emotion) {
requestBody.emotion = emotion;
requestBody.emotionScale = 5;
}
const res = await fetch(FREE_V1_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: options.signal,
});
if (!res.ok) throw new Error(`TTS HTTP ${res.status}`);
const data = await res.json();
if (data.code !== 3000) throw new Error(data.message || 'TTS 合成失败');
return { audioBase64: data.data };
}