【AI】二、Spring AI 调用微软云 Azure 的 gpt-4o-mini-tts TTS 模型(文字转音频)
本文介绍了微软Azure语音服务的部署与使用方法。首先展示语音服务配置界面,包括文本转语音和语音转文本的API端点、区域及密钥等参数。文中提供了Windows系统的cURL请求模板和调试可视化界面操作方法。重点介绍了Java代码实现文本转语音功能,包括接口参数配置、SSML语音参数设置、OkHttp请求构建及响应处理流程。代码示例展示了如何通过Azure语音服务API将文本转换为音频文件并保存为MP3文件。
·
一、先部署语音服务
二、需要的配置参数
端点:
文本转语音 https://eastus2.tts.speech.microsoft.com
语音转文本 https://eastus2.stt.speech.microsoft.com
区域 eastus2
密钥: 1IjIIlFN9jT3U0auo46n7Xyu9NGJ6Soxxxxxxxxxxxxxxxx
接口说明文档
https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/get-started-text-to-speech?tabs=windows&pivots=programming-language-rest
Windows 官方请求模板
curl --location --request POST "https://%SPEECH_REGION%.tts.speech.microsoft.com/cognitiveservices/v1" ^
--header "Ocp-Apim-Subscription-Key: %SPEECH_KEY%" ^
--header "Content-Type: application/ssml+xml" ^
--header "X-Microsoft-OutputFormat: audio-16khz-128kbitrate-mono-mp3" ^
--header "User-Agent: curl" ^
--data-raw "<speak version='1.0' xml:lang='en-US'><voice xml:lang='en-US' xml:gender='Female' name='en-US-AvaMultilingualNeural'>my voice is my passport verify me</voice></speak>" --output output.mp3
三、调试 文字转音频(可视化)
点击【在广场中打开】可以调试 文字转音频
四、java代码
package com.xiaozhi.tts;
import okhttp3.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.concurrent.TimeUnit;
/**
 * Command-line demo that converts a piece of text to an audio file by calling
 * the Azure Speech Service text-to-speech REST endpoint
 * ({@code https://<region>.tts.speech.microsoft.com/cognitiveservices/v1}).
 *
 * <p>The subscription key is read from the {@code SPEECH_KEY} environment
 * variable and the region from {@code SPEECH_REGION} (falling back to
 * {@code eastus2}), so that no credential has to live in source control.
 */
public class AzureTTS_Test {

    private static final Logger logger = LoggerFactory.getLogger(AzureTTS_Test.class);

    /** Region used when the {@code SPEECH_REGION} environment variable is absent or blank. */
    private static final String DEFAULT_SPEECH_REGION = "eastus2";

    /**
     * Builds the SSML request for a fixed sample text and writes the synthesized
     * audio to {@code output.mp3}. Exits with status 1 when the key is missing or
     * the synthesis call fails.
     *
     * @param args unused
     * @throws Exception propagated from {@link #callTextToSpeechApi}
     */
    public static void main(String[] args) throws Exception {
        // ============================ endpoint parameters ============================
        // Region of the Speech resource.
        String speechRegion = System.getenv("SPEECH_REGION");
        if (speechRegion == null || speechRegion.trim().isEmpty()) {
            speechRegion = DEFAULT_SPEECH_REGION;
        } else {
            speechRegion = speechRegion.trim();
        }
        // Subscription key; taken from the environment instead of being hard-coded.
        String speechKey = System.getenv("SPEECH_KEY");
        if (speechKey == null || speechKey.trim().isEmpty()) {
            System.err.println("未设置环境变量 SPEECH_KEY,无法调用 Azure 语音服务");
            System.exit(1);
            return;
        }
        speechKey = speechKey.trim();
        // Request URL.
        String apiUrl = "https://" + speechRegion + ".tts.speech.microsoft.com/cognitiveservices/v1";
        // Output file path + file name.
        String outputFilePath = "output.mp3";
        // Output audio format.
        String outputFormat = "audio-16khz-128kbitrate-mono-mp3";

        // ==================== SSML parameters (voice settings) ====================
        String xmlLang = "en-US";                          // language
        String voiceGender = "Female";                     // voice gender
        String voiceName = "en-US-AvaMultilingualNeural";  // concrete voice name
        String textToSpeak = "需要转音频的文本";              // text to synthesize

        // ==================== other parameters ====================
        String userAgent = "curl"; // value sent in the User-Agent header

        // Build the SSML body. Every interpolated value is XML-escaped so that
        // characters such as '&', '<' or a quote cannot break the document.
        String ssmlRequestBody = "<speak version='1.0' xml:lang='" + escapeXml(xmlLang) + "'>"
                + "<voice xml:lang='" + escapeXml(xmlLang)
                + "' xml:gender='" + escapeXml(voiceGender)
                + "' name='" + escapeXml(voiceName) + "'>"
                + escapeXml(textToSpeak)
                + "</voice></speak>";

        // Call the API and surface failure through the process exit code.
        boolean success = callTextToSpeechApi(apiUrl, speechKey, outputFormat, userAgent, ssmlRequestBody, outputFilePath);
        if (!success) {
            System.exit(1);
        }
    }

    /**
     * Escapes the five XML special characters so a value can be embedded in
     * element content or in a quoted attribute.
     *
     * @param value raw text; {@code null} is treated as the empty string
     * @return the escaped text
     */
    private static String escapeXml(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Calls the text-to-speech API and saves the returned audio to a file.
     *
     * @param apiUrl          endpoint URL
     * @param speechKey       subscription key, sent as {@code Ocp-Apim-Subscription-Key}
     * @param outputFormat    audio format, e.g. {@code audio-16khz-128kbitrate-mono-mp3}
     * @param userAgent       value of the {@code User-Agent} header
     * @param ssmlRequestBody SSML document describing the voice and the text
     * @param outputFilePath  output file path + file name
     * @return {@code true} when the audio was received and written completely;
     *         {@code false} on a non-2xx response, an empty body, or any I/O error
     */
    private static boolean callTextToSpeechApi(String apiUrl, String speechKey, String outputFormat, String userAgent, String ssmlRequestBody, String outputFilePath) throws Exception {
        try {
            // HTTP client with 30 s connect/read/write timeouts.
            OkHttpClient client = new OkHttpClient.Builder()
                    .connectTimeout(30, TimeUnit.SECONDS)
                    .readTimeout(30, TimeUnit.SECONDS)
                    .writeTimeout(30, TimeUnit.SECONDS)
                    .build();

            // Request body carrying the SSML document.
            RequestBody requestBody = RequestBody.create(ssmlRequestBody, MediaType.parse("application/ssml+xml"));

            Request request = new Request.Builder()
                    .url(apiUrl)
                    .post(requestBody)
                    .addHeader("Ocp-Apim-Subscription-Key", speechKey)
                    .addHeader("Content-Type", "application/ssml+xml")
                    .addHeader("X-Microsoft-OutputFormat", outputFormat)
                    .addHeader("User-Agent", userAgent)
                    .build();

            try (Response response = client.newCall(request).execute();
                 ResponseBody responseBody = response.body()) {

                if (!response.isSuccessful()) {
                    String errorDetails = responseBody != null ? responseBody.string() : "无详细信息";
                    System.err.println("请求失败: " + response.code() + " " + response.message());
                    System.err.println("错误详情: " + errorDetails);
                    return false;
                }
                if (responseBody == null) {
                    System.err.println("响应体为空");
                    return false;
                }

                // Stream the audio bytes to the output file.
                try (InputStream inputStream = responseBody.byteStream();
                     OutputStream outputStream = new FileOutputStream(outputFilePath)) {
                    byte[] buffer = new byte[4096];
                    int bytesRead;
                    while ((bytesRead = inputStream.read(buffer)) != -1) {
                        outputStream.write(buffer, 0, bytesRead);
                    }
                } catch (IOException e) {
                    // Pass the exception as the last argument so the stack trace is logged.
                    logger.error("【TTS文本转音频错误】保存文件失败: {}", e.getMessage(), e);
                    return false;
                }

                File outputFile = new File(outputFilePath);
                System.out.println("语音文件已成功保存至: " + outputFile.getAbsolutePath());
                // The original fell through to "return false" here, reporting failure on success.
                return true;
            }
        } catch (Exception e) {
            logger.error("【TTS文本转音频错误】请求失败: {}", e.getMessage(), e);
            return false;
        }
    }
}
更多推荐
所有评论(0)