一、介绍
语音合成标记语言(SSML)是一种基于 XML 的标记语言,可用于微调文本转语音的输出属性,例如音调、发音、语速、音量等。与纯文本输入相比,它可以提供更多的控制权和灵活性。
二、代码
/// <summary>
/// Converts text to speech by POSTing an SSML document to the Azure Speech REST endpoint
/// and handing the resulting <c>AudioClip</c> to a callback.
/// </summary>
// NOTE(review): OnInit is declared `override` and the class calls this.GetModel / this.GetSystem,
// but no base type is visible in this listing. Presumably the class derives from a framework
// system base class that was omitted here - confirm and restore the base type.
public class AzureTextToSpeakSystem
{
    // Speech synthesis REST endpoint. The path segment shown as asterisks is redacted in this
    // listing and must be replaced with the real path for the Speech resource. The host must be
    // the Azure endpoint itself: any intermediate host would receive the subscription key.
    private const string EndpointUri = "https://chinaeast2.tts.speech.azure.cn/**********/v1";

    // Subscription key of the Speech resource (redacted here). Prefer loading it from
    // configuration instead of committing the real value to source control.
    private const string Key = "********";

    // Upper bound on retries after an HTTP 429 response in GetAssignRoleVoice.
    private const int MaxRetries = 5;

    // Per-request timeout, in seconds, used by GetAssignRoleVoice.
    private const int RequestTimeoutSeconds = 10;

    // Task wrapping the most recent "current role" synthesis coroutine; stopped by BreakOperation.
    TaskIE.Task _getVoiceTask;

    // Most recent in-flight "current role" request, kept only so BreakOperation can release it.
    UnityWebRequest _speechRequest;

    protected override void OnInit()
    {
    }

    /// <summary>
    /// Synthesizes speech for the current role; used to generate audio online.
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the generated clip and the role on success only.</param>
    public void TextToSpeech(string _msg, Action<AudioClip, Role> _callback)
    {
        _getVoiceTask = new TaskIE.Task(GetCurrentRoleVoice(_msg, _callback, this.GetModel<IGameModel>().CurrentRole));
    }

    /// <summary>
    /// Synthesizes speech for a specific role; used to pre-generate local audio.
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the generated clip and the role on success only.</param>
    /// <param name="role">Role whose voice settings are used.</param>
    public void TextToSpeech(string _msg, Action<AudioClip, Role> _callback, Role role)
    {
        ActionKit.Coroutine(() => GetAssignRoleVoice(_msg, _callback, role)).StartGlobal();
    }

    /// <summary>
    /// Coroutine that sends one synthesis request and reports the clip through the callback.
    /// No timeout and no retry are applied on this path.
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the clip and role when the response code is 200.</param>
    /// <param name="role">Role whose voice settings are used.</param>
    /// <returns>Enumerator driving the request.</returns>
    private IEnumerator GetCurrentRoleVoice(string _msg, Action<AudioClip, Role> _callback, Role role)
    {
        string ssml = GenerateTextToSpeech(_msg, role);

        // Work on a local: the field can be reassigned by a later call while this coroutine is
        // suspended at the yield, and this coroutine must keep inspecting its own request.
        UnityWebRequest request = CreateSpeechRequest(ssml);
        _speechRequest = request;
        try
        {
            yield return request.SendWebRequest();

            if (request.responseCode == 200)
            {
                AudioClip audioClip = DownloadHandlerAudioClip.GetContent(request);
                _callback?.Invoke(audioClip, role);
            }
            else
            {
                Debug.LogError("语音合成失败: " + request.error);
            }
        }
        finally
        {
            // Disposing the request also disposes both handlers (see CreateSpeechRequest).
            request.Dispose();

            // Do not leave a disposed request behind for BreakOperation, and do not clear a
            // newer request that another call may have stored in the meantime.
            if (ReferenceEquals(_speechRequest, request))
            {
                _speechRequest = null;
            }
        }
    }

    /// <summary>
    /// Coroutine that sends a synthesis request with a timeout and retries with exponential
    /// backoff (1, 2, 4, ... seconds) while the service answers HTTP 429, up to
    /// <see cref="MaxRetries"/> retries. Any other failure ends the coroutine after logging.
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the clip and role on success only.</param>
    /// <param name="role">Role whose voice settings are used.</param>
    /// <returns>Enumerator driving the request and its retries.</returns>
    private IEnumerator GetAssignRoleVoice(string _msg, Action<AudioClip, Role> _callback, Role role)
    {
        string ssml = GenerateTextToSpeech(_msg, role);

        // Local counter so concurrently running coroutines do not share retry state.
        int attempt = 0;

        while (true)
        {
            int backoffSeconds;
            using (UnityWebRequest request = CreateSpeechRequest(ssml))
            {
                request.timeout = RequestTimeoutSeconds;
                yield return request.SendWebRequest();

                bool succeeded = request.result == UnityWebRequest.Result.Success
                                 && request.responseCode == 200;
                if (succeeded)
                {
                    AudioClip audioClip = DownloadHandlerAudioClip.GetContent(request);
                    _callback?.Invoke(audioClip, role);
                    yield break;
                }

                bool canRetry = request.responseCode == 429 && attempt < MaxRetries;
                if (!canRetry)
                {
                    // Terminal failure: leave the loop instead of re-sending immediately.
                    LogSynthesisFailure(request);
                    yield break;
                }

                backoffSeconds = 1 << attempt;
                attempt++;
                Debug.LogWarning("Received 429 - Too Many Requests. Retrying in " + backoffSeconds + " seconds.");
            }

            // Wait after the using block so the failed request is released during the backoff.
            yield return new WaitForSeconds(backoffSeconds);
        }
    }

    /// <summary>
    /// Builds the SSML request body for the given text using the voice settings of the role.
    /// The text and all attribute values are XML-escaped, so <paramref name="text"/> is treated
    /// as plain text, not as SSML markup.
    /// </summary>
    /// <param name="text">Plain text to speak; null is treated as empty.</param>
    /// <param name="role">Role whose voice settings are looked up through the role system.</param>
    /// <returns>The SSML document as a string.</returns>
    public string GenerateTextToSpeech(string text, Role role)
    {
        RoleVoice voiceRole = this.GetSystem<IRoleSystem>().GetRoleVoice(role);
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

        // A named level (when set) takes precedence over the numeric percentage.
        string rate = voiceRole.RateLevel ?? (voiceRole.Rate.ToString(invariant) + "%");
        string pitch = voiceRole.PitchLevel ?? (voiceRole.Pitch.ToString(invariant) + "%");

        // Invariant culture keeps StyleDegree formatted with '.' as the decimal separator
        // regardless of the device locale.
        string xml = string.Format(invariant, @"<speak version=""1.0"" xmlns=""http://www.w3.org/2001/10/synthesis""
xmlns:mstts=""https://www.w3.org/2001/mstts"" xml:lang=""{0}"">
<voice name=""{1}"">
<mstts:express-as style=""{2}"" styledegree=""{3}"" role=""{7}"">
<prosody rate=""{5}"" pitch=""{6}"" volume=""{8}"">
{4}
</prosody>
</mstts:express-as>
</voice>
</speak>",
            EscapeXml(voiceRole.Language),
            EscapeXml(voiceRole.VoiceName),
            EscapeXml(voiceRole.Style),
            voiceRole.StyleDegree,
            EscapeXml(text),
            EscapeXml(rate),
            EscapeXml(pitch),
            EscapeXml(voiceRole.Role),
            EscapeXml(voiceRole.VolumeLevel));
        return xml;
    }

    /// <summary>
    /// Stops the pending "current role" synthesis task and releases its in-flight request.
    /// Safe to call repeatedly.
    /// </summary>
    public void BreakOperation()
    {
        if (_getVoiceTask != null)
        {
            _getVoiceTask.Stop();
            _getVoiceTask = null;
        }

        if (_speechRequest != null)
        {
            // The request owns its handlers, so this releases them as well.
            _speechRequest.Dispose();
            _speechRequest = null;
        }
    }

    // Creates a POST request carrying the SSML body, with the headers the endpoint expects and
    // with both handlers owned by the request so a single Dispose releases everything.
    private static UnityWebRequest CreateSpeechRequest(string ssml)
    {
        UnityWebRequest request = new UnityWebRequest(EndpointUri, "POST");
        request.uploadHandler = new UploadHandlerRaw(System.Text.Encoding.UTF8.GetBytes(ssml));
        request.downloadHandler = new DownloadHandlerAudioClip(request.uri, AudioType.WAV);
        request.disposeUploadHandlerOnDispose = true;
        request.disposeDownloadHandlerOnDispose = true;
        request.SetRequestHeader("Ocp-Apim-Subscription-Key", Key);
        request.SetRequestHeader("X-Microsoft-OutputFormat", "riff-24khz-16bit-mono-pcm");
        request.SetRequestHeader("Content-Type", "application/ssml+xml");
        return request;
    }

    // Logs the error text, result and HTTP status of a failed synthesis request.
    private static void LogSynthesisFailure(UnityWebRequest request)
    {
        Debug.LogError("语音合成失败: " + request.error);
        Debug.Log(request.result);
        Debug.Log(request.responseCode);
    }

    // Escapes XML-reserved characters; null becomes an empty string.
    private static string EscapeXml(string value)
    {
        return System.Security.SecurityElement.Escape(value) ?? string.Empty;
    }
}
/// <summary>
/// Voice settings of a role.
/// </summary>
public class RoleVoice
{
    /// <summary>Name of the voice to use.</summary>
    public string VoiceName;

    /// <summary>Language code; defaults to "zh-CN".</summary>
    public string Language = "zh-CN";

    /// <summary>Emotion / speaking style; empty by default.</summary>
    public string Style = string.Empty;

    /// <summary>Intensity of the speaking style; range 0.01-2, default 1.</summary>
    public float StyleDegree = 1f;

    /// <summary>Speaking rate as a percentage; range -100% to 200%.</summary>
    public int Rate;

    /// <summary>Pitch.</summary>
    public int Pitch;

    /// <summary>Role; defaults to "default".</summary>
    public string Role = "default";

    /// <summary>Volume level; defaults to "default".</summary>
    public string VolumeLevel = "default";

    /// <summary>Named rate level; null unless assigned.</summary>
    public string RateLevel;

    /// <summary>Named pitch level; null unless assigned.</summary>
    public string PitchLevel;
}
// Example RoleVoice declaration:
RoleVoice luBao = new RoleVoice
{
    VoiceName = "zh-CN-YunxiNeural",
    Style = "newscast", // narrates news in a formal, professional tone
    StyleDegree = 0.5f,
    Rate = 1,
    Pitch = 6,
    Role = "Boy",
    VolumeLevel = "x-loud",
};