三、微软:1.语音合成标记语言 (SSML)

一、介绍

语音合成标记语言 (SSML) 是一种基于 XML 的标记语言,可用于微调文本转语音的输出属性,例如音调、发音、语速、音量等。与纯文本输入相比,它可以提供更多的控制权和灵活性。

二、代码

/// <summary>
/// Text-to-speech system that posts SSML to the Azure Speech REST endpoint with
/// <c>UnityWebRequest</c> and hands the decoded WAV back to the caller as an <c>AudioClip</c>.
/// </summary>
// NOTE(review): OnInit is declared `override` and GetModel/GetSystem are called on `this`,
// so this class presumably derives from a framework base type that is not shown in this
// listing — confirm and restore the base type in the class header.
public class AzureTextToSpeakSystem 
{
    // Speech synthesis endpoint (supplied by the backend team). The path segment is redacted.
    private const string EndpointUri = "https://chinaeast2.tts.speech.azure.cn/**********/v1";

    // Subscription key of the speech resource (supplied by the backend team).
    // NOTE(review): a key compiled into the client can be extracted; consider loading it
    // from configuration or proxying the call through a backend service.
    private const string Key = "********";

    // Upper bound on retries after HTTP 429 so a throttled request cannot loop forever.
    private const int MaxRetryAttempts = 5;

    // Per-request timeout, in seconds, for the pre-generation path.
    private const int AssignRoleTimeoutSeconds = 10;

    // Handle of the coroutine started by the current-role overload; stopped by BreakOperation.
    TaskIE.Task _getVoiceTask;

    // In-flight request of the current-role overload; kept so BreakOperation can release it.
    // It owns its upload/download handlers and disposes them together with itself.
    UnityWebRequest _speechRequest;

    protected override void OnInit()
    {
    }

    /// <summary>
    /// Synthesizes speech for the current role (online generation).
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the generated clip and the role on success.</param>
    public void TextToSpeech(string _msg, Action<AudioClip, Role> _callback)
    {
        _getVoiceTask = new TaskIE.Task(GetCurrentRoleVoice(_msg, _callback, this.GetModel<IGameModel>().CurrentRole));
    }

    /// <summary>
    /// Synthesizes speech for an explicitly given role (used to pre-generate local audio).
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the generated clip and the role on success.</param>
    /// <param name="role">Role whose voice settings are used.</param>
    public void TextToSpeech(string _msg, Action<AudioClip, Role> _callback, Role role)
    {
        ActionKit.Coroutine(() => GetAssignRoleVoice(_msg, _callback, role)).StartGlobal();
    }

    /// <summary>
    /// Builds a POST request carrying <paramref name="ssml"/> with an audio-clip download handler.
    /// The returned request owns both handlers, so disposing it releases everything.
    /// </summary>
    /// <param name="ssml">SSML document to send as the request body.</param>
    /// <returns>A configured, not yet sent request.</returns>
    private static UnityWebRequest CreateSpeechRequest(string ssml)
    {
        UnityWebRequest request = new UnityWebRequest(EndpointUri, "POST");
        request.SetRequestHeader("Ocp-Apim-Subscription-Key", Key);
        request.SetRequestHeader("X-Microsoft-OutputFormat", "riff-24khz-16bit-mono-pcm");
        request.SetRequestHeader("Content-Type", "application/ssml+xml");
        request.uploadHandler = new UploadHandlerRaw(System.Text.Encoding.UTF8.GetBytes(ssml));
        request.downloadHandler = new DownloadHandlerAudioClip(request.uri, AudioType.WAV);
        request.disposeUploadHandlerOnDispose = true;
        request.disposeDownloadHandlerOnDispose = true;
        return request;
    }

    /// <summary>
    /// Coroutine: sends one synthesis request and reports the clip through the callback.
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the generated clip and the role on HTTP 200.</param>
    /// <param name="role">Role whose voice settings are used.</param>
    /// <returns>Enumerator driven by the coroutine runner.</returns>
    private IEnumerator GetCurrentRoleVoice(string _msg, Action<AudioClip, Role> _callback, Role role)
    {
        string ssml = GenerateTextToSpeech(_msg, role);
        UnityWebRequest request = CreateSpeechRequest(ssml);
        _speechRequest = request;
        try
        {
            yield return request.SendWebRequest();

            if (request.responseCode == 200)
            {
                AudioClip audioClip = DownloadHandlerAudioClip.GetContent(request);
                if (_callback != null)
                {
                    _callback(audioClip, role);
                }
            }
            else
            {
                Debug.LogError("语音合成失败: " + request.error);
            }
        }
        finally
        {
            request.Dispose();
            // Clear the field only if it still points at this request, so BreakOperation
            // never touches an already disposed object or a newer request's slot.
            if (ReferenceEquals(_speechRequest, request))
            {
                _speechRequest = null;
            }
        }
    }

    /// <summary>
    /// Coroutine: sends a synthesis request with a timeout and retries with exponential
    /// back-off (1, 2, 4, ... seconds) while the service answers HTTP 429, up to
    /// <see cref="MaxRetryAttempts"/> retries. Any other failure is logged and ends the coroutine.
    /// </summary>
    /// <param name="_msg">Plain text to speak.</param>
    /// <param name="_callback">Invoked with the generated clip and the role on success.</param>
    /// <param name="role">Role whose voice settings are used.</param>
    /// <returns>Enumerator driven by the coroutine runner.</returns>
    private IEnumerator GetAssignRoleVoice(string _msg, Action<AudioClip, Role> _callback, Role role)
    {
        string ssml = GenerateTextToSpeech(_msg, role);

        // Local counter: several of these coroutines may run at once and must not share state.
        int attempt = 0;
        while (true)
        {
            using (UnityWebRequest request = CreateSpeechRequest(ssml))
            {
                request.timeout = AssignRoleTimeoutSeconds;
                yield return request.SendWebRequest();

                if (request.result == UnityWebRequest.Result.Success && request.responseCode == 200)
                {
                    AudioClip audioClip = DownloadHandlerAudioClip.GetContent(request);
                    if (_callback != null)
                    {
                        _callback(audioClip, role);
                    }
                    yield break;
                }

                bool throttled = request.responseCode == 429;
                if (!throttled || attempt >= MaxRetryAttempts)
                {
                    // Not retryable (or retries exhausted): stop instead of spinning forever.
                    Debug.LogError("语音合成失败: " + request.error + " (result=" + request.result + ", code=" + request.responseCode + ")");
                    yield break;
                }
            }

            // The failed request is already disposed here, so nothing is held while waiting.
            int waitTime = 1 << attempt;
            Debug.LogWarning("Received 429 - Too Many Requests. Retrying in " + waitTime + " seconds.");
            yield return new WaitForSeconds(waitTime);
            attempt++;
        }
    }

    /// <summary>
    /// Builds the SSML document for <paramref name="text"/> using the voice settings of
    /// <paramref name="role"/>.
    /// </summary>
    /// <param name="text">Plain text to speak; it is XML-escaped before being embedded.</param>
    /// <param name="role">Role whose <c>RoleVoice</c> is looked up through <c>IRoleSystem</c>.</param>
    /// <returns>The SSML string to post to the speech endpoint.</returns>
    public string GenerateTextToSpeech(string text, Role role)
    {
        RoleVoice voiceRole = this.GetSystem<IRoleSystem>().GetRoleVoice(role);

        // Numbers must be formatted culture-independently ("0.5", never "0,5").
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

        // A named level, when set, takes precedence over the numeric percentage.
        string rate = voiceRole.RateLevel ?? voiceRole.Rate.ToString(invariant) + "%";
        string pitch = voiceRole.PitchLevel ?? voiceRole.Pitch.ToString(invariant) + "%";

        // Escape &, <, >, " and ' so arbitrary message text cannot break the XML document.
        string escapedText = System.Security.SecurityElement.Escape(text ?? string.Empty);

        string xml = string.Format(invariant, @"<speak version=""1.0"" xmlns=""http://www.w3.org/2001/10/synthesis""
              xmlns:mstts=""https://www.w3.org/2001/mstts"" xml:lang=""{0}"">
              <voice name=""{1}""> 
                   <mstts:express-as style=""{2}"" styledegree=""{3}"" role=""{7}"">
                   <prosody rate=""{5}"" pitch=""{6}"" volume=""{8}"">                
                      {4}
                  </prosody>  
                  </mstts:express-as>
              </voice>
          </speak>", voiceRole.Language, voiceRole.VoiceName, voiceRole.Style, voiceRole.StyleDegree, escapedText, rate, pitch, voiceRole.Role, voiceRole.VolumeLevel);
        return xml;
    }

    /// <summary>
    /// Cancels the current-role synthesis: stops its coroutine task and releases the
    /// in-flight request together with the handlers it owns. Safe to call repeatedly.
    /// </summary>
    public void BreakOperation()
    {
        if (_getVoiceTask != null)
        {
            _getVoiceTask.Stop();
            _getVoiceTask = null;
        }

        if (_speechRequest != null)
        {
            _speechRequest.Dispose();
            _speechRequest = null;
        }
    }
}	
  /// <summary>
  /// Voice settings of a role; the values are substituted into the SSML request.
  /// </summary>
  public class RoleVoice
  {
      /// <summary>Name of the voice to use.</summary>
      public string VoiceName;

      /// <summary>Language tag of the document.</summary>
      public string Language = "zh-CN";

      /// <summary>Emotion / speaking style.</summary>
      public string Style = "";

      /// <summary>Intensity of the speaking style; range 0.01-2, default 1.</summary>
      public float StyleDegree = 1f;

      /// <summary>Speaking rate in percent; range -100% to 200%.</summary>
      public int Rate = 0;

      /// <summary>Pitch.</summary>
      public int Pitch = 0;

      /// <summary>Role to play.</summary>
      public string Role = "default";

      /// <summary>Volume level.</summary>
      public string VolumeLevel = "default";

      /// <summary>Rate level; left unset (null) by default.</summary>
      public string RateLevel;

      /// <summary>Pitch level; left unset (null) by default.</summary>
      public string PitchLevel;
  }

// Example RoleVoice declaration:
 RoleVoice luBao = new RoleVoice
 {
     VoiceName = "zh-CN-YunxiNeural",
     Style = "newscast", // narrates news in a formal, professional tone
     StyleDegree = 0.5f,
     Rate = 1,
     Pitch = 6,
     Role = "Boy",
     VolumeLevel = "x-loud",
 };

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值