一、腾讯云:3.语音识别-2:实时语音识别

一、介绍

1.简介

本接口服务采用 websocket 协议,对实时音频流进行识别,同步返回识别结果,达到“边说边出文字”的效果。

在使用该接口前,需要开通语音识别服务,并进入 API 密钥管理页面新建密钥,生成 AppID、SecretID 和 SecretKey。这些密钥用于在调用 API 时生成签名,服务端通过该签名进行接口鉴权。

2.说明

  • WebSocket用的是BestHTTP插件,可以在本站搜索下载使用;
  • 发送_webSocket.Send($"{{\"type\": \"end\"}}");通知服务端结束识别后,OnWebSocketError()方法会收到一条报错信息,大意是连接被意外关闭。目前没有找到原因,但不影响使用,所以我把打印错误的代码注释掉了。

二、代码

/// <summary>
/// Streams microphone audio to Tencent Cloud's real-time speech recognition
/// WebSocket endpoint and reacts to the recognition messages it sends back.
/// </summary>
/// <remarks>
/// <para><c>WebSocket</c> is the BestHTTP plugin's WebSocket class.</para>
/// <para><c>Task</c> is NOT <c>System.Threading.Tasks.Task</c>: it is constructed
/// from an <c>IEnumerator</c> and exposes <c>Stop()</c>, i.e. it is a coroutine
/// wrapper declared elsewhere in the project (presumably it starts the coroutine
/// on construction — confirm against its definition).</para>
/// <para>The credentials (<c>_secretId</c>, <c>_secretKey</c>, <c>_appId</c>) must be
/// assigned before <see cref="StartASR"/> is called, e.g. from the project's
/// GameManager; <see cref="StartASR"/> refuses to connect without them.</para>
/// </remarks>
public class TencentCloudRealtimeASRManager : ASRManager
{
	#region Tencent Cloud ASR parameters
	string _secretId;// fill in your own SecretId
	string _secretKey;// fill in your own SecretKey
	string _appId;// fill in your own AppId
	// Host and path of the endpoint WITHOUT the "wss://" scheme, because the
	// signature is computed over the URL without its protocol part.
	const string ENDPOINT = "asr.cloud.tencent.com/asr/v2/";
	// Milliseconds of continuous silence (after speech was heard) before the
	// GameManager is told that the user stopped talking.
	const int SILENCE_NOTIFY_MS = 1000;
	// Seconds to wait for the first speech segment before giving up.
	const float NO_SPEECH_TIMEOUT_SECONDS = 10f;
	#endregion

	string _paramStr;// request URL without scheme and signature (= the text that gets signed)
	WebSocket _webSocket;

	// Configuration
	[Header("腾讯云配置")]
	public int sampleRate = 16000; // 16k or 8k
	public int sendIntervalMs = 40; // duration of one audio packet
	public float silenceThreshold = 0.04f; // mean absolute amplitude below which a frame counts as silent
	public int silenceDurationMs = 2000; // kept for inspector/caller compatibility; not consulted by this class

	// Derived audio parameter: samples contained in one packet.
	private int samplesPerFrame;

	// Audio stream state
	private AudioClip recordingClip;
	private int lastReadPosition;

	private ConcurrentQueue<byte[]> sendQueue;

	bool _isOnRecognition = false;// false during the handshake, true once the server accepted the session
	bool _endMessageSent;// true once {"type":"end"} was sent for the current session
	bool _connectionFailed;// set by OnWebSocketError so the connect coroutine can stop waiting
	Task _connectWebSocketTask;
	Task _sendDataTask;
	Task _audioProcessingTask;
	Task _checkIsHaveRecordingTask;
	string hotword_list= "一大|10,二大|10";

	/// <summary>
	/// Derives the per-packet sample count from the configured sample rate and
	/// packet interval.
	/// </summary>
	public void Start()
	{
		RecalculateFrameSize();
	}

	/// <summary>
	/// Recomputes <c>samplesPerFrame</c> from <c>sampleRate</c> and <c>sendIntervalMs</c>.
	/// </summary>
	void RecalculateFrameSize()
	{
		samplesPerFrame = sampleRate * sendIntervalMs / 1000;
	}

	/// <summary>
	/// Builds the text to sign (stored in <c>_paramStr</c>) and returns its
	/// URL-encoded HMAC-SHA1/Base64 signature.
	/// </summary>
	/// <returns>The URL-encoded signature to append as the <c>signature</c> query parameter.</returns>
	string GenerateSignature()
	{
		// long instead of int so the Unix timestamp does not overflow in 2038.
		long now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
		string timestamp = now.ToString();
		string expired = (now + 86400).ToString(); // valid for one day

		// The engine must match the audio that is actually recorded.
		string engineModel = sampleRate == 8000 ? "8k_zh" : "16k_zh";
		// Fresh per-connection identifiers instead of constants shared by every session.
		// System.Random is fully qualified to avoid a clash with UnityEngine.Random.
		int nonce = new System.Random().Next(1, int.MaxValue);
		string voiceId = Guid.NewGuid().ToString();

		// 1. All parameters except signature, sorted alphabetically, appended to the
		//    URL without its protocol part ("wss://"), form the text to sign.
		_paramStr =$"{ENDPOINT}{_appId}?engine_model_type={engineModel}&expired={expired}&hotword_list={hotword_list}&needvad=1&nonce={nonce}&secretid={_secretId}&timestamp={timestamp}&vad_silence_time=2000&voice_format=1&voice_id={voiceId}";

		// 2. HMAC-SHA1 over that text, then Base64.
		using (HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(_secretKey)))
		{
			byte[] hash = hmac.ComputeHash(Encoding.UTF8.GetBytes(_paramStr));
			string signature = Convert.ToBase64String(hash);
			// 3. The signature must be URL-encoded, otherwise authentication fails intermittently.
			return Uri.EscapeDataString(signature);
		}
	}

	/// <summary>
	/// Starts a new recognition session: releases whatever a previous session
	/// left behind and opens a new WebSocket connection.
	/// </summary>
	public override void StartASR()
	{
		if (string.IsNullOrEmpty(_secretId) || string.IsNullOrEmpty(_secretKey) || string.IsNullOrEmpty(_appId))
		{
			// Without this guard the signature code would throw on the null key.
			Debug.LogError("腾讯云 SecretId/SecretKey/AppId 未配置,无法开始语音识别");
			return;
		}
		// Avoid leaking the socket/coroutines of a session that was never reset.
		ReleaseResources();
		_connectWebSocketTask = new Task(ConnectTTSWebSocket());
	}

	/// <summary>
	/// Opens the WebSocket connection and waits until it is open or has failed.
	/// </summary>
	/// <returns>Coroutine enumerator.</returns>
	IEnumerator ConnectTTSWebSocket()
	{
		_isOnRecognition = false;
		_endMessageSent = false;
		_connectionFailed = false;

		string signature = GenerateSignature();
		// Full request address: scheme + signed text + signature.
		string url = $"wss://{_paramStr}&signature={signature}";
		_webSocket = new WebSocket(new Uri(url));
		// Register event handlers
		_webSocket.OnOpen += OnWebSocketOpen;
		_webSocket.OnMessage += OnWebSocketText;
		_webSocket.OnError += OnWebSocketError;
		// Start connecting
		_webSocket.Open();

		// Wait for the connection, but stop waiting if it failed or was reset.
		yield return new WaitUntil(() => _webSocket == null || _webSocket.IsOpen || _connectionFailed);
		if (_webSocket != null && _webSocket.IsOpen)
		{
			Debug.Log("WebSocket 连接成功");
		}
		else
		{
			Debug.LogError("WebSocket 连接失败");
		}
	}

	/// <summary>
	/// Called when the connection to the WebSocket server is established.
	/// </summary>
	/// <param name="ws">The socket that opened.</param>
	private void OnWebSocketOpen(WebSocket ws)
	{
		Debug.Log("连接已建立");
	}

	/// <summary>
	/// Called for every text message received from the server. The first
	/// successful message completes the handshake and starts recording; later
	/// messages carry recognition results or the final marker.
	/// </summary>
	/// <param name="ws">The socket that received the message.</param>
	/// <param name="message">JSON payload sent by the server.</param>
	private void OnWebSocketText(WebSocket ws, string message)
	{
		JObject obj;
		try
		{
			obj = JObject.Parse(message);
		}
		catch (Exception e)
		{
			Debug.LogError($"无法解析服务器消息: {e.Message}");
			return;
		}

		string code = obj["code"]?.ToString();
		string status = obj["message"]?.ToString();
		if (code != "0" || status != "success")
		{
			// Previously such messages were dropped silently.
			Debug.LogError($"语音识别服务返回错误: code={code}, message={status}");
			return;
		}

		if (!_isOnRecognition)
		{
			// Handshake succeeded: enter the recognition phase.
			_isOnRecognition = true;
			Debug.Log("开始识别");
			_sendDataTask = new Task(SendDataCoroutine());
			StartRecording();
			return;
		}

		JToken result = obj["result"];
		if (result == null)
		{
			JToken final = obj["final"];
			if (final != null && final.ToString() == "1")
			{
				Debug.Log("识别结束");
				_webSocket?.Close();
			}
			return;
		}

		string sliceType = result["slice_type"]?.ToString();
		string question = result["voice_text_str"]?.ToString() ?? "";
		if (sliceType == "2")
		{
			// A sentence is complete.
			Debug.Log("识别到的语音为:" + question);
			StopRecording();
			// Application-specific handling of the recognized text goes here;
			// an empty string means nothing was recognized.
		}
		else if (sliceType == "0")
		{
			// Speech has started, so the no-speech timeout is no longer needed.
			StopTask(ref _checkIsHaveRecordingTask);
		}
	}

	/// <summary>
	/// Called when the socket reports an error.
	/// </summary>
	/// <param name="ws">The socket that failed.</param>
	/// <param name="error">Description of the error.</param>
	private void OnWebSocketError(WebSocket ws, string error)
	{
		_connectionFailed = true;
		if (_endMessageSent)
		{
			// After the end message an "unexpectedly closed" error is reported even
			// though the session finished normally, so it is not logged as an error.
			Debug.Log($"WebSocket 已关闭: {error}");
		}
		else
		{
			Debug.LogError($"WebSocket错误: {error}");
		}
	}

	/// <summary>
	/// Releases the microphone, the socket and all running coroutines when the
	/// component is destroyed.
	/// </summary>
	void OnDestroy()
	{
		ReleaseResources();
	}

	/// <summary>
	/// Stops the microphone, closes the socket exactly once (after detaching the
	/// handlers) and stops every coroutine started by this class.
	/// </summary>
	void ReleaseResources()
	{
		if (Microphone.IsRecording(null))
		{
			Microphone.End(null);
		}
		if (_webSocket != null)
		{
			_webSocket.OnOpen -= OnWebSocketOpen;
			_webSocket.OnMessage -= OnWebSocketText;
			_webSocket.OnError -= OnWebSocketError;
			_webSocket.Close();
			_webSocket = null;
		}
		StopTask(ref _connectWebSocketTask);
		StopTask(ref _sendDataTask);
		StopTask(ref _audioProcessingTask);
		StopTask(ref _checkIsHaveRecordingTask);
	}

	/// <summary>
	/// Stops <paramref name="task"/> if it is set and clears the reference.
	/// </summary>
	static void StopTask(ref Task task)
	{
		if (task != null)
		{
			task.Stop();
			task = null;
		}
	}

	/// <summary>
	/// Sends queued audio packets to the server while the socket is open.
	/// </summary>
	/// <returns>Coroutine enumerator.</returns>
	IEnumerator SendDataCoroutine()
	{
		var wait = new WaitForSeconds(sendIntervalMs / 1000f);
		while (_webSocket != null && _webSocket.State == WebSocketStates.Open)
		{
			if (Microphone.IsRecording(null) && sendQueue != null)
			{
				// Drain everything that accumulated: the wait can last longer than
				// one packet interval, and sending a single packet per tick would
				// let the queue (and the recognition latency) grow without bound.
				while (sendQueue.TryDequeue(out byte[] data))
				{
					try
					{
						_webSocket.Send(data);
					}
					catch (Exception e)
					{
						Debug.LogError($"数据发送失败: {e.Message}");
					}
				}
			}
			yield return wait;
		}
	}

	/// <summary>
	/// Starts the default microphone and the coroutines that process its audio.
	/// Does nothing if the microphone is already recording.
	/// </summary>
	void StartRecording()
	{
		if (Microphone.IsRecording(null)) return;
		// Start() may not have run yet, or the settings may have changed since.
		RecalculateFrameSize();
		// 15-second looping clip on the default device.
		recordingClip = Microphone.Start(null, true, 15, sampleRate);
		if (recordingClip == null)
		{
			Debug.LogError("麦克风启动失败");
			return;
		}
		MainManager.GetManager<GameManager>().StartRecordEvent();
		lastReadPosition = 0;
		sendQueue = new ConcurrentQueue<byte[]>();
		// Start the processing coroutines.
		_audioProcessingTask = new Task(AudioProcessingCoroutine());
		_checkIsHaveRecordingTask = new Task(CheckIsHaveRecording());
	}

	/// <summary>
	/// Ends the recording if no speech was detected within the timeout. The
	/// coroutine is stopped from OnWebSocketText as soon as speech starts.
	/// </summary>
	/// <returns>Coroutine enumerator.</returns>
	IEnumerator CheckIsHaveRecording()
	{
		yield return new WaitForSeconds(NO_SPEECH_TIMEOUT_SECONDS);
		StopRecording();
	}

	/// <summary>
	/// Reads newly recorded samples from the looping clip every frame and turns
	/// them into fixed-size packets.
	/// </summary>
	/// <returns>Coroutine enumerator.</returns>
	IEnumerator AudioProcessingCoroutine()
	{
		int silenceCounter = 0;
		bool isSpeaking = false;

		while (Microphone.IsRecording(null))
		{
			if (recordingClip != null && samplesPerFrame > 0)
			{
				int available = Microphone.GetPosition(null) - lastReadPosition;
				if (available < 0)
				{
					// The write position wrapped around the end of the looping clip.
					available += recordingClip.samples;
				}
				// Consume every complete frame, not just one, so reading keeps up
				// with recording even when the frame rate is low.
				while (available >= samplesPerFrame && Microphone.IsRecording(null))
				{
					ProcessAudioChunk(available, ref silenceCounter, ref isSpeaking);
					available -= samplesPerFrame;
				}
			}
			yield return null;
		}
	}

	/// <summary>
	/// Reads one frame at the current read position, queues it for sending and
	/// updates the silence tracking.
	/// </summary>
	/// <param name="samplesToRead">Number of unread samples available (informational; exactly one frame is consumed).</param>
	/// <param name="silenceCounter">Accumulated milliseconds of consecutive silent frames.</param>
	/// <param name="isSpeaking">Set to true once a non-silent frame was seen.</param>
	void ProcessAudioChunk(int samplesToRead, ref int silenceCounter, ref bool isSpeaking)
	{
		// AudioClip.GetData wraps to the start of the clip when the read runs past
		// its end, so a frame straddling the loop point is read without loss.
		float[] chunk = new float[samplesPerFrame];
		recordingClip.GetData(chunk, lastReadPosition);
		lastReadPosition = (lastReadPosition + samplesPerFrame) % recordingClip.samples;

		// Queue the frame as PCM.
		sendQueue.Enqueue(ConvertToPCM(chunk));

		if (IsSilence(chunk))
		{
			silenceCounter += sendIntervalMs;
			if (isSpeaking && silenceCounter >= SILENCE_NOTIFY_MS)
			{
				MainManager.GetManager<GameManager>().CheckIsOnRecording(true);
			}
		}
		else
		{
			silenceCounter = 0;
			isSpeaking = true;
			MainManager.GetManager<GameManager>().CheckIsOnRecording(false);
		}
	}

	/// <summary>
	/// Converts float samples to 16-bit little-endian PCM.
	/// </summary>
	/// <param name="samples">Samples, nominally in [-1, 1]; values outside are clamped.</param>
	/// <returns>Two bytes per sample, low byte first.</returns>
	byte[] ConvertToPCM(float[] samples)
	{
		byte[] pcm = new byte[samples.Length * 2];
		for (int i = 0; i < samples.Length; i++)
		{
			// Clamp first: an out-of-range sample would otherwise overflow the cast.
			short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
			pcm[i * 2] = (byte)(value & 0xFF);
			pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
		}
		return pcm;
	}

	/// <summary>
	/// Returns true when the mean absolute amplitude of <paramref name="samples"/>
	/// is below <c>silenceThreshold</c>. An empty frame counts as silent.
	/// </summary>
	bool IsSilence(float[] samples)
	{
		if (samples.Length == 0) return true;
		float sum = 0;
		foreach (float sample in samples)
		{
			sum += Mathf.Abs(sample);
		}
		return sum / samples.Length < silenceThreshold;
	}

	/// <summary>
	/// Stops the microphone and tells the server that the audio stream ended.
	/// Does nothing if the microphone is not recording.
	/// </summary>
	void StopRecording()
	{
		if (Microphone.IsRecording(null))
		{
			Microphone.End(null);
			SendEndMessage();
		}
	}

	/// <summary>
	/// Sends the <c>{"type":"end"}</c> message that ends the recognition session.
	/// </summary>
	void SendEndMessage()
	{
		if (_webSocket == null || !_webSocket.IsOpen) return;
		try
		{
			_webSocket.Send(JsonUtility.ToJson(new EndText()));
			_endMessageSent = true;
		}
		catch (Exception e)
		{
			Debug.LogError($"结束消息发送失败: {e.Message}");
		}
	}

	/// <summary>
	/// Aborts the current session: stops the microphone, closes the socket and
	/// stops all coroutines.
	/// </summary>
	public override void Reset()
	{
		ReleaseResources();
	}
}
/// <summary>
/// Payload of the message that tells the server the audio stream has ended.
/// A new instance always carries <c>type == "end"</c>.
/// </summary>
[Serializable]
public class EndText
{
	// Public field (not a property) with the lower-case name used as the JSON key.
	public string type = "end";
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值