一、介绍
1.简介
本接口服务采用 WebSocket 协议,对实时音频流进行识别,并同步返回识别结果,达到“边说边出文字”的效果。
在使用该接口前,需要先开通语音识别服务,并进入 API 密钥管理页面新建密钥,生成 AppID、SecretID 和 SecretKey。它们用于在调用 API 时生成签名,签名则用于接口鉴权。
2.说明
- WebSocket用的是BestHTTP插件,可以在本站搜索下载使用;
- 调用 _webSocket.Send($"{{\"type\": \"end\"}}"); 通知服务端结束识别后,OnWebSocketError() 方法会收到一条报错信息,大意是连接被意外关闭。目前没有找到原因,但不影响使用,所以我把打印该错误的代码注释掉了。
二、代码
/// <summary>
/// Streams microphone audio to the Tencent Cloud real-time speech recognition (ASR)
/// WebSocket endpoint and logs the recognized text.
/// </summary>
/// <remarks>
/// Session flow: <see cref="StartASR"/> opens a signed WebSocket connection; the first
/// successful server message is treated as the handshake acknowledgement and starts the
/// microphone; audio is cut into fixed-size 16-bit PCM frames and sent while recording;
/// a result with <c>slice_type == 2</c> stops the recording and sends the "end" message;
/// a message carrying <c>final == 1</c> closes the socket.
/// <para>
/// <c>WebSocket</c> is the BestHTTP plugin type. <c>Task</c> is constructed from an
/// <see cref="IEnumerator"/> and exposes <c>Stop()</c>, so it is presumably a project
/// coroutine wrapper rather than <c>System.Threading.Tasks.Task</c> (verify against the project).
/// </para>
/// </remarks>
public class TencentCloudRealtimeASRManager : ASRManager
{
    #region Tencent Cloud ASR parameters
    // Endpoint host and path WITHOUT the "wss://" scheme: the signature is computed
    // over the request URL with the scheme stripped.
    const string EndpointHostPath = "asr.cloud.tencent.com/asr/v2/";
    // Value sent as the "nonce" query parameter.
    const string Nonce = "1673408372";
    // Lifetime of a generated signature (one day).
    const int SignatureLifetimeSeconds = 86400;
    // Length in seconds of the looping microphone clip.
    const int RecordingLoopSeconds = 15;
    // Continuous silence (ms) after speech before GameManager is told the user paused.
    const int SpeechPauseMs = 1000;
    // Seconds to wait for the first recognized speech before ending the session.
    const float NoSpeechTimeoutSeconds = 10f;

    string _secretId;   // fill in your own SecretId
    string _secretKey;  // fill in your own SecretKey
    string _appId;      // fill in your own AppId
    // Temporary hot words, formatted as "word|weight" pairs separated by commas.
    string _hotwordList = "一大|10,二大|10";
    #endregion

    string _paramStr;   // signature source text: request URL without scheme and signature
    string _timestamp;  // Unix timestamp (seconds) of the current request
    string _expired;    // Unix timestamp (seconds) at which the signature expires
    WebSocket _webSocket;

    // Configuration
    [Header("腾讯云配置")]
    public int sampleRate = 16000;          // 16 kHz or 8 kHz
    public int sendIntervalMs = 40;         // duration of one audio frame in milliseconds
    public float silenceThreshold = 0.04f;  // mean absolute amplitude below which a frame counts as silent
    public int silenceDurationMs = 2000;    // sustained-silence duration; not read by this class at present

    // Number of samples in one frame of sendIntervalMs milliseconds.
    private int samplesPerFrame;

    // Audio streaming state
    private AudioClip recordingClip;
    private int lastReadPosition;           // next sample index to read from recordingClip
    private ConcurrentQueue<byte[]> sendQueue;

    bool _isOnRecognition = false;  // false until the server acknowledged the handshake
    bool _sessionEnding = false;    // true once the end of the session has been requested
    bool _connectFailed = false;    // true when an error was reported for the current socket

    Task _connectWebSocketTask;
    Task _sendDataTask;
    Task _audioProcessingTask;
    Task _checkIsHaveRecordingTask;

    /// <summary>
    /// Derives the frame size from the configured sample rate and send interval.
    /// </summary>
    public void Start()
    {
        samplesPerFrame = sampleRate * sendIntervalMs / 1000;
    }

    /// <summary>
    /// Builds the signature source text (stored in <c>_paramStr</c>) and returns its
    /// URL-encoded HMAC-SHA1 signature.
    /// </summary>
    /// <returns>The Base64 signature, URL-encoded.</returns>
    string GenerateSignature()
    {
        long now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
        _timestamp = now.ToString();
        _expired = (now + SignatureLifetimeSeconds).ToString();

        // One fresh identifier per connection instead of a fixed literal.
        string voiceId = Guid.NewGuid().ToString();

        // 1. All parameters except "signature", in dictionary order, appended to the
        //    request URL without its scheme, form the text to sign.
        _paramStr = $"{EndpointHostPath}{_appId}"
            + "?engine_model_type=16k_zh"
            + $"&expired={_expired}"
            + $"&hotword_list={_hotwordList}"
            + "&needvad=1"
            + $"&nonce={Nonce}"
            + $"&secretid={_secretId}"
            + $"&timestamp={_timestamp}"
            + "&vad_silence_time=2000"
            + "&voice_format=1"
            + $"&voice_id={voiceId}";

        // 2. HMAC-SHA1 over the text, then Base64.
        using (HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(_secretKey)))
        {
            byte[] hash = hmac.ComputeHash(Encoding.UTF8.GetBytes(_paramStr));
            string signature = Convert.ToBase64String(hash);
            // 3. The Base64 text may contain '+', '/' and '=', so it must be URL-encoded,
            //    otherwise authentication fails intermittently.
            return Uri.EscapeDataString(signature);
        }
    }

    /// <summary>
    /// Starts a new recognition session. Any session still in progress is released first.
    /// </summary>
    public override void StartASR()
    {
        if (string.IsNullOrEmpty(_secretId) || string.IsNullOrEmpty(_secretKey) || string.IsNullOrEmpty(_appId))
        {
            // Without credentials the signature cannot be computed.
            Debug.LogError("腾讯云 SecretId / SecretKey / AppId 未配置,无法开始识别");
            return;
        }

        ReleaseSession();
        _connectWebSocketTask = new Task(ConnectWebSocket());
    }

    /// <summary>
    /// Opens the signed WebSocket connection and waits until it is open, has failed,
    /// or has been replaced or released.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator ConnectWebSocket()
    {
        _isOnRecognition = false;
        _sessionEnding = false;
        _connectFailed = false;

        string signature = GenerateSignature();
        string url = $"wss://{_paramStr}&signature={signature}";

        // Keep a local reference: the field is set to null when the session is released,
        // and the wait predicate below must not dereference it.
        WebSocket socket = new WebSocket(new Uri(url));
        _webSocket = socket;

        socket.OnOpen += OnWebSocketOpen;
        socket.OnMessage += OnWebSocketText;
        socket.OnError += OnWebSocketError;
        socket.Open();

        yield return new WaitUntil(() => socket.IsOpen || _connectFailed || !ReferenceEquals(_webSocket, socket));

        if (socket.IsOpen)
        {
            Debug.Log("WebSocket 连接成功");
        }
    }

    /// <summary>
    /// Called when the connection to the WebSocket server has been established.
    /// </summary>
    /// <param name="ws">The socket that opened.</param>
    private void OnWebSocketOpen(WebSocket ws)
    {
        Debug.Log("连接已建立");
    }

    /// <summary>
    /// Called for every text message received from the server.
    /// </summary>
    /// <param name="ws">The socket that received the message.</param>
    /// <param name="message">JSON payload sent by the server.</param>
    private void OnWebSocketText(WebSocket ws, string message)
    {
        JObject obj;
        try
        {
            obj = JObject.Parse(message);
        }
        catch (JsonException e)
        {
            Debug.LogError($"无法解析服务器消息: {e.Message}");
            return;
        }

        string code = obj["code"]?.ToString();
        string status = obj["message"]?.ToString();
        if (code != "0" || status != "success")
        {
            // Surface service-side failures instead of dropping them silently.
            Debug.LogError($"识别服务返回错误: code={code}, message={status}");
            return;
        }

        if (!_isOnRecognition)
        {
            // Handshake acknowledged: enter the recognition phase.
            _isOnRecognition = true;
            Debug.Log("开始识别");
            _sendDataTask = new Task(SendDataCoroutine());
            StartRecording();
            return;
        }

        if (obj["result"] is JObject result)
        {
            string sliceType = result["slice_type"]?.ToString();
            string question = result["voice_text_str"]?.ToString() ?? "";

            if (sliceType == "2")
            {
                // The sentence is complete. `question` may be empty; callers that need
                // the text should consume it here.
                Debug.Log("识别到的语音为:" + question);
                StopRecording();
            }
            else if (sliceType == "0")
            {
                // Speech has started, so the no-speech watchdog is no longer needed.
                StopTask(ref _checkIsHaveRecordingTask);
            }
        }
        else
        {
            JToken finalFlag = obj["final"];
            if (finalFlag != null && finalFlag.ToString() == "1")
            {
                Debug.Log("识别结束");
                _sessionEnding = true;
                ws.Close();
            }
        }
    }

    /// <summary>
    /// Called when the socket reports an error.
    /// </summary>
    /// <param name="ws">The socket that failed.</param>
    /// <param name="error">Description of the error.</param>
    private void OnWebSocketError(WebSocket ws, string error)
    {
        _connectFailed = true;

        if (_sessionEnding)
        {
            // After the "end" message an "unexpected close" error has been observed even
            // though the session ended normally; it is not treated as a failure.
            return;
        }

        Debug.LogError($"WebSocket错误: {error}");
    }

    /// <summary>
    /// Releases the microphone, the socket and all running coroutines when the component is destroyed.
    /// </summary>
    void OnDestroy()
    {
        ReleaseSession();
    }

    /// <summary>
    /// Sends queued PCM frames to the server while the socket is open and the microphone is recording.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator SendDataCoroutine()
    {
        WebSocket socket = _webSocket;
        var wait = new WaitForSeconds(sendIntervalMs / 1000f);

        while (socket != null && socket.State == WebSocketStates.Open)
        {
            if (Microphone.IsRecording(null) && sendQueue != null)
            {
                // WaitForSeconds only resumes on a frame boundary, so a tick can take longer
                // than sendIntervalMs. Draining the queue keeps the stream from falling
                // behind; the average rate is still bounded by the capture rate.
                while (sendQueue.TryDequeue(out byte[] data))
                {
                    try
                    {
                        socket.Send(data);
                    }
                    catch (Exception e)
                    {
                        Debug.LogError($"数据发送失败: {e.Message}");
                    }
                }
            }
            yield return wait;
        }
    }

    /// <summary>
    /// Starts the default microphone and the coroutines that process its audio.
    /// Does nothing when the microphone is already recording.
    /// </summary>
    void StartRecording()
    {
        if (Microphone.IsRecording(null))
        {
            return;
        }

        // Recomputed here so a session started before Start() ran still has a valid frame size.
        samplesPerFrame = sampleRate * sendIntervalMs / 1000;
        if (samplesPerFrame <= 0)
        {
            Debug.LogError("sampleRate / sendIntervalMs 配置无效,无法开始录音");
            return;
        }

        recordingClip = Microphone.Start(null, true, RecordingLoopSeconds, sampleRate);
        if (recordingClip == null)
        {
            Debug.LogError("麦克风启动失败");
            return;
        }

        MainManager.GetManager<GameManager>().StartRecordEvent();
        lastReadPosition = 0;
        sendQueue = new ConcurrentQueue<byte[]>();

        _audioProcessingTask = new Task(AudioProcessingCoroutine());
        _checkIsHaveRecordingTask = new Task(CheckIsHaveRecording());
    }

    /// <summary>
    /// Watchdog: ends the recording when no speech was recognized within
    /// <c>NoSpeechTimeoutSeconds</c>. It is stopped as soon as the server reports
    /// the start of a sentence.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator CheckIsHaveRecording()
    {
        yield return new WaitForSeconds(NoSpeechTimeoutSeconds);
        StopRecording();
    }

    /// <summary>
    /// Cuts the captured audio into frames of <c>samplesPerFrame</c> samples and queues them for sending.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator AudioProcessingCoroutine()
    {
        int silenceCounter = 0;
        bool isSpeaking = false;

        while (Microphone.IsRecording(null))
        {
            int currentPosition = Microphone.GetPosition(null);

            // The clip is a ring buffer: a write position behind the read position
            // means the writer has wrapped around.
            int available = currentPosition - lastReadPosition;
            if (available < 0)
            {
                available += recordingClip.samples;
            }

            // Consume every complete frame so a slow frame rate cannot make the reader fall behind.
            while (available >= samplesPerFrame && Microphone.IsRecording(null))
            {
                ProcessAudioChunk(samplesPerFrame, ref silenceCounter, ref isSpeaking);
                available -= samplesPerFrame;
            }

            yield return null;
        }
    }

    /// <summary>
    /// Reads one frame from the recording clip, queues it as PCM and updates the silence tracking.
    /// </summary>
    /// <param name="samplesToRead">Number of samples in the frame to read.</param>
    /// <param name="silenceCounter">Accumulated continuous silence in milliseconds.</param>
    /// <param name="isSpeaking">Set to true once a non-silent frame has been seen.</param>
    void ProcessAudioChunk(int samplesToRead, ref int silenceCounter, ref bool isSpeaking)
    {
        // AudioClip.GetData wraps around to the start of the clip when the read runs past
        // its end, so a frame that straddles the ring-buffer boundary is read in one call.
        // NOTE(review): assumes a mono clip, as the frame-size arithmetic already does.
        float[] chunk = new float[samplesToRead];
        recordingClip.GetData(chunk, lastReadPosition);
        lastReadPosition = (lastReadPosition + samplesToRead) % recordingClip.samples;

        sendQueue.Enqueue(ConvertToPCM(chunk));

        if (IsSilence(chunk))
        {
            silenceCounter += sendIntervalMs;
            if (isSpeaking && silenceCounter >= SpeechPauseMs)
            {
                MainManager.GetManager<GameManager>().CheckIsOnRecording(true);
            }
        }
        else
        {
            silenceCounter = 0;
            isSpeaking = true;
            MainManager.GetManager<GameManager>().CheckIsOnRecording(false);
        }
    }

    /// <summary>
    /// Converts float samples to 16-bit little-endian PCM.
    /// </summary>
    /// <param name="samples">Samples, nominally in the range [-1, 1].</param>
    /// <returns>Two bytes per sample, low byte first.</returns>
    byte[] ConvertToPCM(float[] samples)
    {
        byte[] pcm = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first: an out-of-range sample would otherwise wrap around in the cast.
            float clamped = Mathf.Clamp(samples[i], -1f, 1f);
            short value = (short)(clamped * short.MaxValue);
            pcm[2 * i] = (byte)(value & 0xFF);
            pcm[2 * i + 1] = (byte)((value >> 8) & 0xFF);
        }
        return pcm;
    }

    /// <summary>
    /// Tells whether the mean absolute amplitude of a frame is below <see cref="silenceThreshold"/>.
    /// </summary>
    /// <param name="samples">Frame to inspect.</param>
    /// <returns>True when the frame counts as silent; an empty frame counts as silent.</returns>
    bool IsSilence(float[] samples)
    {
        if (samples.Length == 0)
        {
            return true;
        }

        float sum = 0;
        foreach (float sample in samples)
        {
            sum += Mathf.Abs(sample);
        }
        return sum / samples.Length < silenceThreshold;
    }

    /// <summary>
    /// Stops the microphone and tells the server that the audio stream has ended.
    /// Does nothing when the microphone is not recording.
    /// </summary>
    void StopRecording()
    {
        if (!Microphone.IsRecording(null))
        {
            return;
        }

        Microphone.End(null);
        SendEndMessage();
    }

    /// <summary>
    /// Sends the <c>{"type":"end"}</c> message that asks the server to finish recognition.
    /// </summary>
    void SendEndMessage()
    {
        if (_webSocket == null)
        {
            Debug.LogError("结束消息发送失败: WebSocket 未连接");
            return;
        }

        try
        {
            // From here on a connection-closed error is expected and is not reported.
            _sessionEnding = true;
            _webSocket.Send(JsonUtility.ToJson(new EndText()));
        }
        catch (Exception e)
        {
            Debug.LogError($"结束消息发送失败: {e.Message}");
        }
    }

    /// <summary>
    /// Aborts the current session: stops the microphone, closes the socket and stops all coroutines.
    /// </summary>
    public override void Reset()
    {
        ReleaseSession();
    }

    /// <summary>
    /// Shared teardown for <see cref="Reset"/>, <see cref="OnDestroy"/> and <see cref="StartASR"/>.
    /// </summary>
    void ReleaseSession()
    {
        if (Microphone.IsRecording(null))
        {
            Microphone.End(null);
        }

        if (_webSocket != null)
        {
            // Detach first so the released socket can no longer call back into this object.
            _webSocket.OnOpen -= OnWebSocketOpen;
            _webSocket.OnMessage -= OnWebSocketText;
            _webSocket.OnError -= OnWebSocketError;
            _webSocket.Close();
            _webSocket = null;
        }

        StopTask(ref _connectWebSocketTask);
        StopTask(ref _sendDataTask);
        StopTask(ref _audioProcessingTask);
        StopTask(ref _checkIsHaveRecordingTask);
    }

    /// <summary>
    /// Stops a coroutine task if it exists and clears the reference.
    /// </summary>
    /// <param name="task">Task field to stop and set to null.</param>
    static void StopTask(ref Task task)
    {
        if (task != null)
        {
            task.Stop();
            task = null;
        }
    }
}
/// <summary>
/// Payload of the message that tells the recognition server the audio stream has ended.
/// The public field is what gets serialized, giving <c>{"type":"end"}</c>.
/// </summary>
[Serializable]
public class EndText
{
    /// <summary>Message type; always "end" for a freshly created instance.</summary>
    public string type = "end";
}