/** @file
********************************************************************************
<PRE>
模块名: mfcc参数的提取
文件名: mfcc.c
相关文件: mfcc.h
文件实现功能: mfcc提取相关函数的实现
作者: Dake
版本: V2010.09.01
编程方式: ANSI C语言编程
授权方式: Copyright(C) Dake
联系方式:
[email protected]
生成日期: 2010-06-22
--------------------------------------------------------------------------------
多线程安全性: <是/否>[,说明]
异常时安全性: <是/否>[,说明]
--------------------------------------------------------------------------------
备注: <其它说明>
--------------------------------------------------------------------------------
修改记录:
日 期 版本 修改人 修改内容
YYYY/MM/DD X.Y <作者或修改者名> <修改内容>
</PRE>
*******************************************************************************/
#include "mfcc.h"
// Forward declarations of the file-local helpers.
static void PreEmphasis(BYTE * pRawVoice, int iSampleNum, double * pDiffVoice); // converts raw samples to centered, scaled doubles
static double * JudgeTrainFrame2(double * pDiffVoice, int iSampleNum, int train_frame_num); // drops silent frames and returns the remaining signal, or NULL
static BOOL FrameMultiHamming(double * pDiffVoice, VOICEFRAME * pVoiceFrame, int train_frame_num); // presumably applies a Hamming window per frame (definition not shown here)
static void FrameToMFCC(VOICEFRAME * pVoiceFrame, double ** MFCC, int train_frame_num); // presumably converts the frames to MFCC vectors (definition not shown here)
static double HalfFrameEnergy(double * pDiffVoice); // computes the energy of half a frame
/**
* @brief Triangular band-pass filter weights, stored filter after filter in one
*        flat array.
*
* Each "//k [n]" marker below introduces filter k and gives the number of
* weights it contributes. The start offset of every filter inside this array
* and the column range it applies to are listed in indexM16.
*
* NOTE: the initializer lists 238 values (the bracketed counts add up to 238)
* while the declared size is 239, so the last element is implicitly zero.
*/
static double melf16[239] =
{
//0 [3]
1.4254, 1.2616, 0.0445,
//1 [3]
0.7384, 1.9555, 0.9103,
//2 [3]
1.0897, 1.8484, 0.8501,
//3 [5]
0.1516, 1.1499, 1.9082, 1.0167, 0.1705,
//4 [5]
0.0918, 0.9833, 1.8295, 1.3652, 0.5970,
//5 [5]
0.6348, 1.4030, 1.8627, 1.1593, 0.4845,
//6 [7]
0.1373, 0.8407, 1.5155, 1.8359, 1.2116, 0.6099, 0.0291,
//7 [7]
0.1641, 0.7884, 1.3901, 1.9709, 1.4679, 0.9249, 0.3991,
//8 [7]
0.5321, 1.0751, 1.6009, 1.8894, 1.3948, 0.9144, 0.4476,
//9 [9]
0.1106, 0.6052, 1.0856, 1.5524, 1.9934, 1.5513, 1.1206, 0.7008, 0.2912,
//10 [11]
0.0066, 0.4487, 0.8794, 1.2992, 1.7088, 1.8915, 1.5012, 1.1198, 0.7469,
0.3822, 0.0253,
//11 [12]
0.1085, 0.4988, 0.8802, 1.2531, 1.6178, 1.9747, 1.6759, 1.3336, 0.9983,
0.6696, 0.3472, 0.0309,
//12 [12]
0.3241, 0.6664, 1.0017, 1.3304, 1.6528, 1.9691, 1.7205, 1.4158, 1.1166,
0.8226, 0.5338, 0.2498,
//13 [14]
0.2795, 0.5842, 0.8834, 1.1774, 1.4662, 1.7502, 1.9707, 1.6961, 1.4260,
1.1601, 0.8985, 0.6409, 0.3872, 0.1373,
//14 [17]
0.0293, 0.3039, 0.5740, 0.8399, 1.1015, 1.3591, 1.6128, 1.8627, 1.8911,
1.6485, 1.4094, 1.1737, 0.9412, 0.7119, 0.4858, 0.2627, 0.0425,
//15 [18]
0.1089, 0.3515, 0.5906, 0.8263, 1.0588, 1.2881, 1.5142, 1.7373, 1.9575,
1.8252, 1.6107, 1.3989, 1.1898, 0.9833, 0.7793, 0.5777, 0.3786, 0.1818,
//16 [20]
0.1748, 0.3893, 0.6011, 0.8102, 1.0167, 1.2207, 1.4223, 1.6214, 1.8182,
1.9874, 1.7951, 1.6051, 1.4172, 1.2314, 1.0476, 0.8659, 0.6861, 0.5082,
0.3322, 0.1580,
//17 [24]
0.0126, 0.2049, 0.3949, 0.5828, 0.7686, 0.9524, 1.1341, 1.3139, 1.4918,
1.6678, 1.8420, 1.9857, 1.8151, 1.6462, 1.4791, 1.3136, 1.1497, 0.9874,
0.8267, 0.6675, 0.5098, 0.3536, 0.1988, 0.0455,
//18 [27]
0.0143, 0.1849, 0.3538, 0.5209, 0.6864, 0.8503, 1.0126, 1.1733, 1.3325,
1.4902, 1.6464, 1.8012, 1.9545, 1.8936, 1.7430, 1.5938, 1.4459, 1.2993,
1.1540, 1.0100, 0.8672, 0.7256, 0.5852, 0.4459, 0.3078, 0.1709, 0.0350,
//19 [29]
0.1064, 0.2570, 0.4062, 0.5541, 0.7007, 0.8460, 0.9900, 1.1328, 1.2744,
1.4148, 1.5541, 1.6922, 1.8291, 1.9650, 1.9003, 1.7667, 1.6341, 1.5025,
1.3720, 1.2425, 1.1140, 0.9865, 0.8599, 0.7343, 0.6097, 0.4859, 0.3631,
0.2412, 0.1202
};
/**
* @brief Index table for the triangular band-pass filter array melf16.
*
* One row per filter: {start position of the filter's weights in melf16,
* minimum column number, maximum column number}. For every row,
* (maximum - minimum + 1) equals the number of weights that filter owns in
* melf16. The row count D is defined outside this file section; 20 rows are
* initialized here.
* NOTE(review): the column numbers presumably index spectrum bins of a frame;
* confirm against the code that consumes this table.
*/
static int indexM16[D][3] =
{
{0, 1, 3}, {3, 2, 4}, {6, 4, 6}, {9, 5, 9},
{14, 7, 11}, {19, 10, 14}, {24, 12, 18}, {31, 15, 21},
{38, 19, 25}, {45, 22, 30}, {54, 26, 36}, {65, 31, 42},
{77, 37, 48}, {89, 43, 56}, {103, 49, 65}, {120, 57, 74},
{138, 66, 85}, {158, 75, 98}, {182, 86, 112}, {209, 99, 127},
};
/** @function
********************************************************************************
<PRE>
Function: PreEmphasis()
Purpose:  Convert raw voice samples into centered, scaled doubles:
          out = (raw - 128) * 0.0078125, i.e. (raw - 128) / 128.
          Despite the name, no differencing between neighbouring samples is
          performed here.
Usage:
Parameters:
[IN]  pRawVoice:  raw voice samples
[IN]  iSampleNum: number of samples to convert
[OUT] pDiffVoice: receives iSampleNum converted samples
Returns:  nothing
Calls:
Called by: voiceToMFCC()
</PRE>
*******************************************************************************/
static void PreEmphasis(BYTE * pRawVoice, int iSampleNum, double * pDiffVoice)
{
    BYTE * pSrc = pRawVoice;    // next raw sample to read
    double * pDst = pDiffVoice; // next output slot to write
    int iLeft;                  // samples still to convert

    // Counting down keeps the "do nothing" behaviour for iSampleNum <= 0.
    for (iLeft = iSampleNum; iLeft > 0; --iLeft)
    {
        *pDst = (*pSrc - 128) * 0.0078125;
        ++pSrc;
        ++pDst;
    }
}
/** @function
********************************************************************************
<PRE>
Function: JudgeTrainFrame2()
Purpose:  Walk the pre-processed signal in half-overlapping frames, skip the
          frames whose energy is below SILENCE_VALUE, and concatenate the
          remaining stretches into one newly allocated buffer that can be
          used as training data.
Usage:
Parameters:
[IN] pDiffVoice:      pre-processed voice samples
[IN] iSampleNum:      number of samples in pDiffVoice
[IN] train_frame_num: number of non-silent frames to collect
Returns:
non-NULL: start of the collected signal; the buffer holds
          (train_frame_num/2 + 1)*FRAME_LEN doubles and is zero-filled
          beyond the copied samples
NULL:     the input cannot be used for training (invalid arguments, too few
          frames, input exhausted while skipping silence, or allocation
          failure)
Calls:     HalfFrameEnergy()
Called by: voiceToMFCC()
Notes:     The returned memory must be released with free() by the caller
           (voiceToMFCC()).
           Every half-frame access is checked against the end of pDiffVoice.
           The checks assume HalfFrameEnergy() reads FRAME_LEN/2 samples
           starting at its argument - confirm against its definition.
</PRE>
*******************************************************************************/
static double * JudgeTrainFrame2(double * pDiffVoice, int iSampleNum, int train_frame_num)
{
    int len = 0;
    int iNum;                   // number of accepted (non-silent) frames
    int iFrame;                 // frame counter
    int iFrameNum;              // total frame count, see below
    double PriorHalfFrame = 0;  // energy of the first half of the current frame
    double NextHalfFrame = 0;   // energy of the second half of the current frame
    double FrameEnergy = 0;     // energy of the whole current frame
    double * pTranDiff = NULL;  // buffer receiving the accepted signal
    double * pSave;             // write position inside pTranDiff
    double * pDiffHead = pDiffVoice; // start of the current non-silent stretch
    double * pDiffNext = pDiffVoice; // second half of the frame being examined
    double * pDiffEnd;          // one past the last valid input sample

    // Reject arguments for which not even one half frame can be read.
    if (!pDiffVoice || train_frame_num < 0 || iSampleNum < FRAME_LEN/2)
    {
        return NULL;
    }
    pDiffEnd = pDiffVoice + iSampleNum;

    // Total frames = iSampleNum/128 - 1 for half-overlapping frames
    // (the shift by 7 assumes FRAME_LEN/2 == 128).
    iFrameNum = (iSampleNum>>7) - 1;
    if (iFrameNum < train_frame_num)
    {
        return NULL;
    }

    pTranDiff = (double *)calloc((size_t)(train_frame_num/2 + 1)*FRAME_LEN, sizeof(double));
    if (!pTranDiff)
    {
        return NULL;
    }
    pSave = pTranDiff;

    PriorHalfFrame = HalfFrameEnergy(pDiffHead);
    for (iFrame=1, iNum=0; iFrame<iFrameNum && iNum<train_frame_num; ++iFrame)
    {
        pDiffNext += (FRAME_LEN/2); // second half of frame iFrame

        // iFrame is not advanced for the extra half frame skipped after a
        // silent frame, so the loop condition alone does not keep pDiffNext
        // inside the input; check the remaining length explicitly.
        if (pDiffEnd - pDiffNext < FRAME_LEN/2)
        {
            goto fail;
        }
        NextHalfFrame = HalfFrameEnergy(pDiffNext);
        FrameEnergy = PriorHalfFrame + NextHalfFrame;

        if (FrameEnergy < SILENCE_VALUE) // silent frame
        {
            if (iFrameNum-iFrame < train_frame_num-iNum) // not enough frames left
            {
                goto fail;
            }

            // Save the stretch collected so far, excluding the first half of
            // the silent frame.
            len = (int)(pDiffNext - pDiffHead) - FRAME_LEN/2;
            if (len > 0)
            {
                memcpy(pSave, pDiffHead, len * sizeof(double));
                pSave += len;
            }

            // Restart right after the silent frame.
            pDiffNext += FRAME_LEN/2;
            pDiffHead = pDiffNext;
            if (pDiffEnd - pDiffHead < FRAME_LEN/2) // nothing left to restart on
            {
                goto fail;
            }
            NextHalfFrame = HalfFrameEnergy(pDiffHead);
        }
        else
        {
            ++iNum;
        }
        PriorHalfFrame = NextHalfFrame;
    } // end: for(;;)

    // Copy the last stretch including the half frame pDiffNext points at; the
    // checks above guarantee that this half frame lies inside the input.
    len = (int)(pDiffNext - pDiffHead) + FRAME_LEN/2;
    if (len > 0)
    {
        memcpy(pSave, pDiffHead, len * sizeof(double));
    }
    return pTranDiff;

fail:
    free(pTranDiff);
    return NULL;
}
/** @function