以下是一个使用Flask实现的简单语音识别应用示例,结合了讯飞语音识别API。请确保已安装所需的Python库(flask、requests)。
Flask后端(app.py):
import hashlib
import os
import re
import time
import wave
from collections import Counter

import requests
from flask import Flask, render_template, request, jsonify
app = Flask(__name__)
# Directory (relative to the working directory) where uploaded recordings are stored.
app.config['UPLOAD_FOLDER'] = 'uploads'
# File extensions accepted by the upload endpoint.
app.config['ALLOWED_EXTENSIONS'] = {'wav'}

# iFlytek (Xunfei) API parameters -- replace with your real credentials.
XUNFEI_APPID = 'your_app_id'
XUNFEI_API_KEY = 'your_api_key'
XUNFEI_API_SECRET = 'your_api_secret'
# Talk to the API host directly. The previous value was prefixed with a
# third-party web-proxy host, which would have received the credentials and
# the audio instead of iFlytek.
# NOTE(review): confirm the endpoint path against the current iFlytek docs.
XUNFEI_URL = 'https://api.xfyun.cn/v1/service/v1/speech/transcribe'
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and only looks at the text after the
    last dot; a name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in app.config['ALLOWED_EXTENSIONS']
@app.route('/')
def index():
    """Serve the recording page rendered from ``index.html``."""
    page = render_template('index.html')
    return page
@app.route('/upload', methods=['POST'])
def upload():
    """Store an uploaded recording and report where it was saved.

    Expects a multipart form with a ``file`` part whose name passes
    ``allowed_file``.

    Returns:
        JSON ``{'success': True, 'file_path': ...}`` on success, or
        ``{'error': ...}`` with status 400 when the part is missing, has an
        empty filename, or has a disallowed extension.
    """
    uploaded = request.files.get('file')
    if uploaded is None:
        return jsonify({'error': 'No file part'}), 400
    if uploaded.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if not (uploaded and allowed_file(uploaded.filename)):
        return jsonify({'error': 'Invalid file type'}), 400

    upload_dir = app.config['UPLOAD_FOLDER']
    # The folder is not created anywhere else, so saving into it would fail
    # on a fresh checkout.
    os.makedirs(upload_dir, exist_ok=True)
    # Nanosecond resolution keeps two uploads in the same second from
    # overwriting each other.
    filename = f"record_{time.time_ns()}.wav"
    filepath = os.path.join(upload_dir, filename)
    uploaded.save(filepath)
    return jsonify({'success': True, 'file_path': filepath})
@app.route('/recognize', methods=['POST'])
def recognize():
    """Send a previously uploaded recording to the speech API and score it.

    Expects a JSON body with ``file_path`` (as returned by the upload
    endpoint) and ``text_input`` (the reference text).

    Returns:
        JSON ``{'recognized_text': ..., 'accuracy': ...}`` on success, or
        ``{'error': ...}`` with a 4xx status for bad input and a 5xx status
        when the recognition service fails.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Missing parameters'}), 400
    file_path = payload.get('file_path')
    text_input = payload.get('text_input')
    if not file_path or not text_input:
        return jsonify({'error': 'Missing parameters'}), 400
    if not isinstance(file_path, str) or not isinstance(text_input, str):
        return jsonify({'error': 'Invalid parameters'}), 400

    # file_path comes from the client: only allow files inside the upload
    # folder so arbitrary server files cannot be read and forwarded.
    upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
    try:
        audio_path = os.path.realpath(file_path)
    except ValueError:  # e.g. embedded NUL byte
        return jsonify({'error': 'Invalid file path'}), 400
    if not audio_path.startswith(upload_root + os.sep):
        return jsonify({'error': 'Invalid file path'}), 400

    try:
        with open(audio_path, 'rb') as f:
            audio_content = f.read()
    except OSError:
        return jsonify({'error': 'Audio file not found'}), 404

    # iFlytek speech recognition request.
    # NOTE(review): XUNFEI_API_KEY / XUNFEI_API_SECRET are not used when
    # building these headers -- confirm the signing scheme against the
    # iFlytek documentation.
    headers = {
        'X-Appid': XUNFEI_APPID,
        'X-CurTime': str(int(time.time())),
        'X-Param': 'src_type=wav&file_len={}&sample_rate=16000'.format(len(audio_content)),
        'X-CheckSum': hashlib.md5(audio_content).hexdigest(),
        'Content-Type': 'application/octet-stream'
    }
    try:
        # A timeout keeps a stalled upstream from blocking the worker forever.
        response = requests.post(
            XUNFEI_URL, headers=headers, data=audio_content, timeout=30
        )
        result = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON response body.
        return jsonify({'error': 'Speech recognition failed'}), 502

    if not isinstance(result, dict) or 'data' not in result:
        return jsonify({'error': 'Speech recognition failed'}), 500
    recognized_text = result['data']
    if not isinstance(recognized_text, str):
        # calculate_accuracy needs text; anything else is an unusable reply.
        return jsonify({'error': 'Speech recognition failed'}), 500

    # Score the recognized text against the reference text.
    accuracy = calculate_accuracy(text_input, recognized_text)
    return jsonify({'recognized_text': recognized_text, 'accuracy': accuracy})
# A token is either one CJK ideograph or a run of other non-space characters,
# so unspaced Chinese text is compared per character instead of as one word.
_TOKEN_PATTERN = re.compile(
    r'[\u3400-\u4dbf\u4e00-\u9fff]|[^\s\u3400-\u4dbf\u4e00-\u9fff]+'
)


def _tokenize(text):
    """Split *text* into comparison tokens (see ``_TOKEN_PATTERN``)."""
    return _TOKEN_PATTERN.findall(text)


def calculate_accuracy(text_input, recognized_text):
    """Return the percentage of reference tokens found in the recognized text.

    Matching is case-sensitive and ignores order. Each recognized token can
    satisfy at most one reference token, so repeated reference words are not
    all credited to a single recognized occurrence.

    Args:
        text_input: The reference text the speaker was asked to read.
        recognized_text: The text returned by the recognizer.

    Returns:
        A float in ``[0, 100]``; ``0.0`` when the reference has no tokens.
    """
    reference_tokens = _tokenize(text_input)
    if not reference_tokens:
        return 0.0
    # Counter intersection keeps, per token, the smaller of the two counts.
    matched = Counter(reference_tokens) & Counter(_tokenize(recognized_text))
    correct = sum(matched.values())
    return (correct / len(reference_tokens)) * 100
def main():
    """Start the Flask server on all interfaces, port 5000."""
    app.run(host='0.0.0.0', port=5000)


if __name__ == '__main__':
    main()
HTML前端(templates/index.html):
<!DOCTYPE html>
<!-- Single-page UI: enter reference text, record audio, view the recognition result. -->
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>语音识别准确率测试</title>
<style>
/* Minimal centered layout for the form and the result area. */
body { font-family: Arial, sans-serif; padding: 20px; }
.container { max-width: 600px; margin: auto; }
input[type="text"] { width: 100%; padding: 10px; margin-bottom: 15px; }
button { padding: 10px 20px; background-color: #4CAF50; color: white; border: none; cursor: pointer; }
#result { margin-top: 20px; }
</style>
</head>
<body>
<div class="container">
<h1>语音识别准确率测试</h1>
<!-- Reference text; the page script sends its value as text_input. -->
<input type="text" id="textInput" placeholder="请输入参考文本">
<!-- Starts a recording; the page script attaches the click handler. -->
<button id="recordButton">开始录制</button>
<!-- The page script writes status and result messages here. -->
<div id="result"></div>
</div>
<script>
// How long each recording lasts, in milliseconds.
const RECORD_DURATION_MS = 5000;

const recordButton = document.getElementById('recordButton');
const textInput = document.getElementById('textInput');
const resultDiv = document.getElementById('result');

// Replace the result area with one <p> per line. textContent is used instead
// of innerHTML so server- or browser-supplied text is never parsed as markup.
function showResult(...lines) {
  resultDiv.textContent = '';
  for (const line of lines) {
    const paragraph = document.createElement('p');
    paragraph.textContent = line;
    resultDiv.appendChild(paragraph);
  }
}

// Upload the recording, then ask the server to recognize it and score it
// against the reference text currently in the input box.
async function uploadAndRecognize(blob) {
  const formData = new FormData();
  formData.append('file', blob, 'record.wav');

  const uploadResponse = await fetch('/upload', {
    method: 'POST',
    body: formData
  });
  const uploaded = await uploadResponse.json();
  if (!uploaded.success) {
    showResult('文件上传失败');
    return;
  }

  const recognizeResponse = await fetch('/recognize', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      file_path: uploaded.file_path,
      text_input: textInput.value
    })
  });
  const recognized = await recognizeResponse.json();
  if (recognized.recognized_text) {
    const accuracy = Math.round(recognized.accuracy);
    showResult(`识别结果: ${recognized.recognized_text}`, `准确率: ${accuracy}%`);
  } else {
    showResult('语音识别失败');
  }
}

recordButton.addEventListener('click', async () => {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    showResult('浏览器不支持录音功能');
    return;
  }

  let stream = null;
  // Release the microphone and make the button usable again.
  const resetRecorderUi = () => {
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    recordButton.disabled = false;
    recordButton.textContent = '开始录制';
  };

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const chunks = [];
    const mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.ondataavailable = (e) => chunks.push(e.data);
    mediaRecorder.onstop = async () => {
      resetRecorderUi();
      // NOTE(review): MediaRecorder usually records in a browser-chosen
      // container (e.g. WebM/Ogg); labelling the blob 'audio/wav' does not
      // transcode it. Confirm the server/API accepts the actual bytes.
      const blob = new Blob(chunks, { type: 'audio/wav' });
      try {
        await uploadAndRecognize(blob);
      } catch (error) {
        // Network failures and non-JSON responses end up here instead of
        // becoming unhandled promise rejections.
        showResult(`错误: ${error.message}`);
      }
    };

    mediaRecorder.start();
    recordButton.disabled = true;
    recordButton.textContent = '正在录制...';

    setTimeout(() => {
      if (mediaRecorder.state !== 'inactive') {
        mediaRecorder.stop();
      }
    }, RECORD_DURATION_MS);
  } catch (error) {
    // Covers a denied permission as well as MediaRecorder construction/start
    // errors; the stream must not stay open in the latter case.
    resetRecorderUi();
    showResult(`错误: ${error.message}`);
  }
});
</script>
</body>
</html>
使用说明
- 配置讯飞API:在代码中将 XUNFEI_APPID、XUNFEI_API_KEY、XUNFEI_API_SECRET 替换为您的实际密钥。
- 运行应用:
pip install flask requests
python app.py
- 访问页面:打开浏览器访问 http://127.0.0.1:5000 (或局域网IP地址)。
- 操作流程:
- 在文本框中输入参考文本。
- 点击“开始录制”按钮(需浏览器允许麦克风权限)。
- 录制完成后自动上传音频并显示识别结果和准确率。
注意事项
- 录音限制:示例默认录制5秒,可根据需求调整 setTimeout 时间。
- 文件存储:音频文件临时存储在 uploads 文件夹,建议添加自动清理机制。
- 准确性计算:当前使用简单单词匹配计算准确率,实际应用中可能需要更复杂的NLP处理。
- 浏览器兼容性:录音功能依赖 MediaRecorder API,需现代浏览器支持(如Chrome、Firefox)。
请确保已正确配置讯飞语音服务,并注意录音时的环境噪音对识别结果的影响。