Linux客户端:xiaozhi-esp32桌面端应用开发指南
概述
xiaozhi-esp32是一个基于ESP32的开源AI聊天机器人项目,支持语音交互、物联网控制和多协议通信。本文将详细介绍如何为该项目开发Linux桌面客户端应用,实现与ESP32设备的无缝交互。
核心通信协议
WebSocket通信基础
Linux客户端与ESP32设备通过WebSocket协议进行通信,支持双向实时数据交换:
协议消息结构
所有通信消息采用JSON格式。下面的示例同时列出了请求字段(method、params)和响应字段(result);在实际的JSON-RPC 2.0消息中,请求只包含前者,响应只包含后者。基本结构如下:
{
"session_id": "unique_session_id",
"type": "message_type",
"payload": {
"jsonrpc": "2.0",
"method": "method_name",
"params": {},
"id": 1,
"result": {}
}
}
MCP协议详解
初始化流程
Linux客户端首先需要初始化MCP会话:
# Example MCP "initialize" request (JSON-RPC 2.0 payload).
# The vision capability advertises the endpoint and token the device should
# use for image requests; the URL below is a plain local HTTP endpoint.
initialize_request = {
    "jsonrpc": "2.0",
    "method": "initialize",
    "params": {
        "capabilities": {
            "vision": {
                "url": "http://localhost:8080/vision",
                "token": "vision_token",
            },
        },
    },
    "id": 1,
}
工具发现机制
客户端通过`tools/list`方法发现设备支持的所有功能:
# Request asking the device for its tool list (JSON-RPC 2.0 payload).
tools_list_request = dict(
    jsonrpc="2.0",
    method="tools/list",
    params=dict(cursor=""),
    id=2,
)
设备响应包含所有可用工具的信息:
{
"jsonrpc": "2.0",
"id": 2,
"result": {
"tools": [
{
"name": "self.audio_speaker.set_volume",
"description": "设置设备音量",
"inputSchema": {
"type": "object",
"properties": {
"volume": {"type": "integer", "minimum": 0, "maximum": 100}
}
}
},
{
"name": "self.light.set_rgb",
"description": "设置RGB灯光颜色",
"inputSchema": {
"type": "object",
"properties": {
"r": {"type": "integer", "minimum": 0, "maximum": 255},
"g": {"type": "integer", "minimum": 0, "maximum": 255},
"b": {"type": "integer", "minimum": 0, "maximum": 255}
}
}
}
],
"nextCursor": ""
}
}
Linux客户端架构设计
系统架构图
核心模块功能
| 模块名称 | 功能描述 | 关键技术 |
|---|---|---|
| WebSocket客户端 | 建立和维护设备连接 | asyncio, websockets |
| MCP协议处理器 | 解析和处理MCP消息 | JSON-RPC 2.0 |
| 音频处理器 | 音频录制和播放 | PyAudio, Opus编解码 |
| 设备管理器 | 设备发现和状态管理 | 多线程同步 |
| 图形界面 | 用户交互界面 | PyQt5/Tkinter |
开发环境搭建
依赖安装
# Create a Python virtual environment and activate it for this shell
python -m venv xiaozhi-client
source xiaozhi-client/bin/activate
# Install the core dependencies
# NOTE(review): the connection snippet in this guide passes extra_headers= to
# websockets.connect; newer websockets releases appear to expect
# additional_headers= instead — confirm against the installed version.
pip install websockets pyaudio opuslib pillow
pip install pyqt5 # or use another GUI framework
# Audio processing libraries (labelled optional, but the audio-processing
# snippet in this guide does `import numpy as np`, so numpy is needed for it)
pip install numpy scipy
项目结构
xiaozhi-linux-client/
├── src/
│ ├── core/
│ │ ├── websocket_client.py
│ │ ├── mcp_protocol.py
│ │ ├── audio_processor.py
│ │ └── device_manager.py
│ ├── gui/
│ │ ├── main_window.py
│ │ ├── device_panel.py
│ │ ├── audio_panel.py
│ │ └── tools_panel.py
│ └── utils/
│ ├── config.py
│ ├── logger.py
│ └── helpers.py
├── assets/
│ ├── icons/
│ ├── sounds/
│ └── config/
├── tests/
└── requirements.txt
核心功能实现
WebSocket连接管理
import asyncio
import websockets
import json
from typing import Callable, Optional
class XiaozhiWebSocketClient:
    """Asyncio WebSocket client that exchanges JSON messages with a device.

    After ``connect()`` succeeds, a background task reads incoming frames,
    decodes them as JSON and passes each decoded message to every handler
    registered through ``add_message_handler()``.
    """

    def __init__(self, url: str, token: str):
        """Store the endpoint and bearer token; no network I/O happens here."""
        self.url = url
        self.token = token
        self.websocket = None
        self.connected = False
        self.message_handlers = []
        # Strong reference to the receive task: the event loop only keeps weak
        # references to tasks, so an unreferenced task may be collected
        # before it finishes.
        self._receive_task = None

    async def connect(self):
        """Open the connection, send the hello message and start receiving.

        Returns:
            True when the socket is open and the hello message was sent,
            False when any step failed (the failure is printed).
        """
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Protocol-Version": "1",
            "Device-Id": "linux-client",
            "Client-Id": "linux-desktop-app"
        }
        try:
            # NOTE(review): newer websockets releases appear to name this
            # keyword additional_headers — confirm for the installed version.
            self.websocket = await websockets.connect(self.url, extra_headers=headers)
            self.connected = True
            await self._send_hello()
        except Exception as e:
            print(f"连接失败: {e}")
            # Do not report a half-open connection as usable.
            await self._discard_connection()
            return False
        self._receive_task = asyncio.create_task(self._receive_messages())
        return True

    async def _discard_connection(self):
        """Mark the client disconnected and close the socket if one was opened."""
        self.connected = False
        socket, self.websocket = self.websocket, None
        if socket is None:
            return
        try:
            await socket.close()
        except Exception as e:
            # Best effort only: the connection attempt has already failed.
            print(f"关闭连接失败: {e}")

    async def _send_hello(self):
        """Send the hello message announcing MCP support and audio parameters."""
        hello_message = {
            "type": "hello",
            "version": 1,
            "features": {"mcp": True},
            "transport": "websocket",
            "audio_params": {
                "format": "opus",
                "sample_rate": 16000,
                "channels": 1,
                "frame_duration": 60
            }
        }
        await self.send_message(hello_message)

    async def send_message(self, message: dict):
        """Serialize ``message`` to JSON and send it; a no-op when disconnected."""
        if self.connected and self.websocket:
            await self.websocket.send(json.dumps(message))

    async def _receive_messages(self):
        """Read frames until the connection ends, dispatching decoded JSON.

        Frames that cannot be decoded as JSON are reported and skipped so a
        single bad frame does not stop the loop.
        """
        try:
            async for message in self.websocket:
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError) as e:
                    print(f"忽略无法解析的消息: {e}")
                    continue
                await self._handle_message(data)
        except websockets.exceptions.ConnectionClosed:
            print("连接已关闭")
        finally:
            # The iterator can also finish without raising, so the flag is
            # cleared on every exit path, not only on ConnectionClosed.
            self.connected = False

    async def _handle_message(self, message: dict):
        """Await every registered handler, in registration order."""
        for handler in self.message_handlers:
            await handler(message)

    def add_message_handler(self, handler: Callable):
        """Register an async callable that receives each decoded message."""
        self.message_handlers.append(handler)
MCP协议处理器
class MCPHandler:
    """Send MCP (JSON-RPC 2.0) requests through the WebSocket client.

    Each request is wrapped in a ``{"type": "mcp", "payload": ...}`` message.
    Replies are matched to requests by their JSON-RPC ``id``: a future is
    registered per request and resolved when a matching payload arrives.
    """

    def __init__(self, websocket_client):
        """Attach to ``websocket_client`` and subscribe to its messages.

        Args:
            websocket_client: object providing ``send_message(dict)`` and
                ``add_message_handler(handler)``.
        """
        self.ws_client = websocket_client
        self.request_id = 0
        # Maps request id -> future resolved with the reply payload.
        self.pending_requests = {}
        self.available_tools = []
        self.ws_client.add_message_handler(self._on_message)

    async def initialize(self):
        """Start the MCP session.

        Returns:
            The reply payload, or None when no reply arrived in time.
        """
        return await self._request("initialize", {
            "capabilities": {
                "vision": {
                    "url": "http://localhost:8080/vision",
                    "token": "vision_token"
                }
            }
        })

    async def list_tools(self, cursor: str = ""):
        """Fetch the tool list and cache it in ``available_tools``.

        Returns:
            The list under ``result.tools`` of the reply, or an empty list
            when there was no reply or the reply carried no ``result``.
        """
        response = await self._request("tools/list", {"cursor": cursor})
        if response and "result" in response:
            self.available_tools = response["result"].get("tools", [])
            return self.available_tools
        return []

    async def call_tool(self, tool_name: str, arguments: dict):
        """Invoke a device tool by name.

        Returns:
            The reply payload, or None when no reply arrived in time.
        """
        return await self._request("tools/call", {
            "name": tool_name,
            "arguments": arguments
        })

    def _get_next_id(self):
        """Return the next request id (a counter starting at 1)."""
        self.request_id += 1
        return self.request_id

    async def _request(self, method: str, params: dict):
        """Send one JSON-RPC request and wait for its reply."""
        request_id = self._get_next_id()
        # Register the future before sending so a reply that is dispatched
        # while the send is still awaited cannot be missed.
        self._expect_response(request_id)
        mcp_message = {
            "type": "mcp",
            "payload": {
                "jsonrpc": "2.0",
                "method": method,
                "params": params,
                "id": request_id
            }
        }
        try:
            await self.ws_client.send_message(mcp_message)
        except Exception:
            # Nothing was sent, so no reply will ever resolve this entry.
            self.pending_requests.pop(request_id, None)
            raise
        return await self._wait_for_response(request_id)

    def _expect_response(self, request_id: int):
        """Create and register the future that will hold the reply."""
        import asyncio  # local import keeps this snippet self-contained

        future = asyncio.get_running_loop().create_future()
        self.pending_requests[request_id] = future
        return future

    async def _on_message(self, message):
        """Resolve the pending request that an incoming MCP reply answers."""
        if not isinstance(message, dict) or message.get("type") != "mcp":
            return
        payload = message.get("payload")
        if not isinstance(payload, dict):
            return
        response_id = payload.get("id")
        if not isinstance(response_id, int):
            return
        future = self.pending_requests.get(response_id)
        if future is not None and not future.done():
            future.set_result(payload)

    async def _wait_for_response(self, request_id: int, timeout: float = 10):
        """Wait for the reply to ``request_id``.

        Args:
            request_id: JSON-RPC id of the request.
            timeout: seconds to wait before giving up.

        Returns:
            The reply payload, or None if the timeout expired. None is what
            the callers in this class already treat as "no response".
        """
        import asyncio  # local import keeps this snippet self-contained

        future = self.pending_requests.get(request_id)
        if future is None:
            future = self._expect_response(request_id)
        try:
            return await asyncio.wait_for(future, timeout)
        except asyncio.TimeoutError:
            return None
        finally:
            self.pending_requests.pop(request_id, None)
音频处理模块
import pyaudio
import opuslib
import numpy as np
from threading import Thread, Event
class AudioProcessor:
    """Capture microphone audio as Opus frames and play received Opus frames.

    Recording runs on a worker thread that hands each encoded frame to a
    callback. Playback decodes one frame per call and writes it to a single
    output stream that is opened on first use and reused afterwards.
    """

    def __init__(self, sample_rate=16000, channels=1, frame_duration=60):
        """Create the audio backend and the Opus encoder/decoder.

        Args:
            sample_rate: samples per second.
            channels: number of audio channels.
            frame_duration: frame length in milliseconds; together with
                ``sample_rate`` it determines ``frame_size`` in samples.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.frame_size = (sample_rate * frame_duration) // 1000
        self.audio = pyaudio.PyAudio()
        self.encoder = opuslib.Encoder(sample_rate, channels, opuslib.APPLICATION_VOIP)
        self.decoder = opuslib.Decoder(sample_rate, channels)
        self.recording = False
        self.playing = False
        self.record_thread = None
        self.play_thread = None
        # Output stream shared by all play_audio() calls; opened lazily.
        self._output_stream = None

    def start_recording(self, callback):
        """Start the capture thread; ``callback`` receives each Opus frame.

        Does nothing if a capture thread is already running, so an earlier
        thread is never orphaned.
        """
        if self.record_thread is not None and self.record_thread.is_alive():
            return
        self.recording = True
        self.record_thread = Thread(target=self._record_loop, args=(callback,))
        self.record_thread.start()

    def _record_loop(self, callback):
        """Read, encode and deliver frames until ``recording`` is cleared."""
        stream = None
        try:
            stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=self.channels,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.frame_size
            )
            while self.recording:
                # Tolerate input overflow instead of raising: a slow callback
                # should not end the capture thread.
                data = stream.read(self.frame_size, exception_on_overflow=False)
                callback(self.encoder.encode(data, self.frame_size))
        finally:
            # Runs on normal stop and on errors, so the flag never claims a
            # recording that has ended and the device is always released.
            self.recording = False
            if stream is not None:
                stream.stop_stream()
                stream.close()

    def play_audio(self, opus_data):
        """Decode one Opus frame and write it to the output stream."""
        pcm_data = self.decoder.decode(opus_data, self.frame_size)
        if self._output_stream is None:
            # Opened once and reused: opening a stream per frame adds
            # overhead to every call.
            self._output_stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=self.channels,
                rate=self.sample_rate,
                output=True
            )
        self._output_stream.write(pcm_data)

    def stop_recording(self):
        """Ask the capture thread to stop and wait for it to finish."""
        self.recording = False
        if self.record_thread:
            self.record_thread.join()

    def cleanup(self):
        """Stop recording, close the output stream and release the backend."""
        self.stop_recording()
        if self._output_stream is not None:
            self._output_stream.stop_stream()
            self._output_stream.close()
            self._output_stream = None
        self.audio.terminate()
图形界面设计
主界面布局
from PyQt5.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QListWidget, QTextEdit, QSlider)
from PyQt5.QtCore import Qt, pyqtSignal
class MainWindow(QMainWindow):
    """Main window: device/tool lists on the left, controls on the right.

    Signals:
        connection_changed(bool): emitted with the new connection state.
        tool_called(str, dict): emitted with a tool name and its arguments
            when the UI requests a tool call.
    """

    connection_changed = pyqtSignal(bool)
    tool_called = pyqtSignal(str, dict)

    def __init__(self):
        super().__init__()
        self.setWindowTitle("xiaozhi Linux客户端")
        self.setGeometry(100, 100, 800, 600)
        # State is set before the widgets are wired so every slot connected
        # in _setup_ui() can rely on these attributes existing.
        self.connected = False
        self.device_tools = []
        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        # NOTE(review): this attribute shadows the inherited layout() method
        # on this instance; kept because the attribute name is part of the
        # class's visible interface.
        self.layout = QHBoxLayout(self.central_widget)
        self._setup_ui()

    def _setup_ui(self):
        """Build both panels and connect widget signals to their slots."""
        # Left panel: connection button, device list, tool list
        left_panel = QWidget()
        left_layout = QVBoxLayout(left_panel)
        self.connection_btn = QPushButton("连接设备")
        self.connection_btn.clicked.connect(self.toggle_connection)
        self.device_list = QListWidget()
        self.tools_list = QListWidget()
        left_layout.addWidget(self.connection_btn)
        left_layout.addWidget(QLabel("可用设备:"))
        left_layout.addWidget(self.device_list)
        left_layout.addWidget(QLabel("设备工具:"))
        left_layout.addWidget(self.tools_list)
        # Right panel: volume slider, interaction log, record button
        right_panel = QWidget()
        right_layout = QVBoxLayout(right_panel)
        self.volume_slider = QSlider(Qt.Horizontal)
        self.volume_slider.setRange(0, 100)
        # The initial value is set before the slot is connected, so it does
        # not trigger on_volume_changed.
        self.volume_slider.setValue(50)
        self.volume_slider.valueChanged.connect(self.on_volume_changed)
        self.chat_display = QTextEdit()
        self.chat_display.setReadOnly(True)
        self.record_btn = QPushButton("开始录音")
        self.record_btn.clicked.connect(self.toggle_recording)
        right_layout.addWidget(QLabel("音量控制:"))
        right_layout.addWidget(self.volume_slider)
        right_layout.addWidget(QLabel("交互记录:"))
        right_layout.addWidget(self.chat_display)
        right_layout.addWidget(self.record_btn)
        # Stretch factors 1 and 2 for the left and right panels
        self.layout.addWidget(left_panel, 1)
        self.layout.addWidget(right_panel, 2)

    def toggle_connection(self):
        """Connect when disconnected, disconnect when connected."""
        if not self.connected:
            self.connect_to_device()
        else:
            self.disconnect_from_device()

    def connect_to_device(self):
        """Switch the UI to the connected state and emit connection_changed(True)."""
        # TODO: the actual connection logic is not implemented here
        self.connection_btn.setText("断开连接")
        self.connected = True
        self.connection_changed.emit(True)

    def disconnect_from_device(self):
        """Switch the UI to the disconnected state and emit connection_changed(False)."""
        self.connection_btn.setText("连接设备")
        self.connected = False
        self.connection_changed.emit(False)

    def on_volume_changed(self, value):
        """Emit a set_volume tool call for the new slider value while connected."""
        if self.connected:
            self.tool_called.emit("self.audio_speaker.set_volume", {"volume": value})

    def toggle_recording(self):
        """Toggle recording (placeholder)."""
        # TODO: the recording control logic is not implemented here
        pass

    def update_tools_list(self, tools):
        """Remember ``tools`` and show one "name - description" row per tool.

        Args:
            tools: iterable of dicts with a ``name`` key and an optional
                ``description`` key; None is treated as an empty list.
        """
        self.device_tools = list(tools or [])
        self.tools_list.clear()
        for tool in self.device_tools:
            # A tool without a description is shown with an empty one rather
            # than raising KeyError.
            description = tool.get("description", "")
            self.tools_list.addItem(f"{tool['name']} - {description}")
设备控制示例
常用工具调用
# These examples use `await`, so they must run inside a coroutine;
# `mcp_handler` is an MCPHandler instance.
# Volume control
await mcp_handler.call_tool("self.audio_speaker.set_volume", {"volume": 75})
# RGB light control
await mcp_handler.call_tool("self.light.set_rgb", {
"r": 255,
"g": 0,
"b": 0
})
# Robot movement control
await mcp_handler.call_tool("self.dog.forward", {})
# Camera control
await mcp_handler.call_tool("self.camera.set_camera_flipped", {})
状态监控
class DeviceMonitor:
def __init__(self, mcp_handler):
self.mcp_handler = mcp_handler
self.device_status = {}
self.monitoring = False
async def start_monitoring(self):
"""开始监控设备状态"""
self.monitoring = True
while self.monitoring:
await self._update_status()
await asyncio.sleep(5) # 每5秒更新一次
async def _update_status(self):
"""更新设备状态"""
try:
# 获取设备状态信息
status = await self.mcp_handler.call_tool(
"self.get_device_status", {}
)
if status and "result" in status:
self.device_status = status["result"]
except Exception as e:
print(f"状态更新失败: {e}")
def get_battery_level(self):
"""获取电池电量"""
return self.device_status.get("battery_level", 0)
def get_network_status(self):
"""获取网络状态"""
return self.device_status.get("network
创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考