
    results = []
    for anno in tqdm(my_part):
        frames = extract_frame(anno['filename'])
        det_results = detection_inference(det_model, frames)
        # * Get detection results for human
        det_results = [x[0] for x in det_results]
        for i, res in enumerate(det_results):
            # * filter boxes with small scores
            res = res[res[:, 4] >= args.det_score_thr]
            # * filter boxes with small areas
            box_areas = (res[:, 3] - res[:, 1]) * (res[:, 2] - res[:, 0])
            assert np.all(box_areas >= 0)
            res = res[box_areas >= args.det_area_thr]
            det_results[i] = res

        shape = frames[0].shape[:2]
        anno['img_shape'] = shape
        anno = pose_inference(anno, pose_model, frames, det_results, 
                              compress=args.compress)
        anno.pop('filename')
        results.append(anno)
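To make the two detection filters above concrete, here is a minimal, self-contained sketch. The [x1, y1, x2, y2, score] column layout matches the code; the boxes and the two threshold values are made up for illustration:

    import numpy as np

    # Dummy detections: each row is [x1, y1, x2, y2, score]
    res = np.array([
        [10, 10, 110, 210, 0.95],   # large, confident box -> kept
        [20, 20,  40,  40, 0.30],   # score below threshold -> dropped
        [50, 50,  60,  55, 0.90],   # area below threshold  -> dropped
    ])

    det_score_thr = 0.5    # illustrative thresholds
    det_area_thr = 1000

    res = res[res[:, 4] >= det_score_thr]
    box_areas = (res[:, 3] - res[:, 1]) * (res[:, 2] - res[:, 0])
    res = res[box_areas >= det_area_thr]
    print(res)             # only the first box survives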

shape = frames[0].shape[:2]
anno['img_shape'] = shape

The line shape = frames[0].shape[:2] combines a NumPy array attribute with Python's slicing syntax.

First, frames[0]. frames is a list (or array) holding the frames extracted from the video, so frames[0] is its first element, i.e. the first frame.

Next, .shape. frames[0] is an image (one video frame) stored as a NumPy array, and its .shape attribute is a tuple of three values, height, width and channel count, which together describe the image's dimensions.

Finally, [:2]. This is Python's slice syntax applied to that tuple: it takes the elements from index 0 up to, but not including, index 2, i.e. the height and width, and drops the channel count.

So shape = frames[0].shape[:2] does the following:

  1. Takes the first element of frames, i.e. the first frame of the video.
  2. Reads that frame's shape information.
  3. Keeps only the first two dimensions, height and width, discarding the channel count.
  4. Assigns the resulting (height, width) tuple to the variable shape.

The point of this is to record the frame size so that the subsequent pose-estimation step knows the image dimensions.
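A quick illustration (the 480×640 frame size is just an example):

    import numpy as np

    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy BGR frame
    print(frame.shape)      # (480, 640, 3) -> height, width, channels
    print(frame.shape[:2])  # (480, 640)    -> height and width only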

anno = pose_inference(anno, pose_model, frames, det_results, compress=args.compress)
anno.pop('filename')
results.append(anno)

This code updates the anno dictionary:

  1. anno = pose_inference(anno, pose_model, frames, det_results, compress=args.compress)

    • This line runs pose estimation with the pretrained model pose_model on the extracted video frames frames, using the detection results det_results to locate the people.
    • The estimated poses are written back into the anno dictionary.
    • compress=args.compress controls whether the stored results are compressed.
  2. anno.pop('filename')

    • This removes the 'filename' key from the anno dictionary.
    • The video frames have already been extracted, so the filename is no longer needed.
  3. results.append(anno)

    • This appends the updated anno dictionary to the results list.
    • The results list collects the final output for every annotation.

To summarise, this code:

  1. Runs the pose-estimation model on the current anno entry and stores the estimated poses back into the anno dictionary.
  2. Removes the no-longer-needed 'filename' key from anno.
  3. Appends the updated anno dictionary to the results list for later use.

This step is an important part of the pipeline: it packages the detection and pose-estimation outputs into a single, uniform data structure for downstream analysis and processing.
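As a rough sketch of that bookkeeping, the mock below shows what an anno entry might look like before and after the loop body. The keys 'keypoint' and 'keypoint_score' follow pyskl's skeleton-annotation format, but the exact keys and array sizes (1 person, 100 frames, 17 COCO-style joints) are assumptions for illustration:

    import numpy as np

    # An annotation entry as read from the input list: just a path and a label
    anno = {'filename': 'video_0001.mp4', 'label': 3}

    # ... frames extracted, detection and pose inference run ...
    anno['img_shape'] = (480, 640)                   # (height, width) of the frames
    anno['keypoint'] = np.zeros((1, 100, 17, 2))     # (persons, frames, joints, xy)
    anno['keypoint_score'] = np.zeros((1, 100, 17))  # per-joint confidence

    anno.pop('filename')   # frames already extracted; the path is no longer needed
    results = []
    results.append(anno)   # collected for the final output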

Source: pyskl/tools/data/custom_2d_skeleton.py (kennymckormick/pyskl on GitHub)
