IO统计工具blockStatus.py源码

1.介绍

        用来统计一些IO信息的脚本,会生成一个excel,列出trace里面的io请求、每个进程的IO量、单个进程的IO状态统计,有需要的可以用一下;缺点是数据量大的情况下很慢

抓trace时要打开sched(调度)相关选项,另外还需要打开下面的block事件节点:

 echo 1 > /sys/kernel/tracing/events/block/enable

2.使用

        执行方式:python3 blockStatus.py trace_1.html --process 3083,其中3083是要查看IO状态的进程pid(也可以填进程名)

       

        python blockStatus.py  ruby_w_rw171_15138.html --process 15138

3.效果
————————————————

                            版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
                        
原文链接:https://blog.csdn.net/zinjin_woxin/article/details/138753138

python blockStatus.py  ruby_w_rw171_15138.html --process 15138

import sys
import re
import openpyxl
import matplotlib.pyplot as plt
import os
import datetime
import numpy as np
from openpyxl.drawing.image import Image
from collections import defaultdict
import argparse

def get_output_filename(log_file):
    """Build the name of the result workbook for a trace file.

    The result has the form ``blk_analysis_<stem>_<YYYYmmdd_HHMMSS>.xlsx``:
    ``<stem>`` is the base name of *log_file* without its last extension and
    the timestamp is the current local time.  Any directory part of
    *log_file* is dropped, so the returned name is relative.
    """
    stem, _extension = os.path.splitext(os.path.basename(log_file))
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    return "blk_analysis_" + stem + "_" + stamp + ".xlsx"

def parse_issue_line(line):
    """Parse one ``block_rq_issue`` trace line into an event dict.

    Three patterns are tried from most to least detailed, each anchored at
    the start of the line; the first one that matches decides which keys the
    result carries:

    * full layout: ``pid``, ``time``, ``device``, ``op``, ``offset``,
      ``length``, ``process``
    * simplified layout (no parenthesised PID): ``time``, ``offset``,
      ``length``, ``process``
    * minimal layout: ``time``, ``offset``, ``length``

    Every result also has ``'type': 'issue'``.  ``time`` is a float,
    ``offset`` and ``length`` are ints, everything else is a string.
    Returns ``None`` when no pattern matches.
    """
    # Full layout: "(pid) ... time: block_rq_issue: dev op bytes () offset + length ... [process]"
    full = re.match(
        r'.*?\(\s*(\d+)\).*?(\d+\.\d+):\s+block_rq_issue:\s+([\d,]+)\s+(\w+)\s+\d+\s+\(\)\s+(\d+)\s+\+\s+(\d+).*?\[([^\]]+)\]',
        line,
    )
    if full is not None:
        pid, stamp, device, op, offset, length, process = full.groups()
        return {
            'pid': pid.strip(),
            'time': float(stamp),
            'device': device,
            'op': op,
            'offset': int(offset),
            'length': int(length),
            'process': process,
            'type': 'issue'
        }

    # Simplified layout: no PID, but still "offset + length" and "[process]".
    simple = re.match(
        r'.*?(\d+\.\d+):\s+block_rq_issue:\s+.*?(\d+)\s+\+\s+(\d+).*?\[([^\]]+)\]',
        line,
    )
    if simple is not None:
        stamp, offset, length, process = simple.groups()
        return {
            'time': float(stamp),
            'offset': int(offset),
            'length': int(length),
            'process': process,
            'type': 'issue'
        }

    # Minimal layout: only the timestamp and "offset + length".
    minimal = re.match(
        r'.*?(\d+\.\d+):\s+block_rq_issue:\s+.*?(\d+)\s+\+\s+(\d+)',
        line,
    )
    if minimal is None:
        return None

    stamp, offset, length = minimal.groups()
    return {
        'time': float(stamp),
        'offset': int(offset),
        'length': int(length),
        'type': 'issue'
    }

def parse_complete_line(line):
    """Parse one ``block_rq_complete`` trace line into an event dict.

    The detailed pattern yields ``time``, ``device``, ``op``, ``offset`` and
    ``length``; ``op`` becomes ``'N/A'`` when the line has no operation token
    before the ``()`` marker.  If the detailed pattern does not match, two
    looser patterns are tried in turn (one that still requires a trailing
    ``[...]`` field, one that does not); both yield only ``time``, ``offset``
    and ``length``.

    Every result also has ``'type': 'complete'``.  Returns ``None`` when no
    pattern matches.
    """
    detailed = re.match(
        r'.*?(\d+\.\d+):\s+block_rq_complete:\s+([\d,]+)\s+(\w+)?.*?\(\)\s+(\d+)\s+\+\s+(\d+).*?\[.*\]',
        line,
    )
    if detailed is not None:
        stamp, device, op, offset, length = detailed.groups()
        return {
            'time': float(stamp),
            'device': device,
            'op': op or 'N/A',
            'offset': int(offset),
            'length': int(length),
            'type': 'complete'
        }

    # The two fallbacks capture the same three fields and build the same dict.
    fallbacks = (
        r'.*?(\d+\.\d+):\s+block_rq_complete:\s+.*?(\d+)\s+\+\s+(\d+).*?\[.*\]',
        r'.*?(\d+\.\d+):\s+block_rq_complete:\s+.*?(\d+)\s+\+\s+(\d+)',
    )
    for pattern in fallbacks:
        found = re.match(pattern, line)
        if found is not None:
            stamp, offset, length = found.groups()
            return {
                'time': float(stamp),
                'offset': int(offset),
                'length': int(length),
                'type': 'complete'
            }

    return None

def parse_insert_line(line):
    """Parse one ``block_rq_insert`` trace line into an event dict (optional event).

    Sample line for the detailed pattern::

        NetworkService-3504    (   3083) [007] ...1.  3630.016754: block_rq_insert: 8,32 RA 8192 () 52952144 + 16 [NetworkService]

    The detailed pattern yields ``pid``, ``time``, ``offset``, ``length`` and
    ``process``; the fallback pattern yields only ``time``, ``offset`` and
    ``length``.  Every result also has ``'type': 'insert'``.  Returns
    ``None`` when neither pattern matches.
    """
    detailed = re.match(
        r'.*?\(\s*(\d+)\).*?(\d+\.\d+):\s+block_rq_insert:\s+.*?(\d+)\s+\+\s+(\d+)\s+\[(.+)\]',
        line,
    )
    if detailed is not None:
        pid, stamp, offset, length, process = detailed.groups()
        return {
            'pid': pid.strip(),
            'time': float(stamp),
            'offset': int(offset),
            'length': int(length),
            'process': process,
            'type': 'insert'
        }

    # Fallback: timestamp plus "offset + length" only.
    loose = re.match(
        r'.*?(\d+\.\d+):\s+block_rq_insert:\s+.*?(\d+)\s+\+\s+(\d+)',
        line,
    )
    if loose is None:
        return None

    stamp, offset, length = loose.groups()
    return {
        'time': float(stamp),
        'offset': int(offset),
        'length': int(length),
        'type': 'insert'
    }

def _read_trace_lines(file_path):
    """Return the lines of *file_path*, decoded with the first encoding that fits.

    Decoding is strict on purpose: a wrong guess then raises
    ``UnicodeDecodeError`` and the next candidate is tried.  ``latin-1`` maps
    every byte to a character and can never fail, so it is the last resort.
    """
    for encoding in ('utf-8', 'gb18030', 'gbk', 'latin-1'):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                lines = f.readlines()
        except UnicodeDecodeError:
            continue
        print(f"成功使用 {encoding} 编码读取文件, 行数: {len(lines)}")
        return lines
    return []


def _parse_events(lines):
    """Run the matching line parser over every trace line and collect the events."""
    events = []
    for i, line in enumerate(lines):
        if i % 10000 == 0 and i > 0:
            print(f"已处理 {i} 行, 解析到 {len(events)} 个事件")

        if 'block_rq_issue' in line:
            kind, event = 'issue', parse_issue_line(line)
        elif 'block_rq_complete' in line:
            kind, event = 'complete', parse_complete_line(line)
        elif 'block_rq_insert' in line:
            kind, event = 'insert', parse_insert_line(line)
        else:
            continue

        if event:
            events.append(event)
            # Debug output while the first five events are being collected.
            if len(events) <= 5:
                print(f"解析到{kind}事件: {event}")
    return events


def _pair_requests(group_events):
    """Split the events of one (offset, length) key into individual requests.

    Events are walked in time order.  Each issue takes the oldest unused
    insert (if any), each complete takes the oldest unfinished issue.
    Completes with no pending issue and issues that never complete are
    dropped.  Returns a list of ``(insert_or_None, issue, complete)`` tuples.
    """
    inserts = []
    issues = []
    next_insert = 0
    next_issue = 0
    requests = []

    for event in sorted(group_events, key=lambda e: e['time']):
        kind = event.get('type')
        if kind == 'insert':
            inserts.append(event)
        elif kind == 'issue':
            insert_event = None
            if next_insert < len(inserts):
                insert_event = inserts[next_insert]
                next_insert += 1
            issues.append((insert_event, event))
        elif kind == 'complete' and next_issue < len(issues):
            insert_event, issue_event = issues[next_issue]
            next_issue += 1
            requests.append((insert_event, issue_event, event))

    return requests


def _first_field(field, *candidates):
    """Return ``str(event[field])`` from the first candidate event that has it, else ''."""
    for event in candidates:
        if event and field in event:
            return str(event[field])
    return ""


def _build_request_row(key, insert_event, issue_event, complete_event):
    """Build the 11-column "IO Events" row for one paired request."""
    issue_time = issue_event['time']
    complete_time = complete_event['time']

    # Without an insert event the queueing phase is unknown and reported as 0.
    start_time = insert_event['time'] if insert_event else issue_time
    queue_time = issue_time - start_time
    hardware_time = complete_time - issue_time
    total_time = complete_time - start_time

    return [
        _first_field('pid', issue_event, insert_event) or 'N/A',
        _first_field('process', issue_event, insert_event) or 'N/A',
        _first_field('device', issue_event, complete_event) or 'N/A',
        _first_field('op', issue_event, complete_event) or 'N/A',
        key[0],
        key[1],
        issue_time,
        complete_time,
        queue_time * 1000,      # trace timestamps are in seconds -> milliseconds
        hardware_time * 1000,
        total_time * 1000,
    ]


def process_io_events(file_path, output_file):
    """Parse a block-layer trace and write the matched requests to a new workbook.

    The trace is scanned for ``block_rq_insert`` / ``block_rq_issue`` /
    ``block_rq_complete`` lines.  Events are grouped by ``(offset, length)``
    and, within each group, paired first-in-first-out in time order, so that
    repeated I/O to the same location yields one row per request.  (Using
    the earliest issue and the latest complete of the whole group would
    merge such requests and inflate their latency.)

    The workbook gets an "IO Events" sheet (one row per request, times in
    milliseconds) and a "Summary" sheet with counters.  A new workbook is
    created on every call.

    :param file_path: path of the trace file to read
    :param output_file: path of the ``.xlsx`` file to create
    :return: *output_file* on success, ``0`` if no line could be read from
        the trace (nothing is saved in that case)
    """
    print(f"开始处理I/O事件: {file_path}")
    print(f"将结果保存到: {output_file}")

    workbook = openpyxl.Workbook()
    print(f"创建新工作簿: {output_file}")

    sheet = workbook.active
    sheet.title = "IO Events"
    sheet.append([
        "PID", "Process", "Device", "Operation",
        "Offset", "Length",
        "Issue Time", "Complete Time", "Queue Time (ms)",
        "Hardware Time (ms)", "Total Time (ms)"
    ])

    print(f"尝试读取文件: {file_path}")
    lines = _read_trace_lines(file_path)
    if not lines:
        print("错误: 无法用任何编码读取文件")
        return 0

    print("开始解析I/O事件...")
    events = _parse_events(lines)
    print(f"解析完成: 共 {len(events)} 个I/O事件")

    # Group by (offset, length); this is the only identity every event format carries.
    event_groups = defaultdict(list)
    for event in events:
        event_groups[(event['offset'], event['length'])].append(event)

    print(f"事件分组完成: 共 {len(event_groups)} 个事件组")

    matched_count = 0   # number of paired requests (rows written)
    error_count = 0     # number of groups that produced no row
    total_groups = len(event_groups)

    print("开始处理事件组...")
    for group_idx, (key, group_events) in enumerate(event_groups.items()):
        if group_idx % 1000 == 0 and group_idx > 0:
            print(f"已处理 {group_idx}/{total_groups} 个事件组, 成功匹配 {matched_count} 个请求")

        try:
            # Build all rows of the group before writing any, so a failure
            # cannot leave a half-written group behind.
            rows = [
                _build_request_row(key, insert_event, issue_event, complete_event)
                for insert_event, issue_event, complete_event in _pair_requests(group_events)
            ]
        except Exception as e:
            error_count += 1
            # Debug output for the first five failures only.
            if error_count <= 5:
                print(f"处理事件组时出错: {str(e)}")
                print(f"事件组: {key}")
                for i, event in enumerate(group_events):
                    print(f"  事件 {i+1}: {event}")
            continue

        if not rows:
            # No issue/complete pair in this group.
            error_count += 1
            continue

        for row in rows:
            sheet.append(row)
        matched_count += len(rows)

    summary_sheet = workbook.create_sheet("Summary")
    summary_rows = [
        ("统计项", "数量"),
        ("解析到的事件总数", len(events)),
        ("成功匹配的I/O请求", matched_count),
        ("处理出错的事件组", error_count),
        ("事件组总数", len(event_groups)),
    ]
    for summary_row in summary_rows:
        summary_sheet.append(summary_row)

    print("保存工作簿...")
    workbook.save(output_file)
    print(f"处理完成: 成功匹配 {matched_count} 个I/O请求, 出错 {error_count} 次, 共 {len(event_groups)} 个事件组")
    return output_file  # handed back so callers can run the follow-up analyses

def generate_time_analysis(file_path):
    """Add a "Time Analysis" sheet with latency statistics to the workbook.

    Reads columns 9-11 (queue / hardware / total time in ms) of the
    "IO Events" sheet, keeps the non-negative numeric values, and writes
    count, min, max, mean and the 90th/95th/99th percentiles of each metric.
    A three-panel histogram is saved as ``time_distribution.png`` in the
    current directory and embedded at cell J2.  The workbook is saved back to
    *file_path*.  Any exception is caught and reported on stdout.
    """
    try:
        wb = openpyxl.load_workbook(file_path)
        events_ws = wb["IO Events"]

        if "Time Analysis" not in wb.sheetnames:
            report_ws = wb.create_sheet(title="Time Analysis")
        else:
            report_ws = wb["Time Analysis"]
            # Drop old data rows bottom-up, keeping the header row.
            row_no = report_ws.max_row
            while row_no > 1:
                report_ws.delete_rows(row_no)
                row_no -= 1

        column_titles = (
            "Metric", "Count", "Min (ms)", "Max (ms)",
            "Avg (ms)", "90th % (ms)", "95th % (ms)", "99th % (ms)",
        )
        for col_no, title in enumerate(column_titles, start=1):
            report_ws.cell(row=1, column=col_no, value=title)

        # Source column in "IO Events" -> collected samples.
        queue_times, hardware_times, total_times = [], [], []
        sources = ((9, queue_times), (10, hardware_times), (11, total_times))
        for row_no in range(2, events_ws.max_row + 1):
            for col_no, bucket in sources:
                sample = events_ws.cell(row=row_no, column=col_no).value
                if isinstance(sample, (int, float)) and sample >= 0:
                    bucket.append(sample)

        def summarize(values, label):
            """Return one report row for *values*; placeholders when empty."""
            if not values:
                return [label, 0] + ["N/A"] * 6
            ordered = sorted(values)
            how_many = len(values)
            return [
                label,
                how_many,
                min(values),
                max(values),
                sum(values) / how_many,
                np.percentile(ordered, 90),
                np.percentile(ordered, 95),
                np.percentile(ordered, 99),
            ]

        report_rows = (
            summarize(queue_times, "Queue Time"),
            summarize(hardware_times, "Hardware Time"),
            summarize(total_times, "Total Time"),
        )
        for row_no, report_row in enumerate(report_rows, start=2):
            for col_no, cell_value in enumerate(report_row, start=1):
                report_ws.cell(row=row_no, column=col_no, value=cell_value)

        # One histogram panel per metric, stacked vertically.
        panels = (
            (queue_times, 'blue', 'Queue Time'),
            (hardware_times, 'green', 'Hardware Time'),
            (total_times, 'red', 'Total Time'),
        )
        plt.figure(figsize=(12, 8))
        for position, (values, colour, label) in enumerate(panels, start=1):
            plt.subplot(3, 1, position)
            if not values:
                plt.text(0.5, 0.5, f'No {label} Data',
                         ha='center', va='center', fontsize=12)
                continue
            plt.hist(values, bins=50, alpha=0.7, color=colour)
            plt.title(f'{label} Distribution (ms)')
            plt.xlabel(f'{label} (ms)')
            plt.ylabel('Count')

        plt.tight_layout()
        time_hist_img = "time_distribution.png"
        plt.savefig(time_hist_img)
        plt.close()

        # Embed the saved picture next to the statistics table.
        report_ws.add_image(Image(time_hist_img), 'J2')

        wb.save(file_path)
        print(f"生成时间分析报告: {time_hist_img}")

    except Exception as e:
        print(f"生成时间分析报告时出错: {str(e)}")

def generate_io_size_analysis(file_path):
    """Add an "IO Size Analysis" sheet that relates request size to latency.

    Rows of the "IO Events" sheet are bucketed by request size in KB.  For
    each bucket the request count and the average and maximum queue,
    hardware and total time (ms) are written.  A size-vs-latency line chart
    is saved as ``size_vs_latency.png`` in the current directory and embedded
    at cell J2.  The workbook is saved back to *file_path*.  Any exception is
    caught and reported on stdout.

    The Length column holds the number printed after ``+`` in the trace line.
    That number is a count of 512-byte sectors, not bytes: the sample trace
    line quoted in this file shows 8192 bytes for ``+ 16``.  The size in KB
    is therefore ``sectors * 512 / 1024``; dividing the raw value by 1024
    would put nearly every request into the 0 KB bucket.
    """
    sector_bytes = 512
    try:
        workbook = openpyxl.load_workbook(file_path)
        sheet = workbook["IO Events"]

        if "IO Size Analysis" in workbook.sheetnames:
            size_sheet = workbook["IO Size Analysis"]
            # Remove old data rows bottom-up, keeping the header row.
            for row in range(size_sheet.max_row, 1, -1):
                size_sheet.delete_rows(row)
        else:
            size_sheet = workbook.create_sheet(title="IO Size Analysis")

        headers = [
            "IO Size (KB)", "Count", "Avg Queue (ms)", "Avg Hardware (ms)",
            "Avg Total (ms)", "Max Queue (ms)", "Max Hardware (ms)", "Max Total (ms)"
        ]
        for col, header in enumerate(headers, 1):
            size_sheet.cell(row=1, column=col, value=header)

        # size in KB -> running sums and maxima
        size_groups = defaultdict(lambda: {'count': 0, 'queue_sum': 0, 'hardware_sum': 0,
                                          'total_sum': 0, 'max_queue': 0, 'max_hardware': 0, 'max_total': 0})

        for row in range(2, sheet.max_row + 1):
            sectors = sheet.cell(row=row, column=6).value  # Length, in sectors
            queue_time = sheet.cell(row=row, column=9).value
            hardware_time = sheet.cell(row=row, column=10).value
            total_time = sheet.cell(row=row, column=11).value

            if not isinstance(sectors, (int, float)):
                continue

            # May be fractional, e.g. 0.5 for a single sector.
            size_kb = sectors * sector_bytes / 1024
            group = size_groups[size_kb]

            group['count'] += 1

            if isinstance(queue_time, (int, float)):
                group['queue_sum'] += queue_time
                group['max_queue'] = max(group['max_queue'], queue_time)

            if isinstance(hardware_time, (int, float)):
                group['hardware_sum'] += hardware_time
                group['max_hardware'] = max(group['max_hardware'], hardware_time)

            if isinstance(total_time, (int, float)):
                group['total_sum'] += total_time
                group['max_total'] = max(group['max_total'], total_time)

        # Table: averages are reported as 0 when the sum is not positive.
        row_idx = 2
        for size_kb, data in sorted(size_groups.items()):
            count = data['count']
            avg_queue = data['queue_sum'] / count if count > 0 and data['queue_sum'] > 0 else 0
            avg_hardware = data['hardware_sum'] / count if count > 0 and data['hardware_sum'] > 0 else 0
            avg_total = data['total_sum'] / count if count > 0 and data['total_sum'] > 0 else 0

            size_sheet.cell(row=row_idx, column=1, value=size_kb)
            size_sheet.cell(row=row_idx, column=2, value=count)
            size_sheet.cell(row=row_idx, column=3, value=avg_queue)
            size_sheet.cell(row=row_idx, column=4, value=avg_hardware)
            size_sheet.cell(row=row_idx, column=5, value=avg_total)
            size_sheet.cell(row=row_idx, column=6, value=data['max_queue'])
            size_sheet.cell(row=row_idx, column=7, value=data['max_hardware'])
            size_sheet.cell(row=row_idx, column=8, value=data['max_total'])

            row_idx += 1

        # Chart: plain averages per bucket; every bucket has count >= 1.
        sizes = sorted(size_groups.keys())
        avg_queues = [size_groups[k]['queue_sum'] / size_groups[k]['count'] for k in sizes]
        avg_hardwares = [size_groups[k]['hardware_sum'] / size_groups[k]['count'] for k in sizes]
        avg_totals = [size_groups[k]['total_sum'] / size_groups[k]['count'] for k in sizes]

        plt.figure(figsize=(10, 6))
        plt.plot(sizes, avg_queues, 'bo-', label='Avg Queue Time')
        plt.plot(sizes, avg_hardwares, 'go-', label='Avg Hardware Time')
        plt.plot(sizes, avg_totals, 'ro-', label='Avg Total Time')

        plt.xlabel('I/O Size (KB)')
        plt.ylabel('Time (ms)')
        plt.title('I/O Size vs Average Latency')
        plt.legend()
        plt.grid(True)

        size_time_img = "size_vs_latency.png"
        plt.savefig(size_time_img)
        plt.close()

        # Embed the saved picture next to the table.
        img = Image(size_time_img)
        size_sheet.add_image(img, 'J2')

        workbook.save(file_path)
        print(f"生成I/O大小分析报告: {size_time_img}")

    except Exception as e:
        print(f"生成I/O大小分析报告时出错: {str(e)}")

def generate_operation_analysis(file_path):
    """Add an "Operation Analysis" sheet with per-operation latency figures.

    Rows of the "IO Events" sheet are grouped by the Operation column
    (column 4); rows with an empty operation are skipped.  For each
    operation the request count and the average and maximum queue, hardware
    and total time (ms) are written.  A combined bar/line chart is saved as
    ``operation_analysis.png`` in the current directory and embedded at cell
    J2.  The workbook is saved back to *file_path*.  Any exception is caught
    and reported on stdout.
    """
    try:
        wb = openpyxl.load_workbook(file_path)
        events_ws = wb["IO Events"]

        if "Operation Analysis" not in wb.sheetnames:
            report_ws = wb.create_sheet(title="Operation Analysis")
        else:
            report_ws = wb["Operation Analysis"]
            # Drop old data rows bottom-up, keeping the header row.
            row_no = report_ws.max_row
            while row_no > 1:
                report_ws.delete_rows(row_no)
                row_no -= 1

        column_titles = (
            "Operation", "Count", "Avg Queue (ms)", "Avg Hardware (ms)",
            "Avg Total (ms)", "Max Queue (ms)", "Max Hardware (ms)", "Max Total (ms)",
        )
        for col_no, title in enumerate(column_titles, start=1):
            report_ws.cell(row=1, column=col_no, value=title)

        def new_bucket():
            """Fresh accumulator for one operation type."""
            return {'count': 0, 'queue_sum': 0, 'hardware_sum': 0,
                    'total_sum': 0, 'max_queue': 0, 'max_hardware': 0, 'max_total': 0}

        per_op = defaultdict(new_bucket)

        # (source column, sum key, max key) for each latency metric.
        metrics = (
            (9, 'queue_sum', 'max_queue'),
            (10, 'hardware_sum', 'max_hardware'),
            (11, 'total_sum', 'max_total'),
        )

        for row_no in range(2, events_ws.max_row + 1):
            op_name = events_ws.cell(row=row_no, column=4).value
            durations = [events_ws.cell(row=row_no, column=col_no).value
                         for col_no, _sum_key, _max_key in metrics]

            if not op_name:
                continue

            bucket = per_op[op_name]
            bucket['count'] += 1
            for (_col_no, sum_key, max_key), duration in zip(metrics, durations):
                if isinstance(duration, (int, float)):
                    bucket[sum_key] += duration
                    bucket[max_key] = max(bucket[max_key], duration)

        # One output row per operation, ordered by operation name.
        for out_row, op_name in enumerate(sorted(per_op), start=2):
            bucket = per_op[op_name]
            n = bucket['count']
            row_values = (
                op_name,
                n,
                bucket['queue_sum'] / n if n > 0 else 0,
                bucket['hardware_sum'] / n if n > 0 else 0,
                bucket['total_sum'] / n if n > 0 else 0,
                bucket['max_queue'],
                bucket['max_hardware'],
                bucket['max_total'],
            )
            for col_no, cell_value in enumerate(row_values, start=1):
                report_ws.cell(row=out_row, column=col_no, value=cell_value)

        # Chart data in first-seen order of the operations.
        op_names = list(per_op)
        request_counts = [per_op[name]['count'] for name in op_names]
        mean_totals = [per_op[name]['total_sum'] / per_op[name]['count']
                       for name in op_names if per_op[name]['count'] > 0]

        fig, count_axis = plt.subplots(figsize=(12, 6))

        # Bars: number of requests per operation.
        count_axis.bar(op_names, request_counts, color='b', alpha=0.7)
        count_axis.set_xlabel('Operation Type')
        count_axis.set_ylabel('Count', color='b')
        count_axis.tick_params(axis='y', labelcolor='b')

        # Line on a second y axis: average total time per operation.
        latency_axis = count_axis.twinx()
        latency_axis.plot(op_names, mean_totals, 'ro-', linewidth=2)
        latency_axis.set_ylabel('Average Total Time (ms)', color='r')
        latency_axis.tick_params(axis='y', labelcolor='r')

        plt.title('Operation Type Analysis')
        plt.xticks(rotation=45)
        plt.tight_layout()

        op_analysis_img = "operation_analysis.png"
        plt.savefig(op_analysis_img)
        plt.close()

        # Embed the saved picture next to the table.
        report_ws.add_image(Image(op_analysis_img), 'J2')

        wb.save(file_path)
        print(f"生成操作类型分析报告: {op_analysis_img}")

    except Exception as e:
        print(f"生成操作类型分析报告时出错: {str(e)}")
def analyze_process_io(file_path, process_info):
    """Add a per-process analysis sheet to the workbook.

    Every "IO Events" row whose PID or process name equals *process_info*
    (compared as strings) is copied to a sheet named
    ``Process <process_info> Analysis``.  Below the copied rows the sheet
    gets the request count, the average queue/hardware/total time, an I/O
    size distribution (KB) and an operation-type distribution.  Three charts
    are saved as PNG files in the current directory and embedded in the
    sheet.  The workbook is saved back to *file_path*.

    Characters that are not allowed in a sheet title (``\\ / * ? : [ ]``) are
    replaced by ``_`` in the sheet title and in the image file names, and the
    title is capped at 31 characters.  This lets process names such as
    ``kworker/0:1`` be analysed.

    The Length column holds the number printed after ``+`` in the trace line.
    That number is a count of 512-byte sectors: the sample trace line quoted
    in this file shows 8192 bytes for ``+ 16``.  Sizes in KB are therefore
    ``sectors * 512 / 1024``.

    Nothing is saved when the "IO Events" sheet is missing or no row
    matches.  Any exception is caught and reported on stdout.

    :param file_path: path of the Excel workbook
    :param process_info: PID or process name to look for
    """
    sector_bytes = 512
    try:
        workbook = openpyxl.load_workbook(file_path)
        if "IO Events" not in workbook.sheetnames:
            print("错误: 未找到'IO Events'工作表")
            return

        sheet = workbook["IO Events"]

        # Name used for the sheet title and for file names; for a plain PID
        # it is identical to process_info.
        safe_label = re.sub(r'[\\/*?:\[\]]', '_', str(process_info))
        sheet_name = f"Process {safe_label} Analysis"[:31]

        if sheet_name in workbook.sheetnames:
            process_sheet = workbook[sheet_name]
            # Remove old data rows bottom-up, keeping the header row.
            for row in range(process_sheet.max_row, 1, -1):
                process_sheet.delete_rows(row)
        else:
            process_sheet = workbook.create_sheet(title=sheet_name)

        headers = [
            "PID", "Process", "Device", "Operation",
            "Offset", "Length",
            "Issue Time", "Complete Time", "Queue Time (ms)",
            "Hardware Time (ms)", "Total Time (ms)"
        ]
        for col, header in enumerate(headers, 1):
            process_sheet.cell(row=1, column=col, value=header)

        # Copy matching rows and keep their values for the statistics below.
        process_events = []
        row_idx = 2

        for row in range(2, sheet.max_row + 1):
            pid = sheet.cell(row=row, column=1).value
            process_name = sheet.cell(row=row, column=2).value

            pid_match = (str(pid) == str(process_info))
            name_match = (process_name and str(process_name) == str(process_info))

            if pid_match or name_match:
                for col in range(1, len(headers) + 1):
                    process_sheet.cell(row=row_idx, column=col, value=sheet.cell(row=row, column=col).value)

                event_data = {
                    'pid': pid,
                    'process': process_name,
                    'device': sheet.cell(row=row, column=3).value,
                    'operation': sheet.cell(row=row, column=4).value,
                    'offset': sheet.cell(row=row, column=5).value,
                    'length': sheet.cell(row=row, column=6).value,
                    'queue_time': sheet.cell(row=row, column=9).value,
                    'hardware_time': sheet.cell(row=row, column=10).value,
                    'total_time': sheet.cell(row=row, column=11).value
                }
                process_events.append(event_data)
                row_idx += 1

        if not process_events:
            print(f"未找到进程 '{process_info}' 的I/O事件")
            return

        print(f"找到 {len(process_events)} 个进程 '{process_info}' 的I/O事件")

        # Summary block, one blank row below the copied events.
        process_sheet.cell(row=row_idx+1, column=1, value="统计项")
        process_sheet.cell(row=row_idx+1, column=2, value="数值")
        process_sheet.cell(row=row_idx+2, column=1, value="总I/O请求数")
        process_sheet.cell(row=row_idx+2, column=2, value=len(process_events))

        # Averages are taken over all matched requests; non-numeric cells count as 0.
        total_queue = sum(e['queue_time'] for e in process_events if isinstance(e['queue_time'], (int, float)))
        total_hardware = sum(e['hardware_time'] for e in process_events if isinstance(e['hardware_time'], (int, float)))
        total_total = sum(e['total_time'] for e in process_events if isinstance(e['total_time'], (int, float)))

        avg_queue = total_queue / len(process_events)
        avg_hardware = total_hardware / len(process_events)
        avg_total = total_total / len(process_events)

        process_sheet.cell(row=row_idx+3, column=1, value="平均排队时间(ms)")
        process_sheet.cell(row=row_idx+3, column=2, value=avg_queue)
        process_sheet.cell(row=row_idx+4, column=1, value="平均硬件时间(ms)")
        process_sheet.cell(row=row_idx+4, column=2, value=avg_hardware)
        process_sheet.cell(row=row_idx+5, column=1, value="平均总时间(ms)")
        process_sheet.cell(row=row_idx+5, column=2, value=avg_total)

        # I/O size distribution: sectors -> KB (may be fractional, e.g. 0.5).
        size_groups = defaultdict(int)
        for event in process_events:
            if isinstance(event['length'], (int, float)):
                size_kb = event['length'] * sector_bytes / 1024
                size_groups[size_kb] += 1

        process_sheet.cell(row=row_idx+7, column=1, value="I/O大小分布(KB)")
        process_sheet.cell(row=row_idx+7, column=2, value="数量")

        size_row = row_idx+8
        for size_kb, count in sorted(size_groups.items()):
            process_sheet.cell(row=size_row, column=1, value=size_kb)
            process_sheet.cell(row=size_row, column=2, value=count)
            size_row += 1

        if size_groups:
            sizes = list(size_groups.keys())
            counts = list(size_groups.values())

            plt.figure(figsize=(10, 6))
            plt.bar(sizes, counts, color='skyblue')
            plt.xlabel('I/O Size (KB)')
            plt.ylabel('Count')
            plt.title(f'I/O Size Distribution for Process {process_info}')
            plt.grid(True, linestyle='--', alpha=0.7)

            size_dist_img = f"size_dist_{safe_label}.png"
            plt.savefig(size_dist_img)
            plt.close()

            img = Image(size_dist_img)
            process_sheet.add_image(img, 'D' + str(row_idx+7))

        # Operation-type distribution.
        op_groups = defaultdict(int)
        for event in process_events:
            if event['operation']:
                op_groups[event['operation']] += 1

        process_sheet.cell(row=row_idx+7, column=4, value="操作类型")
        process_sheet.cell(row=row_idx+7, column=5, value="数量")

        op_row = row_idx+8
        for op, count in sorted(op_groups.items()):
            process_sheet.cell(row=op_row, column=4, value=op)
            process_sheet.cell(row=op_row, column=5, value=count)
            op_row += 1

        if op_groups:
            operations = list(op_groups.keys())
            counts = list(op_groups.values())

            plt.figure(figsize=(10, 6))
            plt.pie(counts, labels=operations, autopct='%1.1f%%', startangle=90)
            plt.axis('equal')
            plt.title(f'Operation Type Distribution for Process {process_info}')

            op_dist_img = f"op_dist_{safe_label}.png"
            plt.savefig(op_dist_img)
            plt.close()

            img = Image(op_dist_img)
            process_sheet.add_image(img, 'G' + str(row_idx+7))

        # Histogram of the total time per request.
        total_times = [e['total_time'] for e in process_events if isinstance(e['total_time'], (int, float))]
        if total_times:
            plt.figure(figsize=(10, 6))
            plt.hist(total_times, bins=50, color='purple', alpha=0.7)
            plt.xlabel('Total Time (ms)')
            plt.ylabel('Count')
            plt.title(f'Total Time Distribution for Process {process_info}')
            plt.grid(True)

            time_dist_img = f"time_dist_{safe_label}.png"
            plt.savefig(time_dist_img)
            plt.close()

            img = Image(time_dist_img)
            process_sheet.add_image(img, 'J' + str(row_idx+7))

        workbook.save(file_path)
        print(f"生成进程 '{process_info}' 的分析报告")

    except Exception as e:
        print(f"分析进程 '{process_info}' 的I/O时出错: {str(e)}")

if __name__ == "__main__":
    # Command-line entry point: parse the trace into a fresh workbook, then
    # run the requested analyses on that workbook.
    start_time = datetime.datetime.now()
    print(f"分析开始时间: {start_time}")

    parser = argparse.ArgumentParser(description='块设备I/O分析工具')
    parser.add_argument('log_file', help='日志文件路径')
    parser.add_argument('--time', action='store_true', help='执行时间分析')
    parser.add_argument('--size', action='store_true', help='执行I/O大小分析')
    parser.add_argument('--op', action='store_true', help='执行操作类型分析')
    parser.add_argument('--process', help='分析特定进程(PID或进程名)')

    args = parser.parse_args()

    file_path = args.log_file
    print(f"分析文件: {file_path}")

    # The output name carries a timestamp, so every run gets its own workbook.
    output_file = get_output_filename(file_path)
    print(f"输出文件: {output_file}")

    # process_io_events returns a falsy value when it could not read the
    # trace; no workbook exists then, so stop here instead of letting every
    # analysis fail on a missing file and still reporting success.
    if not process_io_events(file_path, output_file):
        print("错误: 未能生成结果文件, 已跳过后续分析")
        sys.exit(1)

    # With no selection flag at all, run every global analysis.
    full_analysis = not (args.time or args.size or args.op or args.process)
    print(f"分析模式: {'完整分析' if full_analysis else '指定分析'}")

    if full_analysis or args.time:
        print("执行时间分析...")
        generate_time_analysis(output_file)
    if full_analysis or args.size:
        print("执行I/O大小分析...")
        generate_io_size_analysis(output_file)
    if full_analysis or args.op:
        print("执行操作类型分析...")
        generate_operation_analysis(output_file)

    # The per-process analysis only runs when --process was given.
    if args.process:
        print(f"开始分析进程: {args.process}")
        analyze_process_io(output_file, args.process)

    end_time = datetime.datetime.now()
    elapsed = end_time - start_time
    print(f"分析完成, 耗时 {elapsed.total_seconds():.2f} 秒")
    print(f"结果保存在: {output_file}")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值