public class Test { private static final String API_KEY = "Luxsan20250701"; private static final String MODEL_NAME = "/models/Qwen2.5-VL-72B-Instruct"; public static void main(String[] args) { ChatLanguageModel model = QwenChatModel.builder() .apiKey(API_KEY) .modelName(MODEL_NAME) // 使用多模态模型 .build(); // 图像URL String img = "C:\\Users\\Administrator\\Desktop\\FILE_CONTENT_u.png"; // 创建图像消息 ImageContent imageContent = ImageContent.from(img); UserMessage userMessage = new UserMessage( Arrays.asList( TextContent.from("请识别图片中的所有文字"), imageContent ) ); // 调用模型 Response<AiMessage> response = model.generate(userMessage); // 打印结果 System.out.println(response.content().text()); } }报错C:\Java\jdk-17\bin\java.exe "-javaagent:D:\work\IntelliJ IDEA 2024.3\lib\idea_rt.jar=51998:D:\work\IntelliJ IDEA 2024.3\bin" -Dfile.encoding=UTF-8 -classpath D:\maven_use\lingxi-lhc\lingxi-ai-extend\lingxi-ai-comparison\target\classes;D:\maven_use\repository\org\springframework\boot\spring-boot-starter-web\3.4.4\spring-boot-starter-web-3.4.4.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-starter\3.4.4\spring-boot-starter-3.4.4.jar;D:\maven_use\repository\org\springframework\boot\spring-boot\3.4.4\spring-boot-3.4.4.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-autoconfigure\3.4.4\spring-boot-autoconfigure-3.4.4.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-starter-logging\3.4.4\spring-boot-starter-logging-3.4.4.jar;D:\maven_use\repository\ch\qos\logback\logback-classic\1.5.18\logback-classic-1.5.18.jar;D:\maven_use\repository\ch\qos\logback\logback-core\1.5.18\logback-core-1.5.18.jar;D:\maven_use\repository\org\apache\logging\log4j\log4j-to-slf4j\2.24.3\log4j-to-slf4j-2.24.3.jar;D:\maven_use\repository\org\apache\logging\log4j\log4j-api\2.24.3\log4j-api-2.24.3.jar;D:\maven_use\repository\org\slf4j\jul-to-slf4j\2.0.17\jul-to-slf4j-2.0.17.jar;D:\maven_use\repository\jakarta\annotation\jakarta.annotation-api\2.1.1\jakarta.annotation-api-2.1.1.jar
;D:\maven_use\repository\org\springframework\spring-core\6.2.5\spring-core-6.2.5.jar;D:\maven_use\repository\org\springframework\spring-jcl\6.2.5\spring-jcl-6.2.5.jar;D:\maven_use\repository\org\yaml\snakeyaml\2.3\snakeyaml-2.3.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-starter-json\3.4.4\spring-boot-starter-json-3.4.4.jar;D:\maven_use\repository\com\fasterxml\jackson\datatype\jackson-datatype-jdk8\2.18.3\jackson-datatype-jdk8-2.18.3.jar;D:\maven_use\repository\com\fasterxml\jackson\datatype\jackson-datatype-jsr310\2.18.3\jackson-datatype-jsr310-2.18.3.jar;D:\maven_use\repository\com\fasterxml\jackson\module\jackson-module-parameter-names\2.18.3\jackson-module-parameter-names-2.18.3.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-starter-tomcat\3.4.4\spring-boot-starter-tomcat-3.4.4.jar;D:\maven_use\repository\org\apache\tomcat\embed\tomcat-embed-core\10.1.39\tomcat-embed-core-10.1.39.jar;D:\maven_use\repository\org\apache\tomcat\embed\tomcat-embed-el\10.1.39\tomcat-embed-el-10.1.39.jar;D:\maven_use\repository\org\apache\tomcat\embed\tomcat-embed-websocket\10.1.39\tomcat-embed-websocket-10.1.39.jar;D:\maven_use\repository\org\springframework\spring-web\6.2.5\spring-web-6.2.5.jar;D:\maven_use\repository\org\springframework\spring-beans\6.2.5\spring-beans-6.2.5.jar;D:\maven_use\repository\io\micrometer\micrometer-observation\1.14.5\micrometer-observation-1.14.5.jar;D:\maven_use\repository\io\micrometer\micrometer-commons\1.14.5\micrometer-commons-1.14.5.jar;D:\maven_use\repository\org\springframework\spring-webmvc\6.2.5\spring-webmvc-6.2.5.jar;D:\maven_use\repository\org\springframework\spring-aop\6.2.5\spring-aop-6.2.5.jar;D:\maven_use\repository\org\springframework\spring-context\6.2.5\spring-context-6.2.5.jar;D:\maven_use\repository\org\springframework\spring-expression\6.2.5\spring-expression-6.2.5.jar;D:\maven_use\repository\org\apache\pdfbox\pdfbox\2.0.29\pdfbox-2.0.29.jar;D:\maven_use\repository\org\apache\pdfbox\fontbox\2
.0.29\fontbox-2.0.29.jar;D:\maven_use\repository\commons-logging\commons-logging\1.2\commons-logging-1.2.jar;D:\maven_use\repository\org\json\json\20231013\json-20231013.jar;D:\maven_use\repository\com\hankcs\hanlp\portable-1.8.4\hanlp-portable-1.8.4.jar;D:\maven_use\lingxi-lhc\lingxi-ai-common\lingxi-ai-common-core\target\classes;D:\maven_use\repository\org\springframework\spring-context-support\6.2.5\spring-context-support-6.2.5.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-starter-validation\3.4.4\spring-boot-starter-validation-3.4.4.jar;D:\maven_use\repository\org\hibernate\validator\hibernate-validator\8.0.2.Final\hibernate-validator-8.0.2.Final.jar;D:\maven_use\repository\jakarta\validation\jakarta.validation-api\3.0.2\jakarta.validation-api-3.0.2.jar;D:\maven_use\repository\com\fasterxml\classmate\1.7.0\classmate-1.7.0.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-starter-aop\3.4.4\spring-boot-starter-aop-3.4.4.jar;D:\maven_use\repository\org\aspectj\aspectjweaver\1.9.23\aspectjweaver-1.9.23.jar;D:\maven_use\repository\org\apache\commons\commons-lang3\3.17.0\commons-lang3-3.17.0.jar;D:\maven_use\repository\jakarta\servlet\jakarta.servlet-api\6.0.0\jakarta.servlet-api-6.0.0.jar;D:\maven_use\repository\cn\hutool\hutool-core\5.8.35\hutool-core-5.8.35.jar;D:\maven_use\repository\cn\hutool\hutool-http\5.8.35\hutool-http-5.8.35.jar;D:\maven_use\repository\cn\hutool\hutool-extra\5.8.35\hutool-extra-5.8.35.jar;D:\maven_use\repository\cn\hutool\hutool-setting\5.8.35\hutool-setting-5.8.35.jar;D:\maven_use\repository\cn\hutool\hutool-log\5.8.35\hutool-log-5.8.35.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-configuration-processor\3.4.4\spring-boot-configuration-processor-3.4.4.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-properties-migrator\3.4.4\spring-boot-properties-migrator-3.4.4.jar;D:\maven_use\repository\org\springframework\boot\spring-boot-configuration-metadata\3.4.4\spring-boot-confi
guration-metadata-3.4.4.jar;D:\maven_use\repository\com\vaadin\external\google\android-json\0.0.20131108.vaadin1\android-json-0.0.20131108.vaadin1.jar;D:\maven_use\repository\io\github\linpeilie\mapstruct-plus-spring-boot-starter\1.4.6\mapstruct-plus-spring-boot-starter-1.4.6.jar;D:\maven_use\repository\io\github\linpeilie\mapstruct-plus\1.4.6\mapstruct-plus-1.4.6.jar;D:\maven_use\repository\org\mapstruct\mapstruct\1.5.5.Final\mapstruct-1.5.5.Final.jar;D:\maven_use\repository\io\github\linpeilie\mapstruct-plus-object-convert\1.4.6\mapstruct-plus-object-convert-1.4.6.jar;D:\maven_use\repository\org\lionsoul\ip2region\2.7.0\ip2region-2.7.0.jar;D:\maven_use\repository\net\sourceforge\tess4j\tess4j\5.8.0\tess4j-5.8.0.jar;D:\maven_use\repository\net\java\dev\jna\jna\5.13.0\jna-5.13.0.jar;D:\maven_use\repository\com\github\jai-imageio\jai-imageio-core\1.4.0\jai-imageio-core-1.4.0.jar;D:\maven_use\repository\org\apache\pdfbox\pdfbox-tools\2.0.29\pdfbox-tools-2.0.29.jar;D:\maven_use\repository\org\apache\pdfbox\pdfbox-debugger\2.0.29\pdfbox-debugger-2.0.29.jar;D:\maven_use\repository\org\apache\pdfbox\jbig2-imageio\3.0.4\jbig2-imageio-3.0.4.jar;D:\maven_use\repository\commons-io\commons-io\2.15.0\commons-io-2.15.0.jar;D:\maven_use\repository\net\sourceforge\lept4j\lept4j\1.18.1\lept4j-1.18.1.jar;D:\maven_use\repository\org\jboss\jboss-vfs\3.2.17.Final\jboss-vfs-3.2.17.Final.jar;D:\maven_use\repository\org\jboss\logging\jboss-logging\3.6.1.Final\jboss-logging-3.6.1.Final.jar;D:\maven_use\repository\org\slf4j\slf4j-api\2.0.17\slf4j-api-2.0.17.jar;D:\maven_use\repository\org\openpnp\opencv\4.5.5-1\opencv-4.5.5-1.jar;D:\maven_use\repository\com\fasterxml\jackson\core\jackson-databind\2.18.3\jackson-databind-2.18.3.jar;D:\maven_use\repository\com\fasterxml\jackson\core\jackson-annotations\2.18.3\jackson-annotations-2.18.3.jar;D:\maven_use\repository\com\fasterxml\jackson\core\jackson-core\2.18.3\jackson-core-2.18.3.jar;D:\maven_use\repository\com\fasterxml\jackson\dataformat\jac
kson-dataformat-xml\2.18.3\jackson-dataformat-xml-2.18.3.jar;D:\maven_use\repository\org\codehaus\woodstox\stax2-api\4.2.2\stax2-api-4.2.2.jar;D:\maven_use\repository\com\fasterxml\woodstox\woodstox-core\7.0.0\woodstox-core-7.0.0.jar;D:\maven_use\repository\org\apache\httpcomponents\httpclient\4.5.13\httpclient-4.5.13.jar;D:\maven_use\repository\org\apache\httpcomponents\httpcore\4.4.16\httpcore-4.4.16.jar;D:\maven_use\repository\commons-codec\commons-codec\1.17.2\commons-codec-1.17.2.jar;D:\maven_use\repository\jakarta\xml\bind\jakarta.xml.bind-api\4.0.0\jakarta.xml.bind-api-4.0.0.jar;D:\maven_use\repository\jakarta\activation\jakarta.activation-api\2.1.3\jakarta.activation-api-2.1.3.jar;D:\maven_use\repository\me\zhyd\oauth\JustAuth\1.16.7\JustAuth-1.16.7.jar;D:\maven_use\repository\com\xkcoding\http\simple-http\1.0.5\simple-http-1.0.5.jar;D:\maven_use\repository\com\alibaba\fastjson\1.2.83\fastjson-1.2.83.jar;D:\maven_use\repository\com\squareup\okhttp3\okhttp\4.9.3\okhttp-4.9.3.jar;D:\maven_use\repository\com\squareup\okio\okio\2.8.0\okio-2.8.0.jar;D:\maven_use\repository\org\jetbrains\kotlin\kotlin-stdlib-common\1.9.25\kotlin-stdlib-common-1.9.25.jar;D:\maven_use\repository\org\jetbrains\kotlin\kotlin-stdlib\1.9.25\kotlin-stdlib-1.9.25.jar;D:\maven_use\repository\org\jetbrains\annotations\13.0\annotations-13.0.jar;D:\maven_use\repository\dev\langchain4j\langchain4j\0.29.1\langchain4j-0.29.1.jar;D:\maven_use\repository\com\squareup\retrofit2\retrofit\2.9.0\retrofit-2.9.0.jar;D:\maven_use\repository\org\apache\opennlp\opennlp-tools\1.9.4\opennlp-tools-1.9.4.jar;D:\maven_use\repository\org\jsoup\jsoup\1.16.1\jsoup-1.16.1.jar;D:\maven_use\repository\dev\langchain4j\langchain4j-dashscope\0.29.1\langchain4j-dashscope-0.29.1.jar;D:\maven_use\repository\com\alibaba\dashscope-sdk-java\2.10.1\dashscope-sdk-java-2.10.1.jar;D:\maven_use\repository\com\google\guava\guava\32.1.1-jre\guava-32.1.1-jre.jar;D:\maven_use\repository\com\google\guava\failureaccess\1.0.1\failureacce
ss-1.0.1.jar;D:\maven_use\repository\com\google\guava\listenablefuture\9999.0-empty-to-avoid-conflict-with-guava\listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar;D:\maven_use\repository\com\google\code\findbugs\jsr305\3.0.2\jsr305-3.0.2.jar;D:\maven_use\repository\org\checkerframework\checker-qual\3.33.0\checker-qual-3.33.0.jar;D:\maven_use\repository\com\google\j2objc\j2objc-annotations\2.8\j2objc-annotations-2.8.jar;D:\maven_use\repository\io\reactivex\rxjava2\rxjava\2.2.21\rxjava-2.2.21.jar;D:\maven_use\repository\org\reactivestreams\reactive-streams\1.0.4\reactive-streams-1.0.4.jar;D:\maven_use\repository\org\slf4j\slf4j-simple\2.0.17\slf4j-simple-2.0.17.jar;D:\maven_use\repository\com\squareup\okhttp3\logging-interceptor\4.11.0\logging-interceptor-4.11.0.jar;D:\maven_use\repository\org\jetbrains\kotlin\kotlin-stdlib-jdk8\1.9.25\kotlin-stdlib-jdk8-1.9.25.jar;D:\maven_use\repository\org\jetbrains\kotlin\kotlin-stdlib-jdk7\1.9.25\kotlin-stdlib-jdk7-1.9.25.jar;D:\maven_use\repository\com\squareup\okhttp3\okhttp-sse\4.11.0\okhttp-sse-4.11.0.jar;D:\maven_use\repository\dev\langchain4j\langchain4j-core\0.29.1\langchain4j-core-0.29.1.jar;D:\maven_use\repository\com\google\code\gson\gson\2.11.0\gson-2.11.0.jar;D:\maven_use\repository\com\google\errorprone\error_prone_annotations\2.27.0\error_prone_annotations-2.27.0.jar com.luxsan.service.Test SLF4J(W): Class path contains multiple SLF4J providers. SLF4J(W): Found provider [ch.qos.logback.classic.spi.LogbackServiceProvider@42f30e0a] SLF4J(W): Found provider [org.slf4j.simple.SimpleServiceProvider@24273305] SLF4J(W): See https://www.slf4j.org/codes.html#multiple_bindings for an explanation. 
SLF4J(I): Actual provider is of type [ch.qos.logback.classic.spi.LogbackServiceProvider@42f30e0a] Exception in thread "main" java.lang.IllegalArgumentException: Illegal character in opaque part at index 2: C:\Users\Administrator\Desktop\FILE_CONTENT_u.png at java.base/java.net.URI.create(URI.java:906) at dev.langchain4j.data.message.ImageContent.<init>(ImageContent.java:59) at dev.langchain4j.data.message.ImageContent.from(ImageContent.java:196) at com.luxsan.service.Test.main(Test.java:26) Caused by: java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\Users\Administrator\Desktop\FILE_CONTENT_u.png at java.base/java.net.URI$Parser.fail(URI.java:2974) at java.base/java.net.URI$Parser.checkChars(URI.java:3145) at java.base/java.net.URI$Parser.parse(URI.java:3181) at java.base/java.net.URI.<init>(URI.java:623) at java.base/java.net.URI.create(URI.java:904) ... 3 more Process finished with exit code 1

时间: 2025-07-30 19:13:39 浏览: 19
<think>根据错误日志,问题在于创建URI时路径中包含非法字符。具体错误是: `java.lang.IllegalArgumentException: Illegal character in opaque part at index 2: C:\Users\Administrator\Desktop\FILE_CONTENT_u.png` 这是因为Windows文件路径中的反斜杠`\`和冒号`:`在URI中属于非法字符(特别是在opaque部分)。 在`ImageContent.from`方法中,它期望一个合法的URI字符串或URI对象。而Windows路径直接作为字符串传入会包含非法字符。 ### 解决方案: 1. **使用File对象转换为URI**(推荐): ```java File imageFile = new File("C:\\Users\\Administrator\\Desktop\\FILE_CONTENT_u.png"); ImageContent imageContent = ImageContent.from(imageFile.toURI()); ``` 2. **将文件路径转换为URL格式**(注意转义): 将反斜杠替换为正斜杠,并在路径前添加`file:/`(注意:需要转义冒号等字符,但使用`File`对象更安全)。 ### 修改后的代码: ```java import java.io.File; public class Test { private static final String API_KEY = "Luxsan20250701"; private static final String MODEL_NAME = "qwen-vl-plus"; // 注意:模型名称应为字符串常量,而非路径格式 public static void main(String[] args) { ChatLanguageModel model = QwenChatModel.builder() .apiKey(API_KEY) .modelName(MODEL_NAME) // 使用多模态模型 .build(); // 创建图像文件对象 File imageFile = new File("C:\\Users\\Administrator\\Desktop\\FILE_CONTENT_u.png"); // 通过File的URI创建图像内容 ImageContent imageContent = ImageContent.from(imageFile.toURI()); UserMessage userMessage = new UserMessage( Arrays.asList( TextContent.from("请识别图片中的所有文字"), imageContent ) ); // 调用模型 Response<AiMessage> response = model.generate(userMessage); // 打印结果 System.out.println(response.content().text()); } } ``` ### 关键修改点: 1. **模型名称修正**:确保`MODEL_NAME`是有效的模型名称(如`"qwen-vl-plus"`),而不是路径格式(原代码中为`"/models/Qwen2.5-VL-72B-Instruct"`,这可能是错误的,应使用官方模型名)。 2. **图像路径处理**:使用`File`对象将本地文件路径转换为合法的URI(`file.toURI()`会生成`file:/C:/Users/...`格式的URI,符合规范)。 ### 注意事项: - 如果使用`QwenChatModel`(而非`QwenImageModel`),确保该类支持多模态输入(根据LangChain4j文档,`QwenChatModel`也支持图像内容)。 - 检查API密钥是否正确(示例中为硬编码,实际应使用安全方式存储)。 §§相关问题§§ 1. 如何处理Windows路径中的特殊字符以符合URI标准? 2. 除了文件URI,还有哪些方式可以将本地图片传递给千问OCR模型? 3. 如何验证千问OCR模型是否成功接收到图片并开始处理? 4. 在LangChain4j中,`QwenChatModel`和`QwenImageModel`在处理图像时有何区别? 5. 
当图片较大时,如何优化上传速度或避免超时错误?</think>您的代码存在两个主要问题导致报错: ### 问题1:URI路径格式错误(核心问题) 错误信息:`Illegal character in opaque part at index 2: C:\Users\...` - **原因**:`ImageContent.from()` 方法要求传入 **URL格式的字符串**,但您直接使用了Windows本地文件路径 - **解决方案**:将本地路径转换为标准URI格式 ```java // 错误写法(直接使用本地路径): String img = "C:\\Users\\..."; // 正确写法(转换为file URI): String img = new File("C:\\Users\\Administrator\\Desktop\\FILE_CONTENT_u.png").toURI().toString(); ``` ### 问题2:模型名称错误 ```java private static final String MODEL_NAME = "/models/Qwen2.5-VL-72B-Instruct"; // 错误! ``` - **正确模型名称**(根据LangChain4j文档): - `qwen-vl-plus`(推荐) - `qwen-vl-max` - **修正**: ```java private static final String MODEL_NAME = "qwen-vl-plus"; // 正确格式 ``` ### 完整修正代码: ```java import java.io.File; public class Test { private static final String API_KEY = "Luxsan20250701"; private static final String MODEL_NAME = "qwen-vl-plus"; // 修正模型名 public static void main(String[] args) { ChatLanguageModel model = QwenChatModel.builder() .apiKey(API_KEY) .modelName(MODEL_NAME) .build(); // 将本地路径转换为标准URI String img = new File("C:\\Users\\Administrator\\Desktop\\FILE_CONTENT_u.png").toURI().toString(); ImageContent imageContent = ImageContent.from(img); UserMessage userMessage = new UserMessage( Arrays.asList( TextContent.from("请识别图片中的所有文字"), imageContent ) ); Response<AiMessage> response = model.generate(userMessage); System.out.println(response.content().text()); } } ``` ### 额外建议: 1. **多SLF4J绑定警告处理**: ```xml <!-- 在pom.xml中排除冲突的日志依赖 --> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter</artifactId> <exclusions> <exclusion> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-logging</artifactId> </exclusion> </exclusions> </dependency> ``` 2. 
**Base64方式**(备选方案): ```java import java.util.Base64; import java.nio.file.Files; import java.nio.file.Paths; // 读取图片为Base64 byte[] imageBytes = Files.readAllBytes(Paths.get("C:\\path\\to\\image.png")); String base64Image = Base64.getEncoder().encodeToString(imageBytes); ImageContent imageContent = ImageContent.from(base64Image, "image/png"); ```
阅读全文

相关推荐

java.lang.NoSuchMethodError: 'org.springframework.ai.chat.prompt.ChatOptions org.springframework.ai.chat.prompt.Prompt.getOptions()'@RequiredArgsConstructor @Service @Slf4j public class Param { private final ObjectMapper objectMapper = new ObjectMapper(); private static final String BASE_URL = "http://10.191.39.211/v1"; private static final String API_KEY = "gpustack_078939279603f717_9b9d51de18ee165f66f6c32eda290f08"; private static final String MODEL_NAME = "Qwen2.5-VL-72B-Instruct"; /** * 大模型返回的内容 * * @param file * @return * @throws IOException */ public String callAIModel(MultipartFile file) throws IOException { OpenAiApi openAiApi = OpenAiApi.builder() .apiKey(API_KEY) .baseUrl(BASE_URL) .build(); OpenAiChatOptions options = OpenAiChatOptions.builder() .model(MODEL_NAME) .maxTokens(200) .build(); ChatModel builder = OpenAiChatModel.builder().openAiApi(openAiApi).defaultOptions(options).build(); String contentType = file.getContentType(); String base64Data = Base64.getEncoder().encodeToString(file.getBytes()); MimeType mimeType = MimeType.valueOf(contentType); Media media = new Media(mimeType, base64Data); UserMessage userMessage = new UserMessage("解析文件", List.of(media)); return builder.call(userMessage); } 我的方法 <dependency> <groupId>org.springframework.ai</groupId> <artifactId>spring-ai-starter-model-openai</artifactId> </dependency> <dependency> <groupId>org.springframework.ai</groupId> <artifactId>spring-ai-core</artifactId> <version>0.8.1</version> </dependency> <dependency> <groupId>org.springframework.ai</groupId> <artifactId>spring-ai-openai</artifactId> </dependency>

public static String callQianwenAPI(String userInput) throws Exception { String API_URL = "http://10.191.39.243:8000/vl"; String API_KEY = "Luxsan20250701"; String model_name = "/models/Qwen2.5-VL-72B-Instruct"; // 1. 创建HTTP客户端 try (CloseableHttpClient httpClient = HttpClients.createDefault()) { // 2. 构建请求 HttpPost httpPost = new HttpPost(API_URL); httpPost.setHeader("Authorization", "Bearer " + API_KEY); httpPost.setHeader("Content-Type", "application/json"); httpPost.setHeader("X-DashScope-SSE", "enable"); // 3. 构建请求体 JSONObject requestBody = new JSONObject(); requestBody.put("model", model_name); // 模型名称 JSONObject input = new JSONObject(); JSONObject message = new JSONObject(); message.put("role", "user"); message.put("content", userInput); input.put("messages", new Object[]{message}); JSONObject parameters = new JSONObject(); parameters.put("result_format", "text"); // 返回纯文本格式 requestBody.put("input", input); requestBody.put("parameters", parameters); httpPost.setEntity(new StringEntity(requestBody.toString())); // 4. 发送请求并处理响应 try (CloseableHttpResponse response = httpClient.execute(httpPost)) { HttpEntity entity = response.getEntity(); if (entity != null) { String result = EntityUtils.toString(entity); // 5. 解析响应 JSONObject jsonResponse = new JSONObject(result); return jsonResponse.getJSONObject("output") .getString("text"); } } } return "未收到有效响应"; } JSONObject jsonResponse = new JSONObject(result);里面的result爆红

“http_request_duration_highr_seconds_bucket{le="0.01"} : "4847.0" http_request_duration_highr_seconds_bucket{le="0.1"} : "5045.0" http_request_duration_highr_seconds_bucket{le="0.05"} : "4859.0" http_request_duration_highr_seconds_bucket{le="0.5"} : "5931.0" http_request_duration_highr_seconds_bucket{le="0.025"} : "4853.0" http_request_duration_highr_seconds_bucket{le="0.25"} : "5278.0" http_request_duration_highr_seconds_bucket{le="0.075"} : "4866.0" http_request_duration_highr_seconds_bucket{le="0.75"} : "6660.0" http_request_duration_highr_seconds_bucket{le="1.0"} : "7405.0" http_request_duration_highr_seconds_bucket{le="1.5"} : "8334.0" http_request_duration_highr_seconds_bucket{le="2.0"} : "9196.0" http_request_duration_highr_seconds_bucket{le="2.5"} : "10073.0" http_request_duration_highr_seconds_bucket{le="3.0"} : "10949.0" http_request_duration_highr_seconds_bucket{le="3.5"} : "11679.0" http_request_duration_highr_seconds_bucket{le="4.0"} : "12333.0" http_request_duration_highr_seconds_bucket{le="4.5"} : "12829.0" http_request_duration_highr_seconds_bucket{le="5.0"} : "13181.0" http_request_duration_highr_seconds_bucket{le="7.5"} : "14314.0" http_request_duration_highr_seconds_bucket{le="10.0"} : "15519.0" http_request_duration_highr_seconds_bucket{le="30.0"} : "25617.0" http_request_duration_highr_seconds_bucket{le="60.0"} : "26110.0" http_request_duration_highr_seconds_bucket{le="+Inf"} : "26346.0" http_request_duration_highr_seconds_count : "26346.0" http_request_duration_highr_seconds_created : "1.7512560890372858e+09" http_request_duration_highr_seconds_sum : "248594.49104921706" http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.1",method="POST"} : "226.0" http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.5",method="POST"} : "1112.0" http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="1.0",method="POST"} : "2586.0" 
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="+Inf",method="POST"} : "21527.0" http_request_duration_seconds_bucket{handler="/v1/models",le="0.1",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="/v1/models",le="0.5",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="/v1/models",le="1.0",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="/v1/models",le="+Inf",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="HEAD"} : "6.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="TRACE"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="HEAD"} : "6.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="TRACE"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="HEAD"} : 
"6.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="TRACE"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="HEAD"} : "6.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="TRACE"} : "3.0" http_request_duration_seconds_count{handler="/v1/chat/completions",method="POST"} : "21527.0" http_request_duration_seconds_count{handler="/v1/models",method="GET"} : "1.0" http_request_duration_seconds_count{handler="none",method="GET"} : "4693.0" http_request_duration_seconds_count{handler="none",method="HEAD"} : "6.0" http_request_duration_seconds_count{handler="none",method="OPTIONS"} : "12.0" http_request_duration_seconds_count{handler="none",method="POST"} : "95.0" http_request_duration_seconds_count{handler="none",method="PROPFIND"} : "3.0" http_request_duration_seconds_count{handler="none",method="PUT"} : "3.0" http_request_duration_seconds_count{handler="none",method="SEARCH"} : "3.0" http_request_duration_seconds_count{handler="none",method="TRACE"} : "3.0" 
http_request_duration_seconds_created{handler="/v1/chat/completions",method="POST"} : "1.7512560967123778e+09" http_request_duration_seconds_created{handler="/v1/models",method="GET"} : "1.7536925794242406e+09" http_request_duration_seconds_created{handler="none",method="GET"} : "1.7516341020108707e+09" http_request_duration_seconds_created{handler="none",method="HEAD"} : "1.751634176119915e+09" http_request_duration_seconds_created{handler="none",method="OPTIONS"} : "1.7516341579990425e+09" http_request_duration_seconds_created{handler="none",method="POST"} : "1.7516341771295128e+09" http_request_duration_seconds_created{handler="none",method="PROPFIND"} : "1.7516341696153226e+09" http_request_duration_seconds_created{handler="none",method="PUT"} : "1.7516349058165367e+09" http_request_duration_seconds_created{handler="none",method="SEARCH"} : "1.7516341693599503e+09" http_request_duration_seconds_created{handler="none",method="TRACE"} : "1.751634165566383e+09" http_request_duration_seconds_sum{handler="/v1/chat/completions",method="POST"} : "248593.6503553912" http_request_duration_seconds_sum{handler="/v1/models",method="GET"} : "0.0027880221605300903" http_request_duration_seconds_sum{handler="none",method="GET"} : "0.8171688430011272" http_request_duration_seconds_sum{handler="none",method="HEAD"} : "0.0009557865560054779" http_request_duration_seconds_sum{handler="none",method="OPTIONS"} : "0.0028338953852653503" http_request_duration_seconds_sum{handler="none",method="POST"} : "0.014691390097141266" http_request_duration_seconds_sum{handler="none",method="PROPFIND"} : "0.000380123034119606" http_request_duration_seconds_sum{handler="none",method="PUT"} : "0.00042458251118659973" http_request_duration_seconds_sum{handler="none",method="SEARCH"} : "0.0005713216960430145" http_request_duration_seconds_sum{handler="none",method="TRACE"} : "0.0008798614144325256" http_request_size_bytes_count{handler="/v1/chat/completions"} : "21527.0" 
http_request_size_bytes_count{handler="/v1/models"} : "1.0" http_request_size_bytes_count{handler="none"} : "4818.0" http_request_size_bytes_created{handler="/v1/chat/completions"} : "1.7512560967123284e+09" http_request_size_bytes_created{handler="/v1/models"} : "1.753692579424021e+09" http_request_size_bytes_created{handler="none"} : "1.7516341020104244e+09" http_request_size_bytes_sum{handler="/v1/chat/completions"} : "802493.0" http_request_size_bytes_sum{handler="/v1/models"} : "0.0" http_request_size_bytes_sum{handler="none"} : "32625.0" http_requests_created{handler="/v1/chat/completions",method="POST",status="2xx"} : "1.7512560967123055e+09" http_requests_created{handler="/v1/chat/completions",method="POST",status="4xx"} : "1.7514186825033803e+09" http_requests_created{handler="/v1/models",method="GET",status="2xx"} : "1.753692579423783e+09" http_requests_created{handler="none",method="GET",status="4xx"} : "1.7516341020101185e+09" http_requests_created{handler="none",method="HEAD",status="4xx"} : "1.7516341761198838e+09" http_requests_created{handler="none",method="OPTIONS",status="4xx"} : "1.7516341579990091e+09" http_requests_created{handler="none",method="POST",status="4xx"} : "1.7516341771294773e+09" http_requests_created{handler="none",method="PROPFIND",status="4xx"} : "1.7516341696152897e+09" http_requests_created{handler="none",method="PUT",status="4xx"} : "1.7516349058164842e+09" http_requests_created{handler="none",method="SEARCH",status="4xx"} : "1.7516341693599005e+09" http_requests_created{handler="none",method="TRACE",status="4xx"} : "1.7516341655663416e+09" http_requests_total{handler="/v1/chat/completions",method="POST",status="2xx"} : "21474.0" http_requests_total{handler="/v1/chat/completions",method="POST",status="4xx"} : "53.0" http_requests_total{handler="/v1/models",method="GET",status="2xx"} : "1.0" http_requests_total{handler="none",method="GET",status="4xx"} : "4693.0" http_requests_total{handler="none",method="HEAD",status="4xx"} : 
"6.0" http_requests_total{handler="none",method="OPTIONS",status="4xx"} : "12.0" http_requests_total{handler="none",method="POST",status="4xx"} : "95.0" http_requests_total{handler="none",method="PROPFIND",status="4xx"} : "3.0" http_requests_total{handler="none",method="PUT",status="4xx"} : "3.0" http_requests_total{handler="none",method="SEARCH",status="4xx"} : "3.0" http_requests_total{handler="none",method="TRACE",status="4xx"} : "3.0" http_response_size_bytes_count{handler="/v1/chat/completions"} : "21527.0" http_response_size_bytes_count{handler="/v1/models"} : "1.0" http_response_size_bytes_count{handler="none"} : "4818.0" http_response_size_bytes_created{handler="/v1/chat/completions"} : "1.7512560967123535e+09" http_response_size_bytes_created{handler="/v1/models"} : "1.7536925794240377e+09" http_response_size_bytes_created{handler="none"} : "1.751634102010456e+09" http_response_size_bytes_sum{handler="/v1/chat/completions"} : "3.539877e+06" http_response_size_bytes_sum{handler="/v1/models"} : "538.0" http_response_size_bytes_sum{handler="none"} : "105996.0" process_cpu_seconds_total : "2379.04" process_max_fds : "1.073741816e+09" process_open_fds : "48.0" process_resident_memory_bytes : "4.28404736e+08" process_start_time_seconds : "1.75125604907e+09" process_virtual_memory_bytes : "1.2146741248e+010" python_gc_collections_total{generation="0"} : "5120.0" python_gc_collections_total{generation="1"} : "464.0" python_gc_collections_total{generation="2"} : "29.0" python_gc_objects_collected_total{generation="0"} : "7970.0" python_gc_objects_collected_total{generation="1"} : "1332.0" python_gc_objects_collected_total{generation="2"} : "994.0" python_gc_objects_uncollectable_total{generation="0"} : "0.0" python_gc_objects_uncollectable_total{generation="1"} : "0.0" python_gc_objects_uncollectable_total{generation="2"} : "0.0" python_info{implementation="CPython",major="3",minor="12",patchlevel="10",version="3.12.10"} : "1.0" 
vllm:cache_config_info{block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_offload_gb="0",enable_prefix_caching="True",gpu_memory_utilization="0.95",is_attention_free="False",num_gpu_blocks_override="None",prefix_caching_hash_algo="builtin",sliding_window="None",swap_space="4",swap_space_bytes="4294967296"} : "1.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "489.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1085.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1940.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2553.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3480.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4342.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5216.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8314.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10648.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "15688.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19682.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20745.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20989.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21156.0" 
vllm:e2e_request_latency_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21237.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21414.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21459.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21462.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21463.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:e2e_request_latency_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:e2e_request_latency_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305371e+09" vllm:e2e_request_latency_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "247822.51679587364" vllm:generation_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302482e+09" vllm:generation_tokens_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.107384e+06" vllm:gpu_cache_usage_perc{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.00012135922330092086" vllm:gpu_prefix_cache_hits_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030216e+09" vllm:gpu_prefix_cache_hits_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "613379.0" vllm:gpu_prefix_cache_queries_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302012e+09" 
vllm:gpu_prefix_cache_queries_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.143769e+06" vllm:iteration_tokens_total_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.243007e+06" vllm:iteration_tokens_total_bucket{engine="0",le="8.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.084752e+06" vllm:iteration_tokens_total_bucket{engine="0",le="16.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.140075e+06" vllm:iteration_tokens_total_bucket{engine="0",le="32.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.140732e+06" vllm:iteration_tokens_total_bucket{engine="0",le="64.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.140779e+06" vllm:iteration_tokens_total_bucket{engine="0",le="128.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.141224e+06" vllm:iteration_tokens_total_bucket{engine="0",le="256.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.142381e+06" vllm:iteration_tokens_total_bucket{engine="0",le="512.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.15563e+06" vllm:iteration_tokens_total_bucket{engine="0",le="1024.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.158366e+06" vllm:iteration_tokens_total_bucket{engine="0",le="2048.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.160035e+06" vllm:iteration_tokens_total_bucket{engine="0",le="4096.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.160951e+06" vllm:iteration_tokens_total_bucket{engine="0",le="8192.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.161328e+06" vllm:iteration_tokens_total_bucket{engine="0",le="16384.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.161551e+06" vllm:iteration_tokens_total_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.161608e+06" vllm:iteration_tokens_total_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.161608e+06" vllm:iteration_tokens_total_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660303833e+09" 
vllm:iteration_tokens_total_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.3555388e+07" vllm:num_preemptions_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030228e+09" vllm:num_preemptions_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:num_requests_running{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:num_requests_waiting{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:prompt_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302384e+09" vllm:prompt_tokens_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.8448004e+07" vllm:request_decode_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1031.0" vllm:request_decode_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1755.0" vllm:request_decode_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2641.0" vllm:request_decode_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3193.0" vllm:request_decode_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3930.0" vllm:request_decode_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4858.0" vllm:request_decode_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5855.0" vllm:request_decode_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8843.0" vllm:request_decode_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "11018.0" vllm:request_decode_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "16042.0" vllm:request_decode_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19932.0" 
vllm:request_decode_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20880.0" vllm:request_decode_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21085.0" vllm:request_decode_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21219.0" vllm:request_decode_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21287.0" vllm:request_decode_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21421.0" vllm:request_decode_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21459.0" vllm:request_decode_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21462.0" vllm:request_decode_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21463.0" vllm:request_decode_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_decode_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_decode_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_decode_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_decode_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660307164e+09" vllm:request_decode_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "234210.93255270552" vllm:request_generation_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_generation_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "374.0" vllm:request_generation_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "679.0" 
vllm:request_generation_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1399.0" vllm:request_generation_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2597.0" vllm:request_generation_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4845.0" vllm:request_generation_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8414.0" vllm:request_generation_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10524.0" vllm:request_generation_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20497.0" vllm:request_generation_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21182.0" vllm:request_generation_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21386.0" vllm:request_generation_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21460.0" vllm:request_generation_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21462.0" vllm:request_generation_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21463.0" vllm:request_generation_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_generation_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_generation_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660303566e+09" vllm:request_generation_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.092714e+06" vllm:request_inference_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "495.0" vllm:request_inference_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1110.0" 
vllm:request_inference_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1956.0" vllm:request_inference_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2567.0" vllm:request_inference_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3493.0" vllm:request_inference_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4363.0" vllm:request_inference_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5252.0" vllm:request_inference_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8356.0" vllm:request_inference_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10677.0" vllm:request_inference_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "15741.0" vllm:request_inference_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19718.0" vllm:request_inference_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20769.0" vllm:request_inference_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21011.0" vllm:request_inference_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21174.0" vllm:request_inference_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21246.0" vllm:request_inference_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21415.0" vllm:request_inference_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21459.0" vllm:request_inference_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21462.0" 
vllm:request_inference_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21463.0" vllm:request_inference_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_inference_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_inference_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_inference_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_inference_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305977e+09" vllm:request_inference_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "246084.0233336063" vllm:request_max_num_generation_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "374.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "679.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1399.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2597.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4845.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8414.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10524.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20497.0" 
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21182.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21386.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21460.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21462.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21463.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_max_num_generation_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_max_num_generation_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030409e+09" vllm:request_max_num_generation_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.092714e+06" vllm:request_params_max_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.0" 
vllm:request_params_max_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.0" vllm:request_params_max_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4.0" vllm:request_params_max_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "7.0" vllm:request_params_max_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "134.0" vllm:request_params_max_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "420.0" vllm:request_params_max_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "437.0" vllm:request_params_max_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "915.0" vllm:request_params_max_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_max_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_max_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660304518e+09" vllm:request_params_max_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "6.23433974e+08" vllm:request_params_n_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_n_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_n_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_n_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_n_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_n_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_params_n_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" 
vllm:request_params_n_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660304315e+09" vllm:request_params_n_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "17829.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18462.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19014.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19415.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20007.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20328.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20485.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20996.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21206.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21366.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21423.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21437.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21463.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" 
vllm:request_prefill_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prefill_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030625e+09" vllm:request_prefill_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "11873.090780900791" vllm:request_prompt_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "52.0" vllm:request_prompt_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "109.0" 
vllm:request_prompt_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1283.0" vllm:request_prompt_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "15752.0" vllm:request_prompt_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18203.0" vllm:request_prompt_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19866.0" vllm:request_prompt_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20966.0" vllm:request_prompt_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21314.0" vllm:request_prompt_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21420.0" vllm:request_prompt_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prompt_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_prompt_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660303123e+09" vllm:request_prompt_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.8447846e+07" vllm:request_queue_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21388.0" vllm:request_queue_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21388.0" vllm:request_queue_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21388.0" vllm:request_queue_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21388.0" vllm:request_queue_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21388.0" vllm:request_queue_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21388.0" 
vllm:request_queue_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21396.0" vllm:request_queue_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21417.0" vllm:request_queue_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21429.0" vllm:request_queue_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21439.0" vllm:request_queue_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21448.0" vllm:request_queue_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21466.0" vllm:request_queue_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" vllm:request_queue_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21472.0" 
vllm:request_queue_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305698e+09" vllm:request_queue_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1093.2847901340574" vllm:request_success_created{engine="0",finished_reason="abort",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302787e+09" vllm:request_success_created{engine="0",finished_reason="length",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030273e+09" vllm:request_success_created{engine="0",finished_reason="stop",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302663e+09" vllm:request_success_total{engine="0",finished_reason="abort",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_success_total{engine="0",finished_reason="length",model_name="qwen2.5-72b-instruct-gptq-int4"} : "16.0" vllm:request_success_total{engine="0",finished_reason="stop",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21456.0" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.01",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.1",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.076944e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.2",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.081527e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.082037e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.4",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.082299e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.05",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4.434515e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.082539e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.15",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.081115e+06" 
vllm:time_per_output_token_seconds_bucket{engine="0",le="0.025",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.075",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.058093e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.75",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.083005e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.083381e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.084889e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="7.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="80.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.085911e+06" vllm:time_per_output_token_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305083e+09" vllm:time_per_output_token_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "234810.7781156646" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.001",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" 
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.01",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.1",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10741.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.02",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.04",model_name="qwen2.5-72b-instruct-gptq-int4"} : "22.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.005",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18368.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.06",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2294.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.08",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5674.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.25",model_name="qwen2.5-72b-instruct-gptq-int4"} : "17446.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.75",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18837.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19336.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20428.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20938.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="7.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21079.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21155.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21393.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21458.0" 
vllm:time_to_first_token_seconds_bucket{engine="0",le="80.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21473.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="160.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21473.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="640.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21473.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="2560.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21473.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21473.0" vllm:time_to_first_token_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21473.0" vllm:time_to_first_token_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660304754e+09" vllm:time_to_first_token_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "13614.933543205261" ”分析归类做这些数据

“http_request_duration_highr_seconds_bucket{le="0.01"} : "4847.0" http_request_duration_highr_seconds_bucket{le="0.1"} : "5046.0" http_request_duration_highr_seconds_bucket{le="0.05"} : "4859.0" http_request_duration_highr_seconds_bucket{le="0.5"} : "5934.0" http_request_duration_highr_seconds_bucket{le="0.025"} : "4853.0" http_request_duration_highr_seconds_bucket{le="0.25"} : "5279.0" http_request_duration_highr_seconds_bucket{le="0.075"} : "4866.0" http_request_duration_highr_seconds_bucket{le="0.75"} : "6667.0" http_request_duration_highr_seconds_bucket{le="1.0"} : "7415.0" http_request_duration_highr_seconds_bucket{le="1.5"} : "8357.0" http_request_duration_highr_seconds_bucket{le="2.0"} : "9227.0" http_request_duration_highr_seconds_bucket{le="2.5"} : "10121.0" http_request_duration_highr_seconds_bucket{le="3.0"} : "10998.0" http_request_duration_highr_seconds_bucket{le="3.5"} : "11729.0" http_request_duration_highr_seconds_bucket{le="4.0"} : "12385.0" http_request_duration_highr_seconds_bucket{le="4.5"} : "12882.0" http_request_duration_highr_seconds_bucket{le="5.0"} : "13234.0" http_request_duration_highr_seconds_bucket{le="7.5"} : "14374.0" http_request_duration_highr_seconds_bucket{le="10.0"} : "15581.0" http_request_duration_highr_seconds_bucket{le="30.0"} : "25709.0" http_request_duration_highr_seconds_bucket{le="60.0"} : "26209.0" http_request_duration_highr_seconds_bucket{le="+Inf"} : "26448.0" http_request_duration_highr_seconds_count : "26448.0" http_request_duration_highr_seconds_created : "1.7512560890372858e+09" http_request_duration_highr_seconds_sum : "250360.7440119423" http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.1",method="POST"} : "227.0" http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.5",method="POST"} : "1115.0" http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="1.0",method="POST"} : "2596.0" 
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="+Inf",method="POST"} : "21629.0" http_request_duration_seconds_bucket{handler="/v1/models",le="0.1",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="/v1/models",le="0.5",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="/v1/models",le="1.0",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="/v1/models",le="+Inf",method="GET"} : "1.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="HEAD"} : "6.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.1",method="TRACE"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="HEAD"} : "6.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="0.5",method="TRACE"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="HEAD"} : 
"6.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="1.0",method="TRACE"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="GET"} : "4693.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="HEAD"} : "6.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="OPTIONS"} : "12.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="POST"} : "95.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="PROPFIND"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="PUT"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="SEARCH"} : "3.0" http_request_duration_seconds_bucket{handler="none",le="+Inf",method="TRACE"} : "3.0" http_request_duration_seconds_count{handler="/v1/chat/completions",method="POST"} : "21629.0" http_request_duration_seconds_count{handler="/v1/models",method="GET"} : "1.0" http_request_duration_seconds_count{handler="none",method="GET"} : "4693.0" http_request_duration_seconds_count{handler="none",method="HEAD"} : "6.0" http_request_duration_seconds_count{handler="none",method="OPTIONS"} : "12.0" http_request_duration_seconds_count{handler="none",method="POST"} : "95.0" http_request_duration_seconds_count{handler="none",method="PROPFIND"} : "3.0" http_request_duration_seconds_count{handler="none",method="PUT"} : "3.0" http_request_duration_seconds_count{handler="none",method="SEARCH"} : "3.0" http_request_duration_seconds_count{handler="none",method="TRACE"} : "3.0" 
http_request_duration_seconds_created{handler="/v1/chat/completions",method="POST"} : "1.7512560967123778e+09" http_request_duration_seconds_created{handler="/v1/models",method="GET"} : "1.7536925794242406e+09" http_request_duration_seconds_created{handler="none",method="GET"} : "1.7516341020108707e+09" http_request_duration_seconds_created{handler="none",method="HEAD"} : "1.751634176119915e+09" http_request_duration_seconds_created{handler="none",method="OPTIONS"} : "1.7516341579990425e+09" http_request_duration_seconds_created{handler="none",method="POST"} : "1.7516341771295128e+09" http_request_duration_seconds_created{handler="none",method="PROPFIND"} : "1.7516341696153226e+09" http_request_duration_seconds_created{handler="none",method="PUT"} : "1.7516349058165367e+09" http_request_duration_seconds_created{handler="none",method="SEARCH"} : "1.7516341693599503e+09" http_request_duration_seconds_created{handler="none",method="TRACE"} : "1.751634165566383e+09" http_request_duration_seconds_sum{handler="/v1/chat/completions",method="POST"} : "250359.90331811644" http_request_duration_seconds_sum{handler="/v1/models",method="GET"} : "0.0027880221605300903" http_request_duration_seconds_sum{handler="none",method="GET"} : "0.8171688430011272" http_request_duration_seconds_sum{handler="none",method="HEAD"} : "0.0009557865560054779" http_request_duration_seconds_sum{handler="none",method="OPTIONS"} : "0.0028338953852653503" http_request_duration_seconds_sum{handler="none",method="POST"} : "0.014691390097141266" http_request_duration_seconds_sum{handler="none",method="PROPFIND"} : "0.000380123034119606" http_request_duration_seconds_sum{handler="none",method="PUT"} : "0.00042458251118659973" http_request_duration_seconds_sum{handler="none",method="SEARCH"} : "0.0005713216960430145" http_request_duration_seconds_sum{handler="none",method="TRACE"} : "0.0008798614144325256" http_request_size_bytes_count{handler="/v1/chat/completions"} : "21629.0" 
http_request_size_bytes_count{handler="/v1/models"} : "1.0" http_request_size_bytes_count{handler="none"} : "4818.0" http_request_size_bytes_created{handler="/v1/chat/completions"} : "1.7512560967123284e+09" http_request_size_bytes_created{handler="/v1/models"} : "1.753692579424021e+09" http_request_size_bytes_created{handler="none"} : "1.7516341020104244e+09" http_request_size_bytes_sum{handler="/v1/chat/completions"} : "806031.0" http_request_size_bytes_sum{handler="/v1/models"} : "0.0" http_request_size_bytes_sum{handler="none"} : "32625.0" http_requests_created{handler="/v1/chat/completions",method="POST",status="2xx"} : "1.7512560967123055e+09" http_requests_created{handler="/v1/chat/completions",method="POST",status="4xx"} : "1.7514186825033803e+09" http_requests_created{handler="/v1/models",method="GET",status="2xx"} : "1.753692579423783e+09" http_requests_created{handler="none",method="GET",status="4xx"} : "1.7516341020101185e+09" http_requests_created{handler="none",method="HEAD",status="4xx"} : "1.7516341761198838e+09" http_requests_created{handler="none",method="OPTIONS",status="4xx"} : "1.7516341579990091e+09" http_requests_created{handler="none",method="POST",status="4xx"} : "1.7516341771294773e+09" http_requests_created{handler="none",method="PROPFIND",status="4xx"} : "1.7516341696152897e+09" http_requests_created{handler="none",method="PUT",status="4xx"} : "1.7516349058164842e+09" http_requests_created{handler="none",method="SEARCH",status="4xx"} : "1.7516341693599005e+09" http_requests_created{handler="none",method="TRACE",status="4xx"} : "1.7516341655663416e+09" http_requests_total{handler="/v1/chat/completions",method="POST",status="2xx"} : "21576.0" http_requests_total{handler="/v1/chat/completions",method="POST",status="4xx"} : "53.0" http_requests_total{handler="/v1/models",method="GET",status="2xx"} : "1.0" http_requests_total{handler="none",method="GET",status="4xx"} : "4693.0" http_requests_total{handler="none",method="HEAD",status="4xx"} : 
"6.0" http_requests_total{handler="none",method="OPTIONS",status="4xx"} : "12.0" http_requests_total{handler="none",method="POST",status="4xx"} : "95.0" http_requests_total{handler="none",method="PROPFIND",status="4xx"} : "3.0" http_requests_total{handler="none",method="PUT",status="4xx"} : "3.0" http_requests_total{handler="none",method="SEARCH",status="4xx"} : "3.0" http_requests_total{handler="none",method="TRACE",status="4xx"} : "3.0" http_response_size_bytes_count{handler="/v1/chat/completions"} : "21629.0" http_response_size_bytes_count{handler="/v1/models"} : "1.0" http_response_size_bytes_count{handler="none"} : "4818.0" http_response_size_bytes_created{handler="/v1/chat/completions"} : "1.7512560967123535e+09" http_response_size_bytes_created{handler="/v1/models"} : "1.7536925794240377e+09" http_response_size_bytes_created{handler="none"} : "1.751634102010456e+09" http_response_size_bytes_sum{handler="/v1/chat/completions"} : "3.541716e+06" http_response_size_bytes_sum{handler="/v1/models"} : "538.0" http_response_size_bytes_sum{handler="none"} : "105996.0" process_cpu_seconds_total : "2391.38" process_max_fds : "1.073741816e+09" process_open_fds : "48.0" process_resident_memory_bytes : "4.28490752e+08" process_start_time_seconds : "1.75125604907e+09" process_virtual_memory_bytes : "1.2146741248e+010" python_gc_collections_total{generation="0"} : "5127.0" python_gc_collections_total{generation="1"} : "465.0" python_gc_collections_total{generation="2"} : "29.0" python_gc_objects_collected_total{generation="0"} : "8032.0" python_gc_objects_collected_total{generation="1"} : "1350.0" python_gc_objects_collected_total{generation="2"} : "994.0" python_gc_objects_uncollectable_total{generation="0"} : "0.0" python_gc_objects_uncollectable_total{generation="1"} : "0.0" python_gc_objects_uncollectable_total{generation="2"} : "0.0" python_info{implementation="CPython",major="3",minor="12",patchlevel="10",version="3.12.10"} : "1.0" 
vllm:cache_config_info{block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_offload_gb="0",enable_prefix_caching="True",gpu_memory_utilization="0.95",is_attention_free="False",num_gpu_blocks_override="None",prefix_caching_hash_algo="builtin",sliding_window="None",swap_space="4",swap_space_bytes="4294967296"} : "1.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "491.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1088.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1947.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2563.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3503.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4373.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5264.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8367.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10710.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "15758.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19765.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20837.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21082.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21252.0" 
vllm:e2e_request_latency_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21336.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21514.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21560.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21563.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21565.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:e2e_request_latency_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:e2e_request_latency_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:e2e_request_latency_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305371e+09" vllm:e2e_request_latency_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "249588.05839586258" vllm:generation_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302482e+09" vllm:generation_tokens_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.146206e+06" vllm:gpu_cache_usage_perc{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.008373786407766981" vllm:gpu_prefix_cache_hits_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030216e+09" vllm:gpu_prefix_cache_hits_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "616813.0" vllm:gpu_prefix_cache_queries_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302012e+09" 
vllm:gpu_prefix_cache_queries_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.150983e+06" vllm:iteration_tokens_total_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.262281e+06" vllm:iteration_tokens_total_bucket{engine="0",le="8.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.111395e+06" vllm:iteration_tokens_total_bucket{engine="0",le="16.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.166718e+06" vllm:iteration_tokens_total_bucket{engine="0",le="32.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.167375e+06" vllm:iteration_tokens_total_bucket{engine="0",le="64.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.167422e+06" vllm:iteration_tokens_total_bucket{engine="0",le="128.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.167871e+06" vllm:iteration_tokens_total_bucket{engine="0",le="256.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.169028e+06" vllm:iteration_tokens_total_bucket{engine="0",le="512.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.182286e+06" vllm:iteration_tokens_total_bucket{engine="0",le="1024.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.185063e+06" vllm:iteration_tokens_total_bucket{engine="0",le="2048.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.186771e+06" vllm:iteration_tokens_total_bucket{engine="0",le="4096.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.187694e+06" vllm:iteration_tokens_total_bucket{engine="0",le="8192.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.188073e+06" vllm:iteration_tokens_total_bucket{engine="0",le="16384.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.188296e+06" vllm:iteration_tokens_total_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.188353e+06" vllm:iteration_tokens_total_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.188353e+06" vllm:iteration_tokens_total_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660303833e+09" 
vllm:iteration_tokens_total_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.3710533e+07" vllm:num_preemptions_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030228e+09" vllm:num_preemptions_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:num_requests_running{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.0" vllm:num_requests_waiting{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:prompt_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302384e+09" vllm:prompt_tokens_total{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.8564327e+07" vllm:request_decode_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1036.0" vllm:request_decode_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1763.0" vllm:request_decode_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2659.0" vllm:request_decode_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3215.0" vllm:request_decode_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3962.0" vllm:request_decode_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4894.0" vllm:request_decode_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5906.0" vllm:request_decode_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8900.0" vllm:request_decode_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "11080.0" vllm:request_decode_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "16114.0" vllm:request_decode_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20016.0" 
vllm:request_decode_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20972.0" vllm:request_decode_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21180.0" vllm:request_decode_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21317.0" vllm:request_decode_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21386.0" vllm:request_decode_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21521.0" vllm:request_decode_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21560.0" vllm:request_decode_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21563.0" vllm:request_decode_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21565.0" vllm:request_decode_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_decode_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_decode_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_decode_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_decode_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660307164e+09" vllm:request_decode_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "235893.4257239038" vllm:request_generation_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_generation_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "375.0" vllm:request_generation_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "680.0" 
vllm:request_generation_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1406.0" vllm:request_generation_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2613.0" vllm:request_generation_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4881.0" vllm:request_generation_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8468.0" vllm:request_generation_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10587.0" vllm:request_generation_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20580.0" vllm:request_generation_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21278.0" vllm:request_generation_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21486.0" vllm:request_generation_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21561.0" vllm:request_generation_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21563.0" vllm:request_generation_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21565.0" vllm:request_generation_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_generation_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_generation_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660303566e+09" vllm:request_generation_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.131528e+06" vllm:request_inference_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "497.0" vllm:request_inference_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1113.0" 
vllm:request_inference_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1963.0" vllm:request_inference_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2577.0" vllm:request_inference_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "3516.0" vllm:request_inference_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4394.0" vllm:request_inference_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5300.0" vllm:request_inference_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8410.0" vllm:request_inference_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10739.0" vllm:request_inference_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "15812.0" vllm:request_inference_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19801.0" vllm:request_inference_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20861.0" vllm:request_inference_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21104.0" vllm:request_inference_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21270.0" vllm:request_inference_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21345.0" vllm:request_inference_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21515.0" vllm:request_inference_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21560.0" vllm:request_inference_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21563.0" 
vllm:request_inference_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21565.0" vllm:request_inference_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_inference_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_inference_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_inference_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_inference_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305977e+09" vllm:request_inference_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "247843.74306567665" vllm:request_max_num_generation_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "375.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "680.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1406.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2613.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4881.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "8468.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10587.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20580.0" 
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21278.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21486.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21561.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21563.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21565.0" vllm:request_max_num_generation_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_max_num_generation_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_max_num_generation_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030409e+09" vllm:request_max_num_generation_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.131528e+06" vllm:request_params_max_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_params_max_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.0" 
vllm:request_params_max_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2.0" vllm:request_params_max_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4.0" vllm:request_params_max_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "7.0" vllm:request_params_max_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "136.0" vllm:request_params_max_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "422.0" vllm:request_params_max_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "439.0" vllm:request_params_max_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "924.0" vllm:request_params_max_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_max_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_max_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660304518e+09" vllm:request_params_max_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "6.26237766e+08" vllm:request_params_n_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_n_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_n_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_n_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_n_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_n_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_params_n_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" 
vllm:request_params_n_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660304315e+09" vllm:request_params_n_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "17877.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18517.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19080.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19493.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20099.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20423.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20583.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21096.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21308.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21468.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21525.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21539.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21565.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" 
vllm:request_prefill_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prefill_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030625e+09" vllm:request_prefill_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "11950.31734177284" vllm:request_prompt_tokens_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_prompt_tokens_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "52.0" vllm:request_prompt_tokens_bucket{engine="0",le="100.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "110.0" 
vllm:request_prompt_tokens_bucket{engine="0",le="200.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1287.0" vllm:request_prompt_tokens_bucket{engine="0",le="500.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "15764.0" vllm:request_prompt_tokens_bucket{engine="0",le="1000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18257.0" vllm:request_prompt_tokens_bucket{engine="0",le="2000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19958.0" vllm:request_prompt_tokens_bucket{engine="0",le="5000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21066.0" vllm:request_prompt_tokens_bucket{engine="0",le="10000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21416.0" vllm:request_prompt_tokens_bucket{engine="0",le="20000.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21522.0" vllm:request_prompt_tokens_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prompt_tokens_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_prompt_tokens_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660303123e+09" vllm:request_prompt_tokens_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.8563094e+07" vllm:request_queue_time_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21489.0" vllm:request_queue_time_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21489.0" vllm:request_queue_time_seconds_bucket{engine="0",le="0.8",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21489.0" vllm:request_queue_time_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21489.0" vllm:request_queue_time_seconds_bucket{engine="0",le="1.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21489.0" vllm:request_queue_time_seconds_bucket{engine="0",le="2.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21489.0" 
vllm:request_queue_time_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21497.0" vllm:request_queue_time_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21519.0" vllm:request_queue_time_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21531.0" vllm:request_queue_time_seconds_bucket{engine="0",le="15.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21541.0" vllm:request_queue_time_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21550.0" vllm:request_queue_time_seconds_bucket{engine="0",le="30.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21568.0" vllm:request_queue_time_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="50.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="60.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="120.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="240.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="480.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="960.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="1920.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="7680.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" vllm:request_queue_time_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21574.0" 
vllm:request_queue_time_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305698e+09" vllm:request_queue_time_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1095.8753950130194" vllm:request_success_created{engine="0",finished_reason="abort",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302787e+09" vllm:request_success_created{engine="0",finished_reason="length",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.751256066030273e+09" vllm:request_success_created{engine="0",finished_reason="stop",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660302663e+09" vllm:request_success_total{engine="0",finished_reason="abort",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:request_success_total{engine="0",finished_reason="length",model_name="qwen2.5-72b-instruct-gptq-int4"} : "16.0" vllm:request_success_total{engine="0",finished_reason="stop",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21558.0" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.01",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.1",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.115606e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.2",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.120195e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.3",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.120706e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.4",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12097e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.05",model_name="qwen2.5-72b-instruct-gptq-int4"} : "4.473164e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.121215e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.15",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.119781e+06" 
vllm:time_per_output_token_seconds_bucket{engine="0",le="0.025",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.075",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.096754e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="0.75",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.121683e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.122067e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.123599e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="7.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="80.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5.12463e+06" vllm:time_per_output_token_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660305083e+09" vllm:time_per_output_token_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "236493.54754120763" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.001",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" 
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.01",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.1",model_name="qwen2.5-72b-instruct-gptq-int4"} : "10761.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.02",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.04",model_name="qwen2.5-72b-instruct-gptq-int4"} : "22.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.005",model_name="qwen2.5-72b-instruct-gptq-int4"} : "0.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18421.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.06",model_name="qwen2.5-72b-instruct-gptq-int4"} : "2307.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.08",model_name="qwen2.5-72b-instruct-gptq-int4"} : "5692.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.25",model_name="qwen2.5-72b-instruct-gptq-int4"} : "17492.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="0.75",model_name="qwen2.5-72b-instruct-gptq-int4"} : "18898.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="1.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "19414.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="2.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "20526.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="5.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21039.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="7.5",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21181.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="10.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21258.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="20.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21496.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="40.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21561.0" 
vllm:time_to_first_token_seconds_bucket{engine="0",le="80.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21576.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="160.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21576.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="640.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21576.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="2560.0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21576.0" vllm:time_to_first_token_seconds_bucket{engine="0",le="+Inf",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21576.0" vllm:time_to_first_token_seconds_count{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "21576.0" vllm:time_to_first_token_seconds_created{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "1.7512560660304754e+09" vllm:time_to_first_token_seconds_sum{engine="0",model_name="qwen2.5-72b-instruct-gptq-int4"} : "13699.205335855484"”分析这些数据代表什么含义

INFO 07-25 07:11:43 [model_runner_v1.py:1745] Starting to load model /models/z50051264/summary/Qwen2.5-7B-nf4/... ERROR 07-25 07:11:44 [core.py:586] EngineCore failed to start. ERROR 07-25 07:11:44 [core.py:586] Traceback (most recent call last): ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 577, in run_engine_core ERROR 07-25 07:11:44 [core.py:586] engine_core = EngineCoreProc(*args, **kwargs) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 404, in __init__ ERROR 07-25 07:11:44 [core.py:586] super().__init__(vllm_config, executor_class, log_stats, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 75, in __init__ ERROR 07-25 07:11:44 [core.py:586] self.model_executor = executor_class(vllm_config) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/executor/executor_base.py", line 53, in __init__ ERROR 07-25 07:11:44 [core.py:586] self._init_executor() ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/executor/uniproc_executor.py", line 48, in _init_executor ERROR 07-25 07:11:44 [core.py:586] self.collective_rpc("load_model") ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/executor/uniproc_executor.py", line 57, in collective_rpc ERROR 07-25 07:11:44 [core.py:586] answer = run_method(self.driver_worker, method, args, kwargs) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/utils/__init__.py", line 2736, in run_method ERROR 07-25 07:11:44 [core.py:586] return func(*args, **kwargs) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 240, in load_model ERROR 07-25 07:11:44 [core.py:586] self.model_runner.load_model() ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1748, in load_model ERROR 07-25 07:11:44 [core.py:586] self.model = 
get_model(vllm_config=self.vllm_config) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/model_loader/__init__.py", line 59, in get_model ERROR 07-25 07:11:44 [core.py:586] return loader.load_model(vllm_config=vllm_config, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/model_loader/base_loader.py", line 38, in load_model ERROR 07-25 07:11:44 [core.py:586] model = initialize_model(vllm_config=vllm_config, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/model_loader/utils.py", line 64, in initialize_model ERROR 07-25 07:11:44 [core.py:586] return model_class(vllm_config=vllm_config, prefix=prefix) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 448, in __init__ ERROR 07-25 07:11:44 [core.py:586] self.model = Qwen2Model(vllm_config=vllm_config, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/compilation/decorators.py", line 152, in __init__ ERROR 07-25 07:11:44 [core.py:586] old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 317, in __init__ ERROR 07-25 07:11:44 [core.py:586] self.start_layer, self.end_layer, self.layers = make_layers( ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/utils.py", line 639, in make_layers ERROR 07-25 07:11:44 [core.py:586] [PPMissingLayer() for _ in range(start_layer)] + [ ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/utils.py", line 640, in ERROR 07-25 07:11:44 [core.py:586] maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}")) ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 319, in <lambda> ERROR 07-25 07:11:44 [core.py:586] lambda prefix: decoder_layer_type(config=config, ERROR 07-25 07:11:44 
[core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 216, in __init__ ERROR 07-25 07:11:44 [core.py:586] self.self_attn = Qwen2Attention( ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 137, in __init__ ERROR 07-25 07:11:44 [core.py:586] self.qkv_proj = QKVParallelLinear( ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/layers/linear.py", line 874, in __init__ ERROR 07-25 07:11:44 [core.py:586] super().__init__(input_size=input_size, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/layers/linear.py", line 420, in __init__ ERROR 07-25 07:11:44 [core.py:586] super().__init__(input_size, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/layers/linear.py", line 266, in __init__ ERROR 07-25 07:11:44 [core.py:586] self.quant_method = quant_config.get_quant_method(self, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/quantization/quant_config.py", line 92, in get_quant_method ERROR 07-25 07:11:44 [core.py:586] if self.is_layer_skipped_ascend(prefix, ERROR 07-25 07:11:44 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/quantization/quant_config.py", line 126, in is_layer_skipped_ascend ERROR 07-25 07:11:44 [core.py:586] is_shard_skipped = self.quant_description[shard_prefix + ERROR 07-25 07:11:44 [core.py:586] KeyError: 'model.layers.0.self_attn.q_proj.weight' Process EngineCore_0: Traceback (most recent call last): File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap self.run() File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 108, in run self._target(*self._args, **self._kwargs) File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 590, in run_engine_core raise e File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 577, in run_engine_core engine_core = 
EngineCoreProc(*args, **kwargs) File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 404, in __init__ super().__init__(vllm_config, executor_class, log_stats, File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 75, in __init__ self.model_executor = executor_class(vllm_config) File "/vllm-workspace/vllm/vllm/executor/executor_base.py", line 53, in __init__ self._init_executor() File "/vllm-workspace/vllm/vllm/executor/uniproc_executor.py", line 48, in _init_executor self.collective_rpc("load_model") File "/vllm-workspace/vllm/vllm/executor/uniproc_executor.py", line 57, in collective_rpc answer = run_method(self.driver_worker, method, args, kwargs) File "/vllm-workspace/vllm/vllm/utils/__init__.py", line 2736, in run_method return func(*args, **kwargs) File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 240, in load_model self.model_runner.load_model() File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1748, in load_model self.model = get_model(vllm_config=self.vllm_config) File "/vllm-workspace/vllm/vllm/model_executor/model_loader/__init__.py", line 59, in get_model return loader.load_model(vllm_config=vllm_config, File "/vllm-workspace/vllm/vllm/model_executor/model_loader/base_loader.py", line 38, in load_model model = initialize_model(vllm_config=vllm_config, File "/vllm-workspace/vllm/vllm/model_executor/model_loader/utils.py", line 64, in initialize_model return model_class(vllm_config=vllm_config, prefix=prefix) File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 448, in __init__ self.model = Qwen2Model(vllm_config=vllm_config, File "/vllm-workspace/vllm/vllm/compilation/decorators.py", line 152, in __init__ old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs) File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 317, in __init__ self.start_layer, self.end_layer, self.layers = make_layers( File "/vllm-workspace/vllm/vllm/model_executor/models/utils.py", 
line 639, in make_layers [PPMissingLayer() for _ in range(start_layer)] + [ File "/vllm-workspace/vllm/vllm/model_executor/models/utils.py", line 640, in maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}")) File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 319, in <lambda> lambda prefix: decoder_layer_type(config=config, File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 216, in __init__ self.self_attn = Qwen2Attention( File "/vllm-workspace/vllm/vllm/model_executor/models/qwen2.py", line 137, in __init__ self.qkv_proj = QKVParallelLinear( File "/vllm-workspace/vllm/vllm/model_executor/layers/linear.py", line 874, in __init__ super().__init__(input_size=input_size, File "/vllm-workspace/vllm/vllm/model_executor/layers/linear.py", line 420, in __init__ super().__init__(input_size, File "/vllm-workspace/vllm/vllm/model_executor/layers/linear.py", line 266, in __init__ self.quant_method = quant_config.get_quant_method(self, File "/vllm-workspace/vllm-ascend/vllm_ascend/quantization/quant_config.py", line 92, in get_quant_method if self.is_layer_skipped_ascend(prefix, File "/vllm-workspace/vllm-ascend/vllm_ascend/quantization/quant_config.py", line 126, in is_layer_skipped_ascend is_shard_skipped = self.quant_description[shard_prefix + KeyError: 'model.layers.0.self_attn.q_proj.weight' Traceback (most recent call last): File "/usr/local/python3.10.17/lib/python3.10/runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "/usr/local/python3.10.17/lib/python3.10/runpy.py", line 86, in _run_code exec(code, run_globals) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1495, in <module> uvloop.run(run_server(args)) File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 82, in run return loop.run_until_complete(wrapper()) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete File 
"/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 61, in wrapper return await main File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1431, in run_server await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1451, in run_server_worker async with build_async_engine_client(args, client_config) as engine_client: File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__ return await anext(self.gen) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client async with build_async_engine_client_from_engine_args( File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__ return await anext(self.gen) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args async_llm = AsyncLLM.from_vllm_config( File "/vllm-workspace/vllm/vllm/v1/engine/async_llm.py", line 162, in from_vllm_config return cls( File "/vllm-workspace/vllm/vllm/v1/engine/async_llm.py", line 124, in __init__ self.engine_core = EngineCoreClient.make_async_mp_client( File "/vllm-workspace/vllm/vllm/v1/engine/core_client.py", line 96, in make_async_mp_client return AsyncMPClient(*client_args) File "/vllm-workspace/vllm/vllm/v1/engine/core_client.py", line 666, in __init__ super().__init__( File "/vllm-workspace/vllm/vllm/v1/engine/core_client.py", line 403, in __init__ with launch_core_engines(vllm_config, executor_class, File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 142, in __exit__ next(self.gen) File "/vllm-workspace/vllm/vllm/v1/engine/utils.py", line 434, in launch_core_engines wait_for_engine_startup( File "/vllm-workspace/vllm/vllm/v1/engine/utils.py", line 484, in wait_for_engine_startup raise RuntimeError("Engine core initialization failed. 
" RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} [ERROR] 2025-07-25-07:11:52 (PID:1889, Device:-1, RankID:-1) ERR99999 UNKNOWN applicaiton exception [root@e9a74ce1729c mas]# python -m vllm.entrypoints.openai.api_server --model /models/z50051264/summary/Qwen2.5-7B-nf4/ --max-num-seqs=256 --max-model-len=4096 --max-num-batched-tokens=4096 --tensor-parallel-size=1 --block-size=128 --host=0.0.0.0 --port=8080 --gpu-memory-utilization=0.9 --trust-remote-code --served-model-name=zzz --quantization bitsandbytes --load-format bitsandbytes 我使用bitsandbytes进行nf4量化,报错如上,请分析问题原因(我启动没有量化的版本是正常的)

最新推荐

recommend-type

Python程序TXLWizard生成TXL文件及转换工具介绍

### 知识点详细说明: #### 1. 图形旋转与TXL向导 图形旋转是图形学领域的一个基本操作,用于改变图形的方向。在本上下文中,TXL向导(TXLWizard)是由Esteban Marin编写的Python程序,它实现了特定的图形旋转功能,主要用于电子束光刻掩模的生成。光刻掩模是半导体制造过程中非常关键的一个环节,它确定了在硅片上沉积材料的精确位置。TXL向导通过生成特定格式的TXL文件来辅助这一过程。 #### 2. TXL文件格式与用途 TXL文件格式是一种基于文本的文件格式,它设计得易于使用,并且可以通过各种脚本语言如Python和Matlab生成。这种格式通常用于电子束光刻中,因为它的文本形式使得它可以通过编程快速创建复杂的掩模设计。TXL文件格式支持引用对象和复制对象数组(如SREF和AREF),这些特性可以用于优化电子束光刻设备的性能。 #### 3. TXLWizard的特性与优势 - **结构化的Python脚本:** TXLWizard 使用结构良好的脚本来创建掩模,这有助于开发者创建清晰、易于维护的代码。 - **灵活的Python脚本:** 作为Python程序,TXLWizard 可以利用Python语言的灵活性和强大的库集合来编写复杂的掩模生成逻辑。 - **可读性和可重用性:** 生成的掩模代码易于阅读,开发者可以轻松地重用和修改以适应不同的需求。 - **自动标签生成:** TXLWizard 还包括自动为图形对象生成标签的功能,这在管理复杂图形时非常有用。 #### 4. TXL转换器的功能 - **查看.TXL文件:** TXL转换器(TXLConverter)允许用户将TXL文件转换成HTML或SVG格式,这样用户就可以使用任何现代浏览器或矢量图形应用程序来查看文件。 - **缩放和平移:** 转换后的文件支持缩放和平移功能,这使得用户在图形界面中更容易查看细节和整体结构。 - **快速转换:** TXL转换器还提供快速的文件转换功能,以实现有效的掩模开发工作流程。 #### 5. 应用场景与技术参考 TXLWizard的应用场景主要集中在电子束光刻技术中,特别是用于设计和制作半导体器件时所需的掩模。TXLWizard作为一个向导,不仅提供了生成TXL文件的基础框架,还提供了一种方式来优化掩模设计,提高光刻过程的效率和精度。对于需要进行光刻掩模设计的工程师和研究人员来说,TXLWizard提供了一种有效的方法来实现他们的设计目标。 #### 6. 系统开源特性 标签“系统开源”表明TXLWizard遵循开放源代码的原则,这意味着源代码对所有人开放,允许用户自由地查看、修改和分发软件。开源项目通常拥有活跃的社区,社区成员可以合作改进软件,添加新功能,或帮助解决遇到的问题。这种开放性促进了技术创新,并允许用户根据自己的需求定制软件。 #### 7. 压缩包文件的文件名称列表 文件名称列表中的“txlwizard-master”可能指的是TXLWizard项目的主版本库或主分支。这个名称表明了这是项目源代码的中心点,其他开发者会从这个主分支拉取代码进行合作开发或部署。以“-master”结尾通常是版本控制系统中表示主要开发线路的常见约定,例如Git中的master(现在更常被称为main)分支。 通过这些知识点的详细解释,我们可以看到TXLWizard不仅是一个用于生成TXL文件的工具,它还整合了一系列的功能,使得电子束光刻掩模的设计工作更为高效和直观。同时,作为一个开源项目,它能够借助社区的力量不断进步,为用户带来更多的便利和创新。
recommend-type

【创新图生成:扣子平台的技术前沿与创新思维】:引领图像生成技术的新潮流

# 1. 扣子平台的图像生成技术概述 在当今数字化时代,图像生成技术正成为推动技术创新和社会变革的关键力量。扣子平台正是这一技术革新的代表。本章将对扣子平台的图像生成技术进行一个全面概述,探究它如何将理论技术转化为实际应用,并带来显著的社会影响。 扣子平台运用了先进的图像生成技术,将现实世界和数字空间无缝融合,打造了沉浸式的视觉体验。其技术的核心是通过高度复杂的算法模型,快速生成高质
recommend-type

海康威视机器视觉工程师考核

<think>好的,我现在需要帮助用户了解海康威视机器视觉工程师的考核内容和要求。根据用户提供的引用资料,特别是引用[1]和[2],里面提到了考核素材包分为初级和中级,涵盖理论、算法、应用案例等。首先,我要整理这些信息,确保结构清晰,符合用户要求的格式。 接下来,我需要确认素材包的具体内容,比如初级和中级的不同点。引用[2]提到初级包含基础理论、算法实现和实际案例,中级则增加复杂算法和项目分析。这部分需要分点说明,方便用户理解层次。 另外,用户可能想知道如何准备考核,比如下载素材、学习顺序、模拟考核等,引用[2]中有使用说明和注意事项,这部分也要涵盖进去。同时要注意提醒用户考核窗口已关闭,
recommend-type

Linux环境下Docker Hub公共容器映像检测工具集

在给出的知识点中,我们需要详细解释有关Docker Hub、公共容器映像、容器编排器以及如何与这些工具交互的详细信息。同时,我们会涵盖Linux系统下的相关操作和工具使用,以及如何在ECS和Kubernetes等容器编排工具中运用这些检测工具。 ### Docker Hub 和公共容器映像 Docker Hub是Docker公司提供的一项服务,它允许用户存储、管理以及分享Docker镜像。Docker镜像可以视为应用程序或服务的“快照”,包含了运行特定软件所需的所有必要文件和配置。公共容器映像指的是那些被标记为公开可见的Docker镜像,任何用户都可以拉取并使用这些镜像。 ### 静态和动态标识工具 静态和动态标识工具在Docker Hub上用于识别和分析公共容器映像。静态标识通常指的是在不运行镜像的情况下分析镜像的元数据和内容,例如检查Dockerfile中的指令、环境变量、端口映射等。动态标识则需要在容器运行时对容器的行为和性能进行监控和分析,如资源使用率、网络通信等。 ### 容器编排器与Docker映像 容器编排器是用于自动化容器部署、管理和扩展的工具。在Docker环境中,容器编排器能够自动化地启动、停止以及管理容器的生命周期。常见的容器编排器包括ECS和Kubernetes。 - **ECS (Elastic Container Service)**:是由亚马逊提供的容器编排服务,支持Docker容器,并提供了一种简单的方式来运行、停止以及管理容器化应用程序。 - **Kubernetes**:是一个开源平台,用于自动化容器化应用程序的部署、扩展和操作。它已经成为容器编排领域的事实标准。 ### 如何使用静态和动态标识工具 要使用这些静态和动态标识工具,首先需要获取并安装它们。从给定信息中了解到,可以通过克隆仓库或下载压缩包并解压到本地系统中。之后,根据需要针对不同的容器编排环境(如Dockerfile、ECS、Kubernetes)编写配置,以集成和使用这些检测工具。 ### Dockerfile中的工具使用 在Dockerfile中使用工具意味着将检测工具的指令嵌入到构建过程中。这可能包括安装检测工具的命令、运行容器扫描的步骤,以及将扫描结果集成到镜像构建流程中,确保只有通过安全和合规检查的容器镜像才能被构建和部署。 ### ECS与Kubernetes中的工具集成 在ECS或Kubernetes环境中,工具的集成可能涉及到创建特定的配置文件、定义服务和部署策略,以及编写脚本或控制器来自动执行检测任务。这样可以在容器编排的过程中实现实时监控,确保容器编排器只使用符合预期的、安全的容器镜像。 ### Linux系统下的操作 在Linux系统下操作这些工具,用户可能需要具备一定的系统管理和配置能力。这包括使用Linux命令行工具、管理文件系统权限、配置网络以及安装和配置软件包等。 ### 总结 综上所述,Docker Hub上的静态和动态标识工具提供了一种方法来检测和分析公共容器映像,确保这些镜像的安全性和可靠性。这些工具在Linux开发环境中尤为重要,因为它们帮助开发人员和运维人员确保他们的容器映像满足安全要求。通过在Dockerfile、ECS和Kubernetes中正确使用这些工具,可以提高应用程序的安全性,减少由于使用不安全的容器镜像带来的风险。此外,掌握Linux系统下的操作技能,可以更好地管理和维护这些工具,确保它们能够有效地发挥作用。
recommend-type

【扣子平台图像艺术探究:理论与实践的完美结合】:深入学习图像生成的艺术

# 1. 图像艺术的理论基础 艺术领域的每一个流派和技巧都有其理论基础。在图像艺术中,理论基础不仅是对艺术表现形式的认知,也是掌握艺术创作内在逻辑的关键。深入理解图像艺术的理论基础,能够帮助艺术家们在创作过程中更加明确地表达自己的艺术意图,以及更好地与观众沟通。 图像艺术的理论
recommend-type

增广路定理的证明

<think>嗯,用户想查找增广路定理的证明,关于图论和网络流算法的。我得先理解增广路定理是什么,然后找到相关的证明方法。根据之前的引用,尤其是引用5提到最大流最小割定理,里面有三个等价条件,其中第二个是残余网络中没有增广路径时,流就是最大流。这可能和增广路定理相关,也就是当残余网络中没有增广路时,当前流就是最大流,这可能就是增广路定理的内容。 首先,我需要明确增广路定理的陈述。根据引用5,增广路定理可能指的是:一个流是最大流当且仅当残余网络中不存在增广路径。这个定理的证明需要用到最大流最小割定理,也就是第三个条件,即最大流的流量等于最小割的容量。 证明的步骤可能需要分为两个方向:必要性(
recommend-type

Pulse:基于SwiftUI的Apple平台高效日志记录与网络监控

从给定文件信息中,我们可以提取出以下IT知识点进行详细阐述: **Pulse概览:** Pulse是一个专门针对Apple平台(如iOS、iPadOS、macOS等)的功能强大的日志记录系统。其设计目的是为了简化开发者在这些平台上调试网络请求和应用日志的过程。Pulse的核心特色是它使用SwiftUI来构建,这有助于开发者利用现代Swift语言的声明式UI优势来快速开发和维护。 **SwiftUI框架:** SwiftUI是一种声明式框架,由苹果公司推出,用于构建用户界面。与传统的UIKit相比,SwiftUI使用更加简洁的代码来描述界面和界面元素,它允许开发者以声明的方式定义视图和界面布局。SwiftUI支持跨平台,这意味着同一套代码可以在不同的Apple设备上运行,大大提高了开发效率和复用性。Pulse选择使用SwiftUI构建,显示了其对现代化、高效率开发的支持。 **Network Inspector功能:** Pulse具备Network Inspector功能,这个功能使得开发者能够在开发iOS应用时,直接从应用内记录和检查网络请求和日志。这种内嵌式的网络诊断能力非常有助于快速定位网络请求中的问题,如不正确的URL、不返回预期响应等。与传统的需要外部工具来抓包和分析的方式相比,这样的内嵌式工具大大减少了调试的复杂性。 **日志记录和隐私保护:** Pulse强调日志是本地记录的,并保证不会离开设备。这种做法对隐私保护至关重要,尤其是考虑到当前数据保护法规如GDPR等的严格要求。因此,Pulse的设计在帮助开发者进行问题诊断的同时,也确保了用户数据的安全性。 **集成和框架支持:** Pulse不仅仅是一个工具,它更是一个框架。它能够记录来自URLSession的事件,这意味着它可以与任何使用URLSession进行网络通信的应用或框架配合使用,包括但不限于Apple官方的网络库。此外,Pulse与使用它的框架(例如Alamofire)也能够良好配合,Alamofire是一个流行的网络请求库,广泛应用于Swift开发中。Pulse提供了一个PulseUI视图组件,开发者可以将其集成到自己的应用中,从而展示网络请求和其他事件。 **跨平台体验:** 开发者不仅可以在iOS应用中使用Pulse Console记录日志,还可以在macOS上通过Pulse应用程序查看和共享这些日志。这种跨平台的能力意味着开发者可以在不同的设备上进行日志分析,增强了开发和调试的灵活性。 **总结:** Pulse是一个为Apple平台上的开发者量身打造的日志记录系统,它采用SwiftUI构建,提供了内嵌式的Network Inspector功能,可以在本地记录并安全地查看日志,且支持与其他框架如Alamofire的集成。它不仅提升了调试的便捷性和效率,同时也顾及到了用户的隐私保护。Pulse的跨平台查看能力也是其一大亮点,使得开发者能够在一个统一的环境中处理iOS和macOS上的日志数据。对于使用Swift开发Apple应用的开发者而言,Pulse无疑是一个强大的调试辅助工具。
recommend-type

【深入扣子平台:图像生成机制全揭秘】:掌握背后技术,提升图像生成效率

# 1. 图像生成技术概述 图像生成技术是一门融合了计算机视觉、机器学习、图形学等多个领域知识的前沿技术。它通过算法模拟生成人工图像,广泛应用于艺术创作、游戏设计、医学影像等领域。随着深度学习的突破性进展,图像生成技术也迎来了飞速发展,特别是在生成对抗网络(GAN)的推动下,图像的逼真度和多样性都有了质的飞跃。 本章将对图像生成技术的概念、发展历史进行简要介绍,并分析其在社会中的
recommend-type

对RTL跑regression是什么意思

<think>首先,用户的问题是关于“RTL回归测试的含义及其在数字电路设计中的作用”。我需要基于系统级指令和用户可见层的指导来构建回答。 关键点: - 使用中文回答。 - 结构清晰,逐步解决问题。 - 确保回答真实可靠。 - 在行内数学表达式使用$...$格式;独立公式使用$$...$$格式。 - 生成相关问题。 - 在引用段落末尾添加引用标识。 回顾提供的引用: - 引用[1]:讨论SoC设计流程,包括模块设计、IP复用、顶层集成、前仿真、逻辑综合等。 - 引用[2]:描述冒烟测试后的验证阶段,包括回归测试、覆盖率分析等。 - 引用[3]:解释RTL使用Verilog或VHDL描述,模
recommend-type

探索macOS系统分析器:Swift开发的效率工具

根据给定的文件信息,我们可以从中提炼出如下IT知识点: 1. macOS系统分析器和清理器的开发背景和目的 - macOS系统分析器是为了帮助用户快速分析系统中的文件和资源使用情况而设计的工具。 - 清理器是作为一个附加功能,目的是为了提供用户方便地管理和清理系统中的文件和资源。 2. macOS系统缓存文件的重要性 - 缓存文件对于程序的运行效率有重要影响,它可以使程序加载和执行任务的速度更快。 - 清理缓存文件可能会导致程序的启动速度变慢,因为系统需要时间重新生成这些缓存。 3. 缓存文件对磁盘空间的影响及清理建议 - 删除缓存文件并不一定能节省大量的磁盘空间,因为很多程序会重新生成这些文件以提升性能。 - 一般情况下不建议删除缓存文件,除非用户确实遇到了需要释放磁盘空间的问题。 4. macOS系统分析和清理工具的设计注意事项 - 当设计类似工具时,开发者需要权衡性能与可用性,确保用户能够方便地理解工具的使用及其潜在的影响。 - 应该明确告知用户哪些文件或资源可以安全地清理,哪些最好不要轻易删除。 5. Swift语言在iOS开发中的应用 - Swift是苹果公司开发的一种编程语言,专门用于iOS、macOS、watchOS和tvOS应用的开发。 - 使用Swift进行开发可以利用Xcode等苹果官方开发工具链,提高开发效率和程序性能。 6. Xcode和Swift开发环境的具体应用 - Xcode是苹果官方的集成开发环境(IDE),集成了代码编辑器、调试工具和性能分析工具,非常适合使用Swift语言进行iOS和macOS应用开发。 - 开发者可以在Xcode中创建项目,编写Swift代码,并且使用Xcode提供的各种模板和库来加速开发过程。 7. Clean-Me-master压缩包文件的可能内容 - Clean-Me-master是提供给开发者的资源文件包,可能包含了用于macOS系统分析器和清理器的源代码文件、项目配置文件、依赖库以及可能的使用说明文档。 - 通过分析该文件包,开发者可以更深入地了解程序结构和实现逻辑,进一步进行维护和更新。 通过上述知识点的详细解释,我们可以看到,一个简单的文件信息不仅涵盖了macOS系统工具的开发背景、功能和注意事项,还涉及了Swift语言的开发环境、Xcode的使用以及文件管理等多方面的IT知识。这不仅对于初学者理解macOS系统工具的开发过程有帮助,也对于希望深入了解macOS系统维护的开发者提供了实践的指导。