Add POST _unified for the inference API #3313


Merged: 6 commits, merged on Jan 13, 2025
Changes from 1 commit
Addressing feedback and removing response
jonathan-buttner committed Jan 10, 2025
commit 90f9fd26b7b5dc9499db0133f54fb24e0245fa83
3 changes: 3 additions & 0 deletions specification/_types/Binary.ts
@@ -22,3 +22,6 @@ export type MapboxVectorTiles = ArrayBuffer

// ES|QL columns
export type EsqlColumns = ArrayBuffer

// Streaming endpoints response
export type StreamResult = ArrayBuffer
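
The new StreamResult alias deliberately stays opaque: streaming endpoints respond with raw bytes. A minimal decoding sketch, assuming the bytes carry UTF-8 text as chat-completion streams typically do; the helper is illustrative and not part of this PR:

// Decode one raw StreamResult chunk into text. UTF-8 is an assumption
// about the stream contents; the spec only promises raw bytes.
const utf8 = new TextDecoder('utf-8')

function decodeChunk(chunk: ArrayBuffer): string {
  // `stream: true` keeps partial multi-byte sequences buffered across calls.
  return utf8.decode(chunk, { stream: true })
}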
114 changes: 0 additions & 114 deletions specification/inference/_types/Results.ts
@@ -88,120 +88,6 @@ export class InferenceResult
rerank?: Array<RankedDocument>
}

/**
* The function the model wants to call.
*/
export class ResultFunctionCall {
/**
* The arguments to call the function with, which the model generated in JSON format.
*/
arguments?: string
/**
* The name of the function to call.
*/
name?: string
}

/**
* The tool call made by the model.
*/
export class ResultToolCall {
index: number
/**
* The identifier of the tool call.
*/
id?: string
/**
* The function the model wants to call.
*/
function?: ResultFunctionCall
/**
* The type of the tool.
*/
type?: string
}

export class CompletionDelta {
/**
* The contents of the chunked message.
*/
content?: string
/**
* The refusal message.
*/
refusal?: string
/**
* The role of the author of the message.
*/
role?: string
/**
* The tool calls made by the model.
*/
tool_calls?: Array<ResultToolCall>
}

/**
* Represent a completion choice returned from a model.
*/
export class CompletionChoice {
/**
* The delta generated by the model.
*/
delta: CompletionDelta
/**
* The reason the model stopped generating tokens.
*/
finish_reason?: string
/**
* The index of the choice in the array of choices field.
*/
index: number
}

/**
* The token usage statistics for the entire request.
*/
export class Usage {
/**
* The number of tokens in the generated completion.
*/
completion_tokens: number
/**
* The number of tokens in the prompt.
*/
prompt_tokens: number
/**
* The sum of completion_tokens and prompt_tokens.
*/
total_tokens: number
}

/**
* Represents the result format for a completion request using the Unified Inference API.
*/
export class UnifiedInferenceResult {
/**
* A unique identifier for the chat completion.
*/
id: string
/**
* A list of completion choices.
*/
choices: Array<CompletionChoice>
/**
* The model that generated the completion.
*/
model: string
/**
* The object type.
*/
object: string
/**
* The token usage statistics for the entire request.
*/
usage?: Usage
}

/**
* Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint
*/
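
With these typed result classes removed, the unified endpoint no longer declares a structured response body; the response becomes an opaque byte stream (see UnifiedResponse.ts below). A client that still wants typed access can reconstruct the deleted shapes on its own side. The sketch below is hypothetical: the interfaces mirror the removed CompletionDelta, CompletionChoice, and Usage fields, and the parsing assumes an OpenAI-style server-sent-event payload, which this PR does not itself specify.

// Hypothetical client-side types mirroring the spec classes deleted above.
interface ChunkDelta {
  content?: string
  refusal?: string
  role?: string
}

interface ChunkChoice {
  delta: ChunkDelta
  finish_reason?: string
  index: number
}

interface CompletionChunk {
  id: string
  choices: ChunkChoice[]
  model: string
  object: string
  usage?: { completion_tokens: number; prompt_tokens: number; total_tokens: number }
}

// Parse one SSE line of the form `data: {...}` into a CompletionChunk.
// The `data:` framing and the `[DONE]` sentinel are assumptions borrowed
// from OpenAI-style streams, not guarantees made by this specification.
function parseEvent(line: string): CompletionChunk | null {
  if (!line.startsWith('data:')) return null
  const payload = line.slice('data:'.length).trim()
  if (payload.length === 0 || payload === '[DONE]') return null
  return JSON.parse(payload) as CompletionChunk
}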
137 changes: 74 additions & 63 deletions specification/inference/unified_inference/UnifiedRequest.ts
@@ -21,8 +21,74 @@ import { TaskType } from '@inference/_types/TaskType'
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { float, long } from '@_types/Numeric'
import { Duration } from '@_types/Time'

/**
* Perform inference on the service using the Unified Schema
* @rest_spec_name inference.unified_inference
* @availability stack since=8.18.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
*/
export interface Request extends RequestBase {
path_parts: {
/**
* The task type
*/
task_type?: TaskType
/**
* The inference Id
*/
inference_id: Id
}
query_parameters: {
/**
* Specifies the amount of time to wait for the inference request to complete.
* @server_default 30s
*/
timeout?: Duration
}
body: {
/**
* A list of objects representing the conversation.
*/
messages: Array<Message>
/**
* The ID of the model to use.
*/
model?: string
/**
* The upper bound limit for the number of tokens that can be generated for a completion request.
*/
max_completion_tokens?: long
/**
* A sequence of strings to control when the model should stop generating additional tokens.
*/
stop?: Array<string>
/**
* The sampling temperature to use.
*/
temperature?: float
/**
* Controls which tool is called by the model.
*/
tool_choice?: CompletionToolType
/**
* A list of tools that the model can call.
*/
tools?: Array<CompletionTool>
/**
* Nucleus sampling, an alternative to sampling with temperature.
*/
top_p?: float
}
}

/**
* @codegen_names string, object
*/
export type CompletionToolType = string | CompletionToolChoice

/**
* An object-style representation of a single portion of a conversation.
*/
@@ -58,7 +124,7 @@ export interface ToolCall {
/**
* The identifier of the tool call.
*/
-  id: string
+  id: Id
/**
* The function that the model called.
*/
@@ -69,22 +135,27 @@
type: string
}

/**
* @codegen_names string, object
*/
export type MessageContent = string | Array<ContentObject>

/**
* An object representing part of the conversation.
*/
export interface Message {
/**
* The content of the message.
*/
-  content: string | Array<ContentObject>
+  content?: MessageContent
/**
* The role of the message author.
*/
role: string
/**
* The tool call that this message is responding to.
*/
-  tool_call_id?: string
+  tool_call_id?: Id
/**
* The tool calls generated by the model.
*/
@@ -152,63 +223,3 @@ export interface CompletionTool {
*/
function: CompletionToolFunction
}

/**
* Perform inference on the service using the Unified Schema
* @rest_spec_name inference.unified_inference
* @availability stack since=8.18.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
*/
export interface Request extends RequestBase {
path_parts: {
/**
* The task type
*/
task_type?: TaskType
/**
* The inference Id
*/
inference_id: Id
}
query_parameters: {
/**
* Specifies the amount of time to wait for the inference request to complete.
* @server_default 30s
*/
timeout?: Duration
}
body: {
/**
* A list of objects representing the conversation.
*/
messages: Array<Message>
/**
* The ID of the model to use.
*/
model?: string
/**
* The upper bound limit for the number of tokens that can be generated for a completion request.
*/
max_completion_tokens?: number
/**
* A sequence of strings to control when the model should stop generating additional tokens.
*/
stop?: Array<string>
/**
* The sampling temperature to use.
*/
temperature?: number
/**
* Controls which tool is called by the model.
*/
tool_choice?: string | CompletionToolChoice
/**
* A list of tools that the model can call.
*/
tools?: Array<CompletionTool>
/**
* Nucleus sampling, an alternative to sampling with temperature.
*/
top_p?: number
}
}
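
For reference, a request body sketch matching the body shape declared above. All values are placeholders; the CompletionToolChoice and CompletionTool definitions are collapsed in this diff, so the object forms below assume an OpenAI-style { type, function } layout. Note that tool_choice accepts either the string or the object branch of CompletionToolType.

// Placeholder values throughout; only the field names and their types
// come from the Request interface above.
const body = {
  model: 'gpt-4o', // assumed model name, not part of the spec
  messages: [
    { role: 'user', content: 'What is the weather like in Paris?' }
  ],
  max_completion_tokens: 256,
  temperature: 0.2,
  top_p: 0.9,
  stop: ['\n\n'],
  // The string form of CompletionToolType would be e.g. tool_choice: 'auto';
  // the object form (CompletionToolChoice) is shown here.
  tool_choice: {
    type: 'function',
    function: { name: 'get_current_weather' }
  },
  tools: [
    {
      type: 'function',
      function: {
        name: 'get_current_weather',
        description: 'Gets the current weather for a location',
        parameters: {
          type: 'object',
          properties: { location: { type: 'string' } }
        }
      }
    }
  ]
}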
4 changes: 2 additions & 2 deletions specification/inference/unified_inference/UnifiedResponse.ts
@@ -17,8 +17,8 @@
* under the License.
*/

- import { UnifiedInferenceResult } from '@inference/_types/Results'
+ import { StreamResult } from '@_types/Binary'

export class Response {
-  body: UnifiedInferenceResult
+  body: StreamResult
}
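
Because the body is now a StreamResult rather than a typed object, consuming the endpoint means reading raw bytes off the wire. A minimal consumption sketch, assuming an HTTP transport with Node 18+ fetch and a UTF-8 event stream; the URL, transport, and printing are illustrative, not part of the specification:

// Stream a unified inference response and print it as it arrives.
// The endpoint URL passed in is an assumption for illustration.
async function streamUnifiedCompletion(url: string, body: unknown): Promise<void> {
  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body)
  })
  if (!response.ok || response.body === null) {
    throw new Error(`request failed: ${response.status}`)
  }
  const reader = response.body.getReader()
  const decoder = new TextDecoder('utf-8')
  for (;;) {
    const { done, value } = await reader.read()
    if (done) break
    // Each chunk is raw bytes, mirroring the StreamResult ArrayBuffer alias.
    console.log(decoder.decode(value, { stream: true }))
  }
}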