From 38eac40d56510f343ffa4d6f7613d4e820d14cd1 Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Thu, 15 Jan 2026 15:46:36 -0800 Subject: [PATCH] openai: tweak v1/responses to conform better (#13736) * openai: tweak v1/responses to conform better * openai: provide better error for image URLs * lint --- middleware/openai.go | 9 +- openai/openai.go | 4 + openai/responses.go | 382 ++++++++++++++++++++++++++++++++------- openai/responses_test.go | 18 +- 4 files changed, 335 insertions(+), 78 deletions(-) diff --git a/middleware/openai.go b/middleware/openai.go index 5e526416e..64dc97ec1 100644 --- a/middleware/openai.go +++ b/middleware/openai.go @@ -8,6 +8,7 @@ import ( "math/rand" "net/http" "strings" + "time" "github.com/gin-gonic/gin" @@ -441,6 +442,7 @@ type ResponsesWriter struct { stream bool responseID string itemID string + request openai.ResponsesRequest } func (w *ResponsesWriter) writeEvent(eventType string, data any) error { @@ -478,7 +480,9 @@ func (w *ResponsesWriter) writeResponse(data []byte) (int, error) { // Non-streaming response w.ResponseWriter.Header().Set("Content-Type", "application/json") - response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse) + response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse, w.request) + completedAt := time.Now().Unix() + response.CompletedAt = &completedAt return len(data), json.NewEncoder(w.ResponseWriter).Encode(response) } @@ -523,11 +527,12 @@ func ResponsesMiddleware() gin.HandlerFunc { w := &ResponsesWriter{ BaseWriter: BaseWriter{ResponseWriter: c.Writer}, - converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model), + converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model, req), model: req.Model, stream: streamRequested, responseID: responseID, itemID: itemID, + request: req, } // Set headers based on streaming mode diff --git a/openai/openai.go b/openai/openai.go index 9dcba3000..44ffb21bc 100644 --- a/openai/openai.go +++ b/openai/openai.go @@ -630,6 +630,10 @@ func nameFromToolCallID(messages []Message, toolCallID string) string { // decodeImageURL decodes a base64 data URI into raw image bytes. 
func decodeImageURL(url string) (api.ImageData, error) { + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") { + return nil, errors.New("image URLs are not currently supported, please use base64 encoded data instead") + } + types := []string{"jpeg", "jpg", "png", "webp"} // Support blank mime type to match /api/chat's behavior of taking just unadorned base64 diff --git a/openai/responses.go b/openai/responses.go index 1fbd2de09..f6f202b9a 100644 --- a/openai/responses.go +++ b/openai/responses.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "math/rand" + "time" "github.com/ollama/ollama/api" ) @@ -265,9 +266,9 @@ type ResponsesText struct { type ResponsesTool struct { Type string `json:"type"` // "function" Name string `json:"name"` - Description string `json:"description,omitempty"` - Strict bool `json:"strict,omitempty"` - Parameters map[string]any `json:"parameters,omitempty"` + Description *string `json:"description"` // nullable but required + Strict *bool `json:"strict"` // nullable but required + Parameters map[string]any `json:"parameters"` // nullable but required } type ResponsesRequest struct { @@ -475,11 +476,16 @@ func convertTool(t ResponsesTool) (api.Tool, error) { } } + var description string + if t.Description != nil { + description = *t.Description + } + return api.Tool{ Type: t.Type, Function: api.ToolFunction{ Name: t.Name, - Description: t.Description, + Description: description, Parameters: params, }, }, nil @@ -516,17 +522,60 @@ func convertInputMessage(m ResponsesInputMessage) (api.Message, error) { // Response types for the Responses API +// ResponsesTextField represents the text output configuration in the response. +type ResponsesTextField struct { + Format ResponsesTextFormat `json:"format"` +} + +// ResponsesReasoningOutput represents reasoning configuration in the response. +type ResponsesReasoningOutput struct { + Effort *string `json:"effort,omitempty"` + Summary *string `json:"summary,omitempty"` +} + +// ResponsesError represents an error in the response. +type ResponsesError struct { + Code string `json:"code"` + Message string `json:"message"` +} + +// ResponsesIncompleteDetails represents details about why a response was incomplete. 
+type ResponsesIncompleteDetails struct { + Reason string `json:"reason"` +} + type ResponsesResponse struct { - ID string `json:"id"` - Object string `json:"object"` - CreatedAt int64 `json:"created_at"` - Status string `json:"status"` - Model string `json:"model"` - Output []ResponsesOutputItem `json:"output"` - Usage *ResponsesUsage `json:"usage,omitempty"` - // TODO(drifkin): add `temperature` and `top_p` to the response, but this - // requires additional plumbing to find the effective values since the - // defaults can come from the model or the request + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int64 `json:"created_at"` + CompletedAt *int64 `json:"completed_at"` + Status string `json:"status"` + IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details"` + Model string `json:"model"` + PreviousResponseID *string `json:"previous_response_id"` + Instructions *string `json:"instructions"` + Output []ResponsesOutputItem `json:"output"` + Error *ResponsesError `json:"error"` + Tools []ResponsesTool `json:"tools"` + ToolChoice any `json:"tool_choice"` + Truncation string `json:"truncation"` + ParallelToolCalls bool `json:"parallel_tool_calls"` + Text ResponsesTextField `json:"text"` + TopP float64 `json:"top_p"` + PresencePenalty float64 `json:"presence_penalty"` + FrequencyPenalty float64 `json:"frequency_penalty"` + TopLogprobs int `json:"top_logprobs"` + Temperature float64 `json:"temperature"` + Reasoning *ResponsesReasoningOutput `json:"reasoning"` + Usage *ResponsesUsage `json:"usage"` + MaxOutputTokens *int `json:"max_output_tokens"` + MaxToolCalls *int `json:"max_tool_calls"` + Store bool `json:"store"` + Background bool `json:"background"` + ServiceTier string `json:"service_tier"` + Metadata map[string]any `json:"metadata"` + SafetyIdentifier *string `json:"safety_identifier"` + PromptCacheKey *string `json:"prompt_cache_key"` } type ResponsesOutputItem struct { @@ -550,18 +599,39 @@ type ResponsesReasoningSummary struct { } type ResponsesOutputContent struct { - Type string `json:"type"` // "output_text" - Text string `json:"text"` + Type string `json:"type"` // "output_text" + Text string `json:"text"` + Annotations []any `json:"annotations"` + Logprobs []any `json:"logprobs"` +} + +type ResponsesInputTokensDetails struct { + CachedTokens int `json:"cached_tokens"` +} + +type ResponsesOutputTokensDetails struct { + ReasoningTokens int `json:"reasoning_tokens"` } type ResponsesUsage struct { - InputTokens int `json:"input_tokens"` - OutputTokens int `json:"output_tokens"` - TotalTokens int `json:"total_tokens"` + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + TotalTokens int `json:"total_tokens"` + InputTokensDetails ResponsesInputTokensDetails `json:"input_tokens_details"` + OutputTokensDetails ResponsesOutputTokensDetails `json:"output_tokens_details"` } -// ToResponse converts an api.ChatResponse to a Responses API response -func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse) ResponsesResponse { +// derefFloat64 returns the value of a float64 pointer, or a default if nil. +func derefFloat64(p *float64, def float64) float64 { + if p != nil { + return *p + } + return def +} + +// ToResponse converts an api.ChatResponse to a Responses API response. +// The request is used to echo back request parameters in the response. 
+func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse, request ResponsesRequest) ResponsesResponse { var output []ResponsesOutputItem // Add reasoning item if thinking is present @@ -585,6 +655,7 @@ func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse) output = append(output, ResponsesOutputItem{ ID: fmt.Sprintf("fc_%s_%d", responseID, i), Type: "function_call", + Status: "completed", CallID: tc.ID, Name: tc.Function.Name, Arguments: tc.Function.Arguments, @@ -598,25 +669,90 @@ func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse) Role: "assistant", Content: []ResponsesOutputContent{ { - Type: "output_text", - Text: chatResponse.Message.Content, + Type: "output_text", + Text: chatResponse.Message.Content, + Annotations: []any{}, + Logprobs: []any{}, }, }, }) } + var instructions *string + if request.Instructions != "" { + instructions = &request.Instructions + } + + // Build truncation with default + truncation := "disabled" + if request.Truncation != nil { + truncation = *request.Truncation + } + + tools := request.Tools + if tools == nil { + tools = []ResponsesTool{} + } + + text := ResponsesTextField{ + Format: ResponsesTextFormat{Type: "text"}, + } + if request.Text != nil && request.Text.Format != nil { + text.Format = *request.Text.Format + } + + // Build reasoning output from request + var reasoning *ResponsesReasoningOutput + if request.Reasoning.Effort != "" || request.Reasoning.Summary != "" { + reasoning = &ResponsesReasoningOutput{} + if request.Reasoning.Effort != "" { + reasoning.Effort = &request.Reasoning.Effort + } + if request.Reasoning.Summary != "" { + reasoning.Summary = &request.Reasoning.Summary + } + } + return ResponsesResponse{ - ID: responseID, - Object: "response", - CreatedAt: chatResponse.CreatedAt.Unix(), - Status: "completed", - Model: model, - Output: output, + ID: responseID, + Object: "response", + CreatedAt: chatResponse.CreatedAt.Unix(), + CompletedAt: nil, // Set by middleware when writing final response + Status: "completed", + IncompleteDetails: nil, // Only populated if response incomplete + Model: model, + PreviousResponseID: nil, // Not supported + Instructions: instructions, + Output: output, + Error: nil, // Only populated on failure + Tools: tools, + ToolChoice: "auto", // Default value + Truncation: truncation, + ParallelToolCalls: true, // Default value + Text: text, + TopP: derefFloat64(request.TopP, 1.0), + PresencePenalty: 0, // Default value + FrequencyPenalty: 0, // Default value + TopLogprobs: 0, // Default value + Temperature: derefFloat64(request.Temperature, 1.0), + Reasoning: reasoning, Usage: &ResponsesUsage{ InputTokens: chatResponse.PromptEvalCount, OutputTokens: chatResponse.EvalCount, TotalTokens: chatResponse.PromptEvalCount + chatResponse.EvalCount, + // TODO(drifkin): wire through the actual values + InputTokensDetails: ResponsesInputTokensDetails{CachedTokens: 0}, + // TODO(drifkin): wire through the actual values + OutputTokensDetails: ResponsesOutputTokensDetails{ReasoningTokens: 0}, }, + MaxOutputTokens: request.MaxOutputTokens, + MaxToolCalls: nil, // Not supported + Store: false, // We don't store responses + Background: request.Background, + ServiceTier: "default", // Default value + Metadata: map[string]any{}, + SafetyIdentifier: nil, // Not supported + PromptCacheKey: nil, // Not supported } } @@ -636,6 +772,7 @@ type ResponsesStreamConverter struct { responseID string itemID string model string + request ResponsesRequest // State 
tracking (mutated across Process calls) firstWrite bool @@ -668,11 +805,12 @@ func (c *ResponsesStreamConverter) newEvent(eventType string, data map[string]an } // NewResponsesStreamConverter creates a new converter with the given configuration. -func NewResponsesStreamConverter(responseID, itemID, model string) *ResponsesStreamConverter { +func NewResponsesStreamConverter(responseID, itemID, model string, request ResponsesRequest) *ResponsesStreamConverter { return &ResponsesStreamConverter{ responseID: responseID, itemID: itemID, model: model, + request: request, firstWrite: true, } } @@ -717,25 +855,120 @@ func (c *ResponsesStreamConverter) Process(r api.ChatResponse) []ResponsesStream return events } +// buildResponseObject creates a full response object with all required fields for streaming events. +func (c *ResponsesStreamConverter) buildResponseObject(status string, output []any, usage map[string]any) map[string]any { + var instructions any = nil + if c.request.Instructions != "" { + instructions = c.request.Instructions + } + + truncation := "disabled" + if c.request.Truncation != nil { + truncation = *c.request.Truncation + } + + var tools []any + if c.request.Tools != nil { + for _, t := range c.request.Tools { + tools = append(tools, map[string]any{ + "type": t.Type, + "name": t.Name, + "description": t.Description, + "strict": t.Strict, + "parameters": t.Parameters, + }) + } + } + if tools == nil { + tools = []any{} + } + + textFormat := map[string]any{"type": "text"} + if c.request.Text != nil && c.request.Text.Format != nil { + textFormat = map[string]any{ + "type": c.request.Text.Format.Type, + } + if c.request.Text.Format.Name != "" { + textFormat["name"] = c.request.Text.Format.Name + } + if c.request.Text.Format.Schema != nil { + textFormat["schema"] = c.request.Text.Format.Schema + } + if c.request.Text.Format.Strict != nil { + textFormat["strict"] = *c.request.Text.Format.Strict + } + } + + var reasoning any = nil + if c.request.Reasoning.Effort != "" || c.request.Reasoning.Summary != "" { + r := map[string]any{} + if c.request.Reasoning.Effort != "" { + r["effort"] = c.request.Reasoning.Effort + } else { + r["effort"] = nil + } + if c.request.Reasoning.Summary != "" { + r["summary"] = c.request.Reasoning.Summary + } else { + r["summary"] = nil + } + reasoning = r + } + + // Build top_p and temperature with defaults + topP := 1.0 + if c.request.TopP != nil { + topP = *c.request.TopP + } + temperature := 1.0 + if c.request.Temperature != nil { + temperature = *c.request.Temperature + } + + return map[string]any{ + "id": c.responseID, + "object": "response", + "created_at": time.Now().Unix(), + "completed_at": nil, + "status": status, + "incomplete_details": nil, + "model": c.model, + "previous_response_id": nil, + "instructions": instructions, + "output": output, + "error": nil, + "tools": tools, + "tool_choice": "auto", + "truncation": truncation, + "parallel_tool_calls": true, + "text": map[string]any{"format": textFormat}, + "top_p": topP, + "presence_penalty": 0, + "frequency_penalty": 0, + "top_logprobs": 0, + "temperature": temperature, + "reasoning": reasoning, + "usage": usage, + "max_output_tokens": c.request.MaxOutputTokens, + "max_tool_calls": nil, + "store": false, + "background": c.request.Background, + "service_tier": "default", + "metadata": map[string]any{}, + "safety_identifier": nil, + "prompt_cache_key": nil, + } +} + func (c *ResponsesStreamConverter) createResponseCreatedEvent() ResponsesStreamEvent { return c.newEvent("response.created", 
map[string]any{ - "response": map[string]any{ - "id": c.responseID, - "object": "response", - "status": "in_progress", - "output": []any{}, - }, + "response": c.buildResponseObject("in_progress", []any{}, nil), }) } func (c *ResponsesStreamConverter) createResponseInProgressEvent() ResponsesStreamEvent { return c.newEvent("response.in_progress", map[string]any{ - "response": map[string]any{ - "id": c.responseID, - "object": "response", - "status": "in_progress", - "output": []any{}, - }, + "response": c.buildResponseObject("in_progress", []any{}, nil), }) } @@ -762,9 +995,10 @@ func (c *ResponsesStreamConverter) processThinking(thinking string) []ResponsesS // Emit delta events = append(events, c.newEvent("response.reasoning_summary_text.delta", map[string]any{ - "item_id": c.reasoningItemID, - "output_index": c.outputIndex, - "delta": thinking, + "item_id": c.reasoningItemID, + "output_index": c.outputIndex, + "summary_index": 0, + "delta": thinking, })) // TODO(drifkin): consider adding @@ -783,9 +1017,10 @@ func (c *ResponsesStreamConverter) finishReasoning() []ResponsesStreamEvent { events := []ResponsesStreamEvent{ c.newEvent("response.reasoning_summary_text.done", map[string]any{ - "item_id": c.reasoningItemID, - "output_index": c.outputIndex, - "text": c.accumulatedThinking, + "item_id": c.reasoningItemID, + "output_index": c.outputIndex, + "summary_index": 0, + "text": c.accumulatedThinking, }), c.newEvent("response.output_item.done", map[string]any{ "output_index": c.outputIndex, @@ -898,8 +1133,10 @@ func (c *ResponsesStreamConverter) processTextContent(content string) []Response "output_index": c.outputIndex, "content_index": c.contentIndex, "part": map[string]any{ - "type": "output_text", - "text": "", + "type": "output_text", + "text": "", + "annotations": []any{}, + "logprobs": []any{}, }, })) } @@ -913,6 +1150,7 @@ func (c *ResponsesStreamConverter) processTextContent(content string) []Response "output_index": c.outputIndex, "content_index": 0, "delta": content, + "logprobs": []any{}, })) return events @@ -944,8 +1182,10 @@ func (c *ResponsesStreamConverter) buildFinalOutput() []any { "status": "completed", "role": "assistant", "content": []map[string]any{{ - "type": "output_text", - "text": c.accumulatedText, + "type": "output_text", + "text": c.accumulatedText, + "annotations": []any{}, + "logprobs": []any{}, }}, }) } @@ -967,6 +1207,7 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo "output_index": c.outputIndex, "content_index": 0, "text": c.accumulatedText, + "logprobs": []any{}, })) // response.content_part.done @@ -975,8 +1216,10 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo "output_index": c.outputIndex, "content_index": 0, "part": map[string]any{ - "type": "output_text", - "text": c.accumulatedText, + "type": "output_text", + "text": c.accumulatedText, + "annotations": []any{}, + "logprobs": []any{}, }, })) @@ -989,26 +1232,31 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo "status": "completed", "role": "assistant", "content": []map[string]any{{ - "type": "output_text", - "text": c.accumulatedText, + "type": "output_text", + "text": c.accumulatedText, + "annotations": []any{}, + "logprobs": []any{}, }}, }, })) } // response.completed - events = append(events, c.newEvent("response.completed", map[string]any{ - "response": map[string]any{ - "id": c.responseID, - "object": "response", - "status": "completed", - "output": c.buildFinalOutput(), - "usage": 
map[string]any{ - "input_tokens": r.PromptEvalCount, - "output_tokens": r.EvalCount, - "total_tokens": r.PromptEvalCount + r.EvalCount, - }, + usage := map[string]any{ + "input_tokens": r.PromptEvalCount, + "output_tokens": r.EvalCount, + "total_tokens": r.PromptEvalCount + r.EvalCount, + "input_tokens_details": map[string]any{ + "cached_tokens": 0, }, + "output_tokens_details": map[string]any{ + "reasoning_tokens": 0, + }, + } + response := c.buildResponseObject("completed", c.buildFinalOutput(), usage) + response["completed_at"] = time.Now().Unix() + events = append(events, c.newEvent("response.completed", map[string]any{ + "response": response, })) return events diff --git a/openai/responses_test.go b/openai/responses_test.go index bfb6bb36e..743821b29 100644 --- a/openai/responses_test.go +++ b/openai/responses_test.go @@ -850,7 +850,7 @@ func TestFromResponsesRequest_Images(t *testing.T) { } func TestResponsesStreamConverter_TextOnly(t *testing.T) { - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) // First chunk with content events := converter.Process(api.ChatResponse{ @@ -916,7 +916,7 @@ func TestResponsesStreamConverter_TextOnly(t *testing.T) { } func TestResponsesStreamConverter_ToolCalls(t *testing.T) { - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) events := converter.Process(api.ChatResponse{ Message: api.Message{ @@ -952,7 +952,7 @@ func TestResponsesStreamConverter_ToolCalls(t *testing.T) { } func TestResponsesStreamConverter_Reasoning(t *testing.T) { - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) // First chunk with thinking events := converter.Process(api.ChatResponse{ @@ -1267,7 +1267,7 @@ func TestToResponse_WithReasoning(t *testing.T) { Content: "The answer is 42", }, Done: true, - }) + }, ResponsesRequest{}) // Should have 2 output items: reasoning + message if len(response.Output) != 2 { @@ -1638,7 +1638,7 @@ func TestFromResponsesRequest_ShorthandFormats(t *testing.T) { func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) { // Verify that response.output_item.done includes content field for messages - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) // First chunk converter.Process(api.ChatResponse{ @@ -1686,7 +1686,7 @@ func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) { func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) { // Verify that response.completed includes the output array - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) // Process some content converter.Process(api.ChatResponse{ @@ -1730,7 +1730,7 @@ func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) { // Verify that response.created includes an empty output array - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := 
NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) events := converter.Process(api.ChatResponse{ Message: api.Message{Content: "Hi"}, @@ -1757,7 +1757,7 @@ func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) { func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) { // Verify that events include incrementing sequence numbers - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) events := converter.Process(api.ChatResponse{ Message: api.Message{Content: "Hello"}, @@ -1791,7 +1791,7 @@ func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) { func TestResponsesStreamConverter_FunctionCallStatus(t *testing.T) { // Verify that function call items include status field - converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) events := converter.Process(api.ChatResponse{ Message: api.Message{
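
For a quick end-to-end check of the behavior this patch changes, below is a minimal client sketch; it is not part of the change itself. It assumes a local Ollama server on the default port 11434, and the model name is a placeholder for any locally available model. Per the patch, a non-streaming request should come back with completed_at stamped by the middleware, the echoed request parameters (tools, truncation, temperature, top_p, tool_choice), and the expanded usage object carrying input_tokens_details and output_tokens_details.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	// Non-streaming request: the middleware sets completed_at when writing
	// the final response (see ResponsesWriter.writeResponse above).
	payload, err := json.Marshal(map[string]any{
		"model":  "gpt-oss:20b", // placeholder: substitute any locally pulled model
		"input":  "Say hello in one short sentence.",
		"stream": false,
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, "marshal:", err)
		os.Exit(1)
	}

	resp, err := http.Post("http://localhost:11434/v1/responses", "application/json", bytes.NewReader(payload))
	if err != nil {
		fmt.Fprintln(os.Stderr, "request:", err)
		os.Exit(1)
	}
	defer resp.Body.Close()

	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		fmt.Fprintln(os.Stderr, "decode:", err)
		os.Exit(1)
	}

	// Fields this change adds or now populates with spec-conformant defaults.
	for _, k := range []string{"completed_at", "truncation", "tool_choice", "temperature", "top_p", "usage"} {
		fmt.Printf("%s: %v\n", k, out[k])
	}
}

With "stream": true instead, the same full response object is embedded in the response.created, response.in_progress, and response.completed events (via buildResponseObject), with completed_at populated only on response.completed.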