// ministralEvent represents an event emitted during parsing. The unexported
// marker method limits implementations to the event types declared here.
type ministralEvent interface {
	isMinistralEvent()
}

// ministralEventContent carries a run of regular content text.
type ministralEventContent struct {
	content string
}

// ministralEventThinking carries a run of text found inside a thinking block.
type ministralEventThinking struct {
	thinking string
}

// ministralEventToolCall carries one tool call as it appeared in the stream:
// the tool name and the argument text, still undecoded.
type ministralEventToolCall struct {
	name string
	args string // raw JSON string
}

// Marker methods: they exist only so the three event types satisfy
// ministralEvent.
func (ministralEventContent) isMinistralEvent()  {}
func (ministralEventThinking) isMinistralEvent() {}
func (ministralEventToolCall) isMinistralEvent() {}

// MinistralParser incrementally parses streamed Ministral output into content,
// thinking text and tool calls.
type MinistralParser struct {
	state              ministralParserState // current state of the parsing state machine
	buffer             strings.Builder      // input received but not yet emitted as events
	tools              []api.Tool           // tools that parsed tool calls are looked up in
	hasThinkingSupport bool                 // value reported by HasThinkingSupport
	pendingToolName    string               // stores tool name while collecting args
}

// Literal tags that delimit sections of the model output.
const (
	ministralToolCallsTag = "[TOOL_CALLS]" // starts a tool call; the tool name follows
	ministralThinkTag     = "[THINK]"      // opens a thinking block
	ministralThinkEndTag  = "[/THINK]"     // closes a thinking block
	ministralArgsTag      = "[ARGS]"       // separates the tool name from its JSON arguments
)
+func (p *MinistralParser) eat() ([]ministralEvent, bool) { + var events []ministralEvent switch p.state { case ministralCollectingContent: - if strings.Contains(p.buffer.String(), "[TOOL_CALLS]") { - before, _ := splitAtTag(&p.buffer, "[TOOL_CALLS]", false) - if before != "" { - return before, "", calls, nil + bufStr := p.buffer.String() + + // Check for [TOOL_CALLS] tag + if strings.Contains(bufStr, ministralToolCallsTag) { + split := strings.SplitN(bufStr, ministralToolCallsTag, 2) + before := strings.TrimRightFunc(split[0], unicode.IsSpace) + if len(before) > 0 { + events = append(events, ministralEventContent{content: before}) } + after := split[1] + p.buffer.Reset() + p.buffer.WriteString(after) p.state = ministralCollectingToolName - } else if strings.Contains(p.buffer.String(), "[THINK]") { + return events, true + } + + // Check for [THINK] tag + if strings.Contains(bufStr, ministralThinkTag) { + split := strings.SplitN(bufStr, ministralThinkTag, 2) + before := strings.TrimRightFunc(split[0], unicode.IsSpace) + if len(before) > 0 { + events = append(events, ministralEventContent{content: before}) + } + after := split[1] + p.buffer.Reset() + p.buffer.WriteString(after) p.state = ministralCollectingThinkingContent - return "", "", calls, nil - } else { - p.buffer.Reset() - return s, "", calls, nil + return events, true } + + // Check for partial tag overlap with [TOOL_CALLS] or [THINK] + overlapToolCalls := overlap(bufStr, ministralToolCallsTag) + overlapThink := overlap(bufStr, ministralThinkTag) + maxOverlap := max(overlapToolCalls, overlapThink) + + if maxOverlap > 0 { + // Withhold the potential partial tag + beforePartialTag := bufStr[:len(bufStr)-maxOverlap] + trailingWS := trailingWhitespaceLen(beforePartialTag) + ambiguousStart := len(beforePartialTag) - trailingWS + unambiguous := bufStr[:ambiguousStart] + ambiguous := bufStr[ambiguousStart:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, 
ministralEventContent{content: unambiguous}) + } + return events, false + } + + // No tag found: emit content but withhold trailing whitespace + whitespaceLen := trailingWhitespaceLen(bufStr) + ambiguousStart := len(bufStr) - whitespaceLen + unambiguous := bufStr[:ambiguousStart] + ambiguous := bufStr[ambiguousStart:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, ministralEventContent{content: unambiguous}) + } + return events, false + case ministralCollectingThinkingContent: - if strings.Contains(p.buffer.String(), "[/THINK]") { - thinkingContent, after := splitAtTag(&p.buffer, "[/THINK]", true) - p.state = ministralCollectingContent - if after != "" { - p.buffer.Reset() - return after, thinkingContent, calls, nil - } - return "", thinkingContent, calls, nil - } else { + bufStr := p.buffer.String() + + if strings.Contains(bufStr, ministralThinkEndTag) { + split := strings.SplitN(bufStr, ministralThinkEndTag, 2) + thinkingContent := split[0] + after := strings.TrimLeftFunc(split[1], unicode.IsSpace) p.buffer.Reset() - return "", s, calls, nil - } - case ministralCollectingToolName: - if strings.Contains(p.buffer.String(), "[ARGS]") { - name, _ := splitAtTag(&p.buffer, "[ARGS]", false) - - t, err := toolByName(p.tools, name) - if err != nil { - return "", "", calls, err + p.buffer.WriteString(after) + if len(thinkingContent) > 0 { + events = append(events, ministralEventThinking{thinking: thinkingContent}) } - p.currentTool = t - p.state = ministralCollectingToolArgs - return "", "", calls, nil - } - return "", "", calls, nil - case ministralCollectingToolArgs: - if strings.Contains(p.buffer.String(), "}") { - before, _ := splitAtTag(&p.buffer, "}", false) - before += "}" - - var args api.ToolCallFunctionArguments - if err := json.Unmarshal([]byte(before), &args); err != nil { - // todo - throw a better error - return "", "", calls, err - } - p.state = ministralCollectingContent + return events, true + } - 
call := api.ToolCall{ + // Check for partial overlap with [/THINK] + if overlapLen := overlap(bufStr, ministralThinkEndTag); overlapLen > 0 { + unambiguous := bufStr[:len(bufStr)-overlapLen] + ambiguous := bufStr[len(bufStr)-overlapLen:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, ministralEventThinking{thinking: unambiguous}) + } + return events, false + } + + // No tag found: emit all thinking content + p.buffer.Reset() + if len(bufStr) > 0 { + events = append(events, ministralEventThinking{thinking: bufStr}) + } + return events, false + + case ministralCollectingToolName: + bufStr := p.buffer.String() + + if strings.Contains(bufStr, ministralArgsTag) { + split := strings.SplitN(bufStr, ministralArgsTag, 2) + toolName := split[0] + after := split[1] + p.pendingToolName = toolName + p.buffer.Reset() + p.buffer.WriteString(after) + p.state = ministralCollectingToolArgs + return events, true + } + // Wait for more data + return events, false + + case ministralCollectingToolArgs: + bufStr := p.buffer.String() + jsonEnd := findJSONEnd(bufStr) + + if jsonEnd != -1 { + jsonStr := bufStr[:jsonEnd+1] + remaining := bufStr[jsonEnd+1:] + + events = append(events, ministralEventToolCall{ + name: p.pendingToolName, + args: jsonStr, + }) + + p.pendingToolName = "" + p.buffer.Reset() + p.buffer.WriteString(remaining) + p.state = ministralCollectingContent + return events, true + } + // Wait for more data + return events, false + + default: + panic("unexpected ministral event") + } +} + +// parseEvents loops calling eat() until it returns false +func (p *MinistralParser) parseEvents() []ministralEvent { + var all []ministralEvent + keepLooping := true + for keepLooping { + var events []ministralEvent + events, keepLooping = p.eat() + all = append(all, events...) 
+ } + return all +} + +func (p *MinistralParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) { + p.buffer.WriteString(s) + + events := p.parseEvents() + + var contentBuilder, thinkingBuilder strings.Builder + var toolCalls []api.ToolCall + + for _, event := range events { + switch e := event.(type) { + case ministralEventContent: + contentBuilder.WriteString(e.content) + case ministralEventThinking: + thinkingBuilder.WriteString(e.thinking) + case ministralEventToolCall: + // Validate tool exists + tool, toolErr := toolByName(p.tools, e.name) + if toolErr != nil { + return contentBuilder.String(), thinkingBuilder.String(), toolCalls, toolErr + } + // Parse JSON arguments + var args api.ToolCallFunctionArguments + if jsonErr := json.Unmarshal([]byte(e.args), &args); jsonErr != nil { + return contentBuilder.String(), thinkingBuilder.String(), toolCalls, jsonErr + } + toolCalls = append(toolCalls, api.ToolCall{ Function: api.ToolCallFunction{ - Name: p.currentTool.Function.Name, + Name: tool.Function.Name, Arguments: args, }, - } - calls = append(calls, call) - return "", "", calls, nil + }) } - return "", "", calls, nil } - return p.buffer.String(), thinking, calls, nil + return contentBuilder.String(), thinkingBuilder.String(), toolCalls, nil +} + +// findJSONEnd finds the index of the closing brace that completes a JSON object. +// It properly handles nested objects, arrays, and strings (including escaped characters). +// Returns -1 if the JSON is not yet complete. 
// findJSONEnd finds the byte index of the closing bracket that completes the
// first JSON object or array in s. It handles nested objects, arrays, and
// strings (including escaped characters). Returns -1 if the JSON is not yet
// complete.
//
// A closing bracket with no matching opener can never become valid by reading
// more input, so its index is returned as well. The caller then hands the
// malformed text to the JSON decoder and gets an error, rather than waiting
// forever for a terminator.
func findJSONEnd(s string) int {
	depth := 0
	inString := false
	escaped := false

	for i, r := range s {
		if inString {
			switch {
			case escaped:
				// The previous character was a backslash: this one is
				// literal, whatever it is.
				escaped = false
			case r == '\\':
				// Mark the next character as escaped
				escaped = true
			case r == '"':
				// End of string literal
				inString = false
			}
			continue
		}

		switch r {
		case '"':
			// Start of string literal
			inString = true
		case '{', '[':
			// Increase nesting level for objects and arrays
			depth++
		case '}', ']':
			// Decrease nesting level. depth == 0 closes the root value;
			// depth < 0 is an unmatched closer (see the function comment).
			depth--
			if depth <= 0 {
				return i
			}
		}
	}

	return -1
}
// TestMinistralParserStreaming feeds each case's inputs into the parser buffer
// one step at a time and compares the events returned by parseEvents after
// each step with the expected events for that step.
//
// Cases with think set begin with text that is expected to come back as
// thinking events. Presumably Init selects the thinking state when
// hasThinkingSupport is set; Init is not shown here, so confirm against it.
func TestMinistralParserStreaming(t *testing.T) {
	type step struct {
		input      string
		wantEvents []ministralEvent
	}

	cases := []struct {
		desc  string
		tools []api.Tool
		steps []step
		think bool // whether to enable thinking support
	}{
		// Content streaming
		{
			desc: "simple content",
			steps: []step{
				{input: "Hello, how can I help you?", wantEvents: []ministralEvent{
					ministralEventContent{content: "Hello, how can I help you?"},
				}},
			},
		},
		{
			desc: "streaming content word by word",
			steps: []step{
				{input: "Hello,", wantEvents: []ministralEvent{ministralEventContent{content: "Hello,"}}},
				{input: " how", wantEvents: []ministralEvent{ministralEventContent{content: " how"}}},
				{input: " can I help?", wantEvents: []ministralEvent{ministralEventContent{content: " can I help?"}}},
			},
		},

		// Simple tool calls
		{
			desc:  "simple tool call",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
			steps: []step{
				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "San Francisco"}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "get_weather", args: `{"location": "San Francisco"}`},
				}},
			},
		},
		{
			desc:  "tool call with nested object",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
			steps: []step{
				{input: `[TOOL_CALLS]create_entities[ARGS]{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
				}},
			},
		},
		{
			desc:  "tool call with deeply nested objects",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "update_config"}}},
			steps: []step{
				{input: `[TOOL_CALLS]update_config[ARGS]{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "update_config", args: `{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`},
				}},
			},
		},
		{
			desc:  "tool call with array of objects",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "process_items"}}},
			steps: []step{
				{input: `[TOOL_CALLS]process_items[ARGS]{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "process_items", args: `{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`},
				}},
			},
		},
		{
			desc:  "tool call with escaped quotes in string",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "search"}}},
			steps: []step{
				{input: `[TOOL_CALLS]search[ARGS]{"query": "say \"hello\""}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "search", args: `{"query": "say \"hello\""}`},
				}},
			},
		},
		{
			desc:  "tool call with braces inside string",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "format"}}},
			steps: []step{
				{input: `[TOOL_CALLS]format[ARGS]{"template": "Hello {name}!"}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "format", args: `{"template": "Hello {name}!"}`},
				}},
			},
		},
		{
			desc:  "empty JSON object",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "no_args"}}},
			steps: []step{
				{input: `[TOOL_CALLS]no_args[ARGS]{}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "no_args", args: `{}`},
				}},
			},
		},
		{
			desc:  "JSON with newlines in string",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "write"}}},
			steps: []step{
				{input: `[TOOL_CALLS]write[ARGS]{"content": "line1\nline2\nline3"}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "write", args: `{"content": "line1\nline2\nline3"}`},
				}},
			},
		},
		{
			desc:  "backslash in string value",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "path"}}},
			steps: []step{
				{input: `[TOOL_CALLS]path[ARGS]{"dir": "C:\\Users\\test"}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "path", args: `{"dir": "C:\\Users\\test"}`},
				}},
			},
		},

		// Content after tool call
		{
			desc:  "content after tool call",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				// NOTE: It's unclear if this is valid Ministral output, but the parser
				// currently treats text after a tool call as regular content. This test
				// documents that behavior so we notice if it changes.
				{input: `[TOOL_CALLS]test[ARGS]{"a": 1}some content after`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "test", args: `{"a": 1}`},
					ministralEventContent{content: "some content after"},
				}},
			},
		},

		// Multiple tool calls
		{
			desc: "multiple tool calls in sequence",
			tools: []api.Tool{
				{Function: api.ToolFunction{Name: "get_weather"}},
				{Function: api.ToolFunction{Name: "get_time"}},
			},
			steps: []step{
				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "NYC"}[TOOL_CALLS]get_time[ARGS]{"timezone": "EST"}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "get_weather", args: `{"location": "NYC"}`},
					ministralEventToolCall{name: "get_time", args: `{"timezone": "EST"}`},
				}},
			},
		},
		{
			desc: "multiple tool calls streamed separately",
			tools: []api.Tool{
				{Function: api.ToolFunction{Name: "tool_a"}},
				{Function: api.ToolFunction{Name: "tool_b"}},
			},
			steps: []step{
				{input: `[TOOL_CALLS]tool_a[ARGS]{"x": 1}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "tool_a", args: `{"x": 1}`},
				}},
				{input: `[TOOL_CALLS]tool_b[ARGS]{"y": 2}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "tool_b", args: `{"y": 2}`},
				}},
			},
		},

		// Streaming tool calls
		{
			desc:  "streaming tool call with nested objects",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
			steps: []step{
				{input: "[TOOL_CALLS]create_entities[ARGS]", wantEvents: []ministralEvent{}},
				{input: `{"entities": [{"entityType": "Person",`, wantEvents: []ministralEvent{}},
				{input: ` "name": "Jack",`, wantEvents: []ministralEvent{}},
				{input: ` "observations": ["Works`, wantEvents: []ministralEvent{}},
				{input: ` as a baker"]}`, wantEvents: []ministralEvent{}},
				{input: `]}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
				}},
			},
		},
		{
			desc:  "streaming with incomplete JSON waits for completion",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				{input: "[TOOL_CALLS]test[ARGS]{", wantEvents: []ministralEvent{}},
				{input: `"a": {`, wantEvents: []ministralEvent{}},
				{input: `"b": 1`, wantEvents: []ministralEvent{}},
				{input: `}`, wantEvents: []ministralEvent{}},
				{input: `}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "test", args: `{"a": {"b": 1}}`},
				}},
			},
		},

		// Partial tag handling
		{
			desc: "partial tool tag fakeout",
			steps: []step{
				{input: "abc[TOOL", wantEvents: []ministralEvent{ministralEventContent{content: "abc"}}},
				{input: " not a tag", wantEvents: []ministralEvent{ministralEventContent{content: "[TOOL not a tag"}}},
			},
		},
		{
			desc:  "tool call tag split across chunks",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				{input: "[TOOL_", wantEvents: []ministralEvent{}},
				{input: "CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
					ministralEventToolCall{name: "test", args: `{}`},
				}},
			},
		},
		{
			desc:  "content before tool call",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
			steps: []step{
				{input: "hello [TOOL_CALLS]get_weather[ARGS]{}", wantEvents: []ministralEvent{
					ministralEventContent{content: "hello"},
					ministralEventToolCall{name: "get_weather", args: `{}`},
				}},
			},
		},
		{
			desc:  "whitespace between content and tool call is trimmed",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				{input: "content \n [TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
					ministralEventContent{content: "content"},
					ministralEventToolCall{name: "test", args: `{}`},
				}},
			},
		},
		{
			desc:  "tabs and newlines before tool call are trimmed",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				{input: "content\t\n\t[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
					ministralEventContent{content: "content"},
					ministralEventToolCall{name: "test", args: `{}`},
				}},
			},
		},
		{
			desc:  "non-breaking space before tool call is trimmed",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				// \u00a0 is non-breaking space, which unicode.IsSpace considers whitespace
				{input: "content\u00a0[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
					ministralEventContent{content: "content"},
					ministralEventToolCall{name: "test", args: `{}`},
				}},
			},
		},
		{
			desc: "whitespace before THINK tag is trimmed",
			steps: []step{
				{input: "content \n [THINK]thinking[/THINK]after", wantEvents: []ministralEvent{
					ministralEventContent{content: "content"},
					ministralEventThinking{thinking: "thinking"},
					ministralEventContent{content: "after"},
				}},
			},
		},
		{
			desc: "trailing whitespace withheld then emitted",
			steps: []step{
				{input: "Hello ", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: " world"}}},
			},
		},
		{
			desc: "trailing newline withheld then emitted",
			steps: []step{
				{input: "Hello\n", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: "\nworld"}}},
			},
		},

		// Thinking support
		{
			desc:  "thinking content",
			think: true,
			steps: []step{
				{input: "thinking here[/THINK]", wantEvents: []ministralEvent{
					ministralEventThinking{thinking: "thinking here"},
				}},
				{input: "content after", wantEvents: []ministralEvent{
					ministralEventContent{content: "content after"},
				}},
			},
		},
		{
			desc:  "thinking with whitespace after end tag",
			think: true,
			steps: []step{
				{input: "my thoughts[/THINK] \n response", wantEvents: []ministralEvent{
					ministralEventThinking{thinking: "my thoughts"},
					ministralEventContent{content: "response"},
				}},
			},
		},
		{
			desc:  "non-breaking space after think end tag is trimmed",
			think: true,
			steps: []step{
				// \u00a0 is non-breaking space
				{input: "thinking[/THINK]\u00a0response", wantEvents: []ministralEvent{
					ministralEventThinking{thinking: "thinking"},
					ministralEventContent{content: "response"},
				}},
			},
		},
		{
			desc:  "partial think end tag",
			think: true,
			steps: []step{
				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
				{input: "NK]after", wantEvents: []ministralEvent{ministralEventContent{content: "after"}}},
			},
		},
		{
			desc:  "think tag fakeout",
			think: true,
			steps: []step{
				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
				{input: "not end tag", wantEvents: []ministralEvent{ministralEventThinking{thinking: "[/THInot end tag"}}},
			},
		},
		{
			desc:  "thinking then tool call",
			think: true,
			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
			steps: []step{
				{input: "let me think[/THINK][TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
					ministralEventThinking{thinking: "let me think"},
					ministralEventToolCall{name: "test", args: `{}`},
				}},
			},
		},

		// Content then THINK tag transition
		{
			desc: "content then think tag",
			steps: []step{
				{input: "content[THINK]thinking[/THINK]more", wantEvents: []ministralEvent{
					ministralEventContent{content: "content"},
					ministralEventThinking{thinking: "thinking"},
					ministralEventContent{content: "more"},
				}},
			},
		},

		// Unicode handling
		{
			desc: "unicode content",
			steps: []step{
				{input: "你好 🌍 مرحبا", wantEvents: []ministralEvent{
					ministralEventContent{content: "你好 🌍 مرحبا"},
				}},
			},
		},
		{
			desc:  "unicode in tool args",
			tools: []api.Tool{{Function: api.ToolFunction{Name: "greet"}}},
			steps: []step{
				{input: `[TOOL_CALLS]greet[ARGS]{"message": "你好 🌍"}`, wantEvents: []ministralEvent{
					ministralEventToolCall{name: "greet", args: `{"message": "你好 🌍"}`},
				}},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			parser := MinistralParser{}
			parser.hasThinkingSupport = tc.think
			parser.Init(tc.tools, nil, nil)

			// Steps share one parser, so text withheld by an earlier step
			// is visible to the later ones.
			for i, step := range tc.steps {
				parser.buffer.WriteString(step.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
				}
			}
		})
	}
}

// TestMinistralParser_Errors checks that Add returns an error for a tool call
// naming a tool that was not registered, and for tool-call arguments that are
// not valid JSON.
func TestMinistralParser_Errors(t *testing.T) {
	t.Run("unknown tool returns error", func(t *testing.T) {
		p := &MinistralParser{}
		p.Init([]api.Tool{{Function: api.ToolFunction{Name: "known_tool"}}}, nil, nil)

		_, _, _, err := p.Add(`[TOOL_CALLS]unknown_tool[ARGS]{"a": 1}`, true)
		if err == nil {
			t.Fatal("expected error for unknown tool")
		}
	})

	t.Run("invalid JSON returns error", func(t *testing.T) {
		p := &MinistralParser{}
		p.Init([]api.Tool{{Function: api.ToolFunction{Name: "test"}}}, nil, nil)

		// The braces balance, so the argument text is handed to the JSON
		// decoder, which must reject it.
		_, _, _, err := p.Add(`[TOOL_CALLS]test[ARGS]{invalid json}`, true)
		if err == nil {
			t.Fatal("expected error for invalid JSON")
		}
	})
}
with trailing content", + input: `{"a": 1} extra`, + expected: 7, + }, + { + name: "array", + input: `[{"a": 1}, {"b": 2}]`, + expected: 19, + }, + { + name: "escaped backslash before quote", + input: `{"path": "C:\\"}`, + expected: 15, + }, + { + name: "empty string", + input: "", + expected: -1, + }, + { + name: "no opening brace", + input: "hello world", + expected: -1, + }, + { + name: "only opening brace", + input: "{", + expected: -1, + }, + { + name: "unclosed string", + input: `{"key": "unclosed`, + expected: -1, + }, + { + name: "double escaped backslash then quote", + input: `{"path": "C:\\\\"}`, + expected: 17, + }, + { + name: "unicode in key and value", + input: `{"키": "값"}`, + expected: 13, + }, + { + name: "nested arrays", + input: `{"matrix": [[1, 2], [3, 4]]}`, + expected: 27, + }, + { + name: "mixed nesting", + input: `{"a": [{"b": {"c": [1, 2, 3]}}]}`, + expected: 31, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := findJSONEnd(tt.input) + if result != tt.expected { + t.Errorf("findJSONEnd(%q) = %d, want %d", tt.input, result, tt.expected) + } + }) + } +} + +func TestMinistralParser_HasToolSupport(t *testing.T) { + p := &MinistralParser{} + if !p.HasToolSupport() { + t.Error("expected HasToolSupport to return true") + } +} + +func TestMinistralParser_HasThinkingSupport(t *testing.T) { + p := &MinistralParser{hasThinkingSupport: false} + if p.HasThinkingSupport() { + t.Error("expected HasThinkingSupport to return false") + } + + p = &MinistralParser{hasThinkingSupport: true} + if !p.HasThinkingSupport() { + t.Error("expected HasThinkingSupport to return true") + } +} diff --git a/model/parsers/parsers.go b/model/parsers/parsers.go index c5baabe53..7e5ad4114 100644 --- a/model/parsers/parsers.go +++ b/model/parsers/parsers.go @@ -3,6 +3,7 @@ package parsers import ( "strings" "unicode" + "unicode/utf8" "github.com/ollama/ollama/api" "github.com/ollama/ollama/harmony" @@ -114,3 +115,33 @@ func splitAtTag(sb 
// overlap returns the length in bytes of the longest suffix of s that is also
// a prefix of delim, or 0 if there is none. A non-zero result means s may end
// in the middle of delim.
func overlap(s, delim string) int {
	// Named limit, not max, so the builtin max stays usable in this package.
	limit := min(len(delim), len(s))
	for n := limit; n > 0; n-- {
		if strings.HasSuffix(s, delim[:n]) {
			return n
		}
	}
	return 0
}

// trailingWhitespaceLen returns the length in bytes of the run of whitespace
// at the end of s, where whitespace is whatever unicode.IsSpace accepts.
func trailingWhitespaceLen(s string) int {
	end := len(s)
	for end > 0 {
		r, size := utf8.DecodeLastRuneInString(s[:end])
		// if it's an invalid utf8 rune, assume it isn't whitespace
		if r == utf8.RuneError && size == 1 {
			break
		}
		if !unicode.IsSpace(r) {
			break
		}
		end -= size
	}
	return len(s) - end
}
`xml:"name,attr"`