feat(api): Handle tool calls in responses API

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Author: Richard Palethorpe
Date:   2025-06-12 05:11:43 +01:00
Parent: 9160ca598e
Commit: ff6890c9c1

9 changed files with 985 additions and 96 deletions


@@ -427,6 +427,22 @@ func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction
return conv, nil
}
// getAvailableActionsForJob returns available actions including user-defined ones for a specific job
func (a *Agent) getAvailableActionsForJob(job *types.Job) types.Actions {
// Start with regular available actions
baseActions := a.availableActions()
// Add user-defined actions from the job
userTools := job.GetUserTools()
if len(userTools) > 0 {
userDefinedActions := types.CreateUserDefinedActions(userTools)
baseActions = append(baseActions, userDefinedActions...)
xlog.Debug("Added user-defined actions", "definitions", userTools)
}
return baseActions
}
func (a *Agent) availableActions() types.Actions {
// defaultActions := append(a.options.userActions, action.NewReply())
@@ -493,16 +509,19 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
xlog.Debug("[pickAction] picking action starts", "messages", messages)
// Get available actions including user-defined ones
availableActions := a.getAvailableActionsForJob(job)
// Identify the goal of this conversation
- if !a.options.forceReasoning {
- xlog.Debug("not forcing reasoning")
+ if !a.options.forceReasoning || job.ToolChoice != "" {
+ xlog.Debug("not forcing reasoning", "forceReasoning", a.options.forceReasoning, "ToolChoice", job.ToolChoice)
// We also could avoid to use functions here and get just a reply from the LLM
// and then use the reply to get the action
thought, err := a.decision(job,
messages,
- a.availableActions().ToTools(),
- "",
+ availableActions.ToTools(),
+ job.ToolChoice,
maxRetries)
if err != nil {
return nil, nil, "", err
@@ -512,7 +531,7 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
xlog.Debug("thought message", "message", thought.message)
// Find the action
- chosenAction := a.availableActions().Find(thought.actionName)
+ chosenAction := availableActions.Find(thought.actionName)
if chosenAction == nil || thought.actionName == "" {
xlog.Debug("no answer")


@@ -572,6 +572,66 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
return failedBy == "" && (!hasTriggers || triggeredBy != ""), nil
}
// validateBuiltinTools checks that builtin tools specified by the user can be matched to available actions
func (a *Agent) validateBuiltinTools(job *types.Job) {
builtinTools := job.GetBuiltinTools()
if len(builtinTools) == 0 {
return
}
// Get available actions
availableActions := a.mcpActions
for _, tool := range builtinTools {
functionName := tool.Name
// Check if this is a web search builtin tool
if strings.HasPrefix(string(functionName), "web_search_") {
// Look for a search action
searchAction := availableActions.Find("search")
if searchAction == nil {
xlog.Warn("Web search builtin tool specified but no 'search' action available",
"function_name", functionName,
"agent", a.Character.Name)
} else {
xlog.Debug("Web search builtin tool matched to search action",
"function_name", functionName,
"agent", a.Character.Name)
}
} else {
// For future builtin tools, add more matching logic here
xlog.Warn("Unknown builtin tool specified",
"function_name", functionName,
"agent", a.Character.Name)
}
}
}
// replyWithToolCall handles user-defined actions by recording the action state without setting Response
func (a *Agent) replyWithToolCall(job *types.Job, conv []openai.ChatCompletionMessage, params types.ActionParams, chosenAction types.Action, reasoning string) {
// Record the action state so the webui can detect this is a user-defined action
stateResult := types.ActionState{
ActionCurrentState: types.ActionCurrentState{
Job: job,
Action: chosenAction,
Params: params,
Reasoning: reasoning,
},
ActionResult: types.ActionResult{
Result: reasoning, // The reasoning/message to show to user
},
}
// Add the action state to the job result
job.Result.SetResult(stateResult)
// Set conversation but leave Response empty
// The webui will detect the user-defined action and generate the proper tool call response
job.Result.Conversation = conv
// job.Result.Response remains empty - this signals to webui that it should check State
job.Result.Finish(nil)
}
func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
if err := job.GetContext().Err(); err != nil {
job.Result.Finish(fmt.Errorf("expired"))
@@ -625,6 +685,9 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
// RAG
conv = a.knowledgeBaseLookup(job, conv)
// Validate builtin tools against available actions
a.validateBuiltinTools(job)
var pickTemplate string
var reEvaluationTemplate string
@@ -843,6 +906,13 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
}
if !chosenAction.Definition().Name.Is(action.PlanActionName) {
// Check if this is a user-defined action
if types.IsActionUserDefined(chosenAction) {
xlog.Debug("User-defined action chosen, returning tool call", "action", chosenAction.Definition().Name)
a.replyWithToolCall(job, conv, actionParams, chosenAction, reasoning)
return
}
result, err := a.runAction(job, chosenAction, actionParams)
if err != nil {
result.Result = fmt.Sprintf("Error running tool: %v", err)


@@ -3,6 +3,7 @@ package types
import (
"context"
"encoding/json"
"fmt"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
@@ -93,6 +94,60 @@ type Action interface {
Plannable() bool
}
// UserDefinedChecker interface to identify user-defined actions
type UserDefinedChecker interface {
IsUserDefined() bool
}
// BaseAction provides default implementation for Action interface
// Embed this in action implementations to get the default IsUserDefined behavior
type BaseAction struct{}
func (b *BaseAction) IsUserDefined() bool {
return false // Regular actions are not user-defined
}
// IsActionUserDefined checks if an action is user-defined
func IsActionUserDefined(action Action) bool {
if checker, ok := action.(UserDefinedChecker); ok {
return checker.IsUserDefined()
}
return false // Actions without UserDefinedChecker are not user-defined
}
// UserDefinedAction represents a user-defined function tool
type UserDefinedAction struct {
ActionDef *ActionDefinition
}
func (u *UserDefinedAction) Run(ctx context.Context, sharedState *AgentSharedState, action ActionParams) (ActionResult, error) {
// User-defined actions should not be executed directly
return ActionResult{}, fmt.Errorf("user-defined action '%s' cannot be executed by agent", u.ActionDef.Name)
}
func (u *UserDefinedAction) Definition() ActionDefinition {
return *u.ActionDef
}
func (u *UserDefinedAction) Plannable() bool {
return true // User-defined actions are plannable
}
func (u *UserDefinedAction) IsUserDefined() bool {
return true
}
// CreateUserDefinedActions converts user tools to UserDefinedAction instances
func CreateUserDefinedActions(userTools []ActionDefinition) []Action {
var actions []Action
for _, tool := range userTools {
actions = append(actions, &UserDefinedAction{
ActionDef: &tool,
})
}
return actions
}
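For illustration, a minimal sketch (hypothetical, not part of this commit, assuming it sits in this same types package) of how the dispatch behaves: a UserDefinedAction reports true through UserDefinedChecker, while actions that only embed BaseAction (or implement no checker at all) fall back to false.
// Hypothetical sketch: exampleUserDefinedCheck is not part of this commit.
func exampleUserDefinedCheck() {
    def := ActionDefinition{Name: "CreateTask"}
    ud := &UserDefinedAction{ActionDef: &def}

    // true: UserDefinedAction implements UserDefinedChecker and returns true.
    fmt.Println(IsActionUserDefined(ud))

    // Any action that does not implement UserDefinedChecker, or embeds
    // BaseAction (whose IsUserDefined returns false), yields false here.
}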
type Actions []Action
func (a Actions) ToTools() []openai.Tool {


@@ -20,6 +20,11 @@ type Job struct {
UUID string
Metadata map[string]interface{}
DoneFilter bool
// Tools available for this job
BuiltinTools []ActionDefinition // Built-in tools like web search
UserTools []ActionDefinition // User-defined function tools
ToolChoice string
pastActions []*ActionRequest
nextAction *Action
@@ -45,6 +50,24 @@ func WithConversationHistory(history []openai.ChatCompletionMessage) JobOption {
}
}
func WithBuiltinTools(tools []ActionDefinition) JobOption {
return func(j *Job) {
j.BuiltinTools = tools
}
}
func WithUserTools(tools []ActionDefinition) JobOption {
return func(j *Job) {
j.UserTools = tools
}
}
func WithToolChoice(choice string) JobOption {
return func(j *Job) {
j.ToolChoice = choice
}
}
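Taken together, these options let a caller thread request tools into a job. A rough caller-side sketch; the surrounding variables and the exact Ask call mirror the webui handler later in this commit and are assumptions here:
// Sketch only: messages, userTools, builtinTools and agent are assumed to be in scope.
jobOptions := []JobOption{
    WithConversationHistory(messages),
    WithUserTools(userTools),       // user-defined function tools from the request
    WithBuiltinTools(builtinTools), // e.g. web_search_preview converted to ActionDefinitions
    WithToolChoice("CreateTask"),   // passed through as the tool choice in pickAction
}
res := agent.Ask(jobOptions...)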
func WithReasoningCallback(f func(ActionCurrentState) bool) JobOption {
return func(r *Job) {
r.ReasoningCallback = f
@@ -227,3 +250,21 @@ func (j *Job) IncrementEvaluationLoop() {
currentLoop := j.GetEvaluationLoop()
j.Metadata["evaluation_loop"] = currentLoop + 1
}
// GetBuiltinTools returns the builtin tools for this job
func (j *Job) GetBuiltinTools() []ActionDefinition {
return j.BuiltinTools
}
// GetUserTools returns the user tools for this job
func (j *Job) GetUserTools() []ActionDefinition {
return j.UserTools
}
// GetAllTools returns all tools (builtin + user) for this job
func (j *Job) GetAllTools() []ActionDefinition {
allTools := make([]ActionDefinition, 0, len(j.BuiltinTools)+len(j.UserTools))
allTools = append(allTools, j.BuiltinTools...)
allTools = append(allTools, j.UserTools...)
return allTools
}


@@ -4,18 +4,58 @@ import (
"encoding/json"
"fmt"
"net/http"
"github.com/sashabaranov/go-openai/jsonschema"
)
// UserLocation represents the user's location for web search
type UserLocation struct {
Type string `json:"type"`
City *string `json:"city,omitempty"`
Country *string `json:"country,omitempty"`
Region *string `json:"region,omitempty"`
Timezone *string `json:"timezone,omitempty"`
}
type Tool struct {
Type string `json:"type"`
// Function tool fields (used when type == "function")
Name *string `json:"name,omitempty"`
Description *string `json:"description,omitempty"`
Parameters *jsonschema.Definition `json:"parameters,omitempty"`
// Web search tool fields (used when type == "web_search_preview" etc.)
SearchContextSize *string `json:"search_context_size,omitempty"`
UserLocation *UserLocation `json:"user_location,omitempty"`
}
type ToolChoice struct {
Name string `json:"name"`
Type string `json:"type"`
}
// RequestBody represents the message request to the AI model
type RequestBody struct {
Model string `json:"model"`
Input any `json:"input"`
Temperature *float64 `json:"temperature,omitempty"`
Tools []Tool `json:"tools,omitempty"`
ToolChoice *ToolChoice `json:"tool_choice"`
MaxTokens *int `json:"max_output_tokens,omitempty"`
}
type InputFunctionToolCallOutput struct {
CallID string `json:"call_id"`
Output string `json:"output"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
}
// InputMessage represents a user input message
type InputMessage struct {
Type string `json:"type"`
Role string `json:"role"`
Content any `json:"content"`
}
@@ -29,10 +69,49 @@ type ContentItem struct {
// ResponseBody represents the response from the AI model
type ResponseBody struct {
- CreatedAt int64 `json:"created_at"`
- Status string `json:"status"`
- Error any `json:"error,omitempty"`
- Output []ResponseMessage `json:"output"`
+ CreatedAt int64 `json:"created_at"`
+ Status string `json:"status"`
+ Error any `json:"error,omitempty"`
+ Output []ResponseBase `json:"output"`
+ Tools []Tool `json:"tools"`
}
type ResponseType string
const (
ResponseTypeFunctionToolCall ResponseType = "function_call"
ResponseTypeMessage ResponseType = "message"
)
type ResponseBase json.RawMessage
func (r *ResponseBase) UnmarshalJSON(data []byte) error {
return (*json.RawMessage)(r).UnmarshalJSON(data)
}
func (r *ResponseBase) ToMessage() (msg ResponseMessage, err error) {
err = json.Unmarshal(*r, &msg)
if msg.Type != string(ResponseTypeMessage) {
return ResponseMessage{}, fmt.Errorf("Expected %s, not %s", ResponseTypeMessage, msg.Type)
}
return
}
func (r *ResponseBase) ToFunctionToolCall() (msg ResponseFunctionToolCall, err error) {
err = json.Unmarshal(*r, &msg)
if msg.Type != string(ResponseTypeFunctionToolCall) {
return ResponseFunctionToolCall{}, fmt.Errorf("Expected %s, not %s", ResponseTypeFunctionToolCall, msg.Type)
}
return
}
type ResponseFunctionToolCall struct {
Arguments string `json:"arguments"`
CallID string `json:"call_id"`
Name string `json:"name"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
}
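Client code that cares about tool calls (not just the assistant text, as SimpleAIResponse below does) can dispatch on each raw output item. A small sketch; the two handle* helpers are hypothetical:
for _, out := range response.Output {
    if msg, err := out.ToMessage(); err == nil {
        handleAssistantText(msg) // hypothetical: consume the assistant message content
        continue
    }
    if call, err := out.ToFunctionToolCall(); err == nil {
        handleToolCall(call.Name, call.CallID, call.Arguments) // hypothetical: execute the tool
    }
}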
// ResponseMessage represents a message in the response
@@ -85,7 +164,11 @@ func (c *Client) SimpleAIResponse(agentName, input string) (string, error) {
}
// Extract the text response from the output
- for _, msg := range response.Output {
+ for _, out := range response.Output {
+ msg, err := out.ToMessage()
+ if err != nil {
+ return "", fmt.Errorf("out.ToMessage: %w", err)
+ }
if msg.Role == "assistant" {
for _, content := range msg.Content {
if content.Type == "output_text" {
@@ -113,7 +196,12 @@ func (c *Client) ChatAIResponse(agentName string, messages []InputMessage) (stri
}
// Extract the text response from the output
- for _, msg := range response.Output {
+ for _, out := range response.Output {
+ msg, err := out.ToMessage()
+ if err != nil {
+ return "", fmt.Errorf("out.ToMessage: %w", err)
+ }
if msg.Role == "assistant" {
for _, content := range msg.Content {
if content.Type == "output_text" {

pkg/utils/ptr/ptr.go (new file)

@@ -0,0 +1,5 @@
package ptr
func To[T any](v T) *T {
return &v
}
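The helper only exists so literals can be assigned to the pointer fields on Tool and friends; a usage sketch:
name := ptr.To("CreateTask") // *string, e.g. for Tool.Name
strict := ptr.To(true)       // works for any type via the type parameter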


@@ -1,10 +1,15 @@
package e2e_test
import (
"encoding/json"
"fmt"
"net/http"
"time"
localagi "github.com/mudler/LocalAGI/pkg/client"
"github.com/mudler/LocalAGI/pkg/utils/ptr"
"github.com/mudler/LocalAGI/pkg/xlog"
"github.com/sashabaranov/go-openai/jsonschema"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -15,9 +20,13 @@ var _ = Describe("Agent test", func() {
BeforeEach(func() {
Eventually(func() error {
// test apiURL is working and available
- _, err := http.Get(apiURL + "/readyz")
+ _, err := http.Get(localagiURL + "/readyz")
return err
}, "10m", "10s").ShouldNot(HaveOccurred())
client := localagi.NewClient(localagiURL, "", time.Minute)
err := client.DeleteAgent("testagent")
Expect(err).ToNot(HaveOccurred())
})
It("create agent", func() {
@@ -33,5 +42,187 @@ var _ = Describe("Agent test", func() {
Expect(result).ToNot(BeEmpty())
})
It("tool call", func() {
client := localagi.NewClient(localagiURL, "", 5*time.Minute)
err := client.CreateAgent(&localagi.AgentConfig{
Name: "testagent",
})
Expect(err).ToNot(HaveOccurred())
req := localagi.RequestBody{
Model: "testagent",
Input: "Create an appointment next week on wednesday at 10:00 am for the whole day. The topic is about AI and you include ABC and DEF to the appointment.",
Tools: []localagi.Tool{
{
Type: "function",
Name: ptr.To("CreateTask"),
Description: ptr.To("Write the needed details whenever you're asked to create something like an info, appointment, e-mail or when you're asked to remind of anything or create a reminder. Also use this if you're supposed to answer an e-mail."),
Parameters: ptr.To(jsonschema.Definition{
Type: "object",
Properties: map[string]jsonschema.Definition{
"task": {
Type: "string",
Description: "Look for the name of the task you're supposed to do or create ",
Enum: []string{
"appointment",
"E-mail",
},
},
"subject": {
Type: "string",
Description: "A subject the task is about. Infer this from the given context data and user prompt.",
},
"reply": {
Type: "string",
Description: "A sharp and short reply to the contextual data given. Use a friendly and neutral general greeting.",
},
"recipient": {
Type: "array",
Description: "A list of names and abbreviations to send our task to. Abbreviations always have to match exactly. If the user gives you first names you can deduce the last name.",
Items: &jsonschema.Definition{
Type: "string",
Enum: []string{
"ABC",
"DEF",
},
},
},
"datestart": {
Type: "string",
Description: "The date and time when the task should start. Discard any older dates than today. Use tomorrow as default. Use the format DD/MM/YYYY HH:MM",
},
"dateend": {
Type: "string",
Description: "The date and time when a meeting should end. Default to start date. If the duration of an appointment is given, calculate the end with the start date. Use the format DD/MM/YYYY HH:MM",
},
"datedone": {
Type: "string",
Description: "The date and time when the task should be done. Use the format DD/MM/YYYY HH:MM",
},
"private": {
Type: "boolean",
Description: "Whether the task should be private or not. Default to false.",
},
"includeall": {
Type: "boolean",
Description: "Whether the task should include every mentioned person or not. Default to true. If you find explicitly mentioned people in the prompt whilst ignoring the contextual xml schema you choose false unless it is mentioned that you should include everyone.",
},
"wholedayappointment": {
Type: "boolean",
Description: "Whether the appointment should be done for the whole day. Default to false unless mentioned by the user prompt. Ignore the xml schema for this.",
},
"remainder": {
Type: "boolean",
Description: "Whether you are explicitly supposed to remind of something or not. Default to false. Ignore the xml schema for this.",
},
},
Required: []string{
"task",
"recipient",
"datestart",
"dateend",
"datedone",
"private",
"wholedayappointment",
"remainder",
"subject",
"reply",
"includeall",
},
}),
},
}}
result, err := client.GetAIResponse(&req)
Expect(err).ToNot(HaveOccurred())
Expect(result).ToNot(BeNil())
var call localagi.ResponseFunctionToolCall
var args struct {
Task string `json:"task"`
Subject string `json:"subject"`
Reply string `json:"reply"`
Recipient []string `json:"recipient"`
DateStart string `json:"datestart"`
DateEnd string `json:"dateend"`
DateDone string `json:"datedone"`
Private bool `json:"private"`
IncludeAll bool `json:"includeall"`
WholeDayAppointment bool `json:"wholedayappointment"`
Remainder bool `json:"remainder"`
}
for _, out := range result.Output {
msg, err := out.ToMessage()
if err == nil && msg.Role == "assistant" {
xlog.Info("Agent returned message", "message", msg)
continue
}
fnc, err := out.ToFunctionToolCall()
call = fnc
Expect(err).ToNot(HaveOccurred())
Expect(string(fnc.Type)).To(Equal("function_call"))
Expect(fnc.Name).To(Equal("CreateTask"))
err = json.Unmarshal([]byte(fnc.Arguments), &args)
Expect(err).ToNot(HaveOccurred())
Expect(args.Task).To(Equal("appointment"))
Expect(args.Subject).ToNot(BeEmpty())
Expect(args.Reply).ToNot(BeEmpty())
}
req = localagi.RequestBody{
Model: "testagent",
Input: []any{
localagi.InputMessage{
Type: "message",
Role: "user",
Content: "Create an appointment next week on wednesday at 10:00 am for the whole day. The topic is about AI and you include ABC and DEF to the appointment.",
},
call,
localagi.InputFunctionToolCallOutput{
Type: "function_call_output",
CallID: call.CallID,
Output: fmt.Sprintf("Successfully created %s: %s", args.Task, args.Subject),
},
localagi.InputMessage{
Type: "message",
Role: "user",
Content: "Was the appointment created?",
},
},
Tools: []localagi.Tool{
{
Type: "function",
Name: ptr.To("ChooseAnswer"),
Description: ptr.To("Select Yes or No"),
Parameters: ptr.To(jsonschema.Definition{
Type: "object",
Properties: map[string]jsonschema.Definition{
"answer": {
Type: "boolean",
Description: "Set true for Yes and false for no",
},
},
Required: []string{
"answer",
},
}),
},
},
ToolChoice: &localagi.ToolChoice{
Type: "function",
Name: "ChooseAnswer",
},
}
result, err = client.GetAIResponse(&req)
Expect(err).ToNot(HaveOccurred())
Expect(len(result.Output)).To(BeNumerically(">", 0))
fnc, err := result.Output[len(result.Output)-1].ToFunctionToolCall()
Expect(err).ToNot(HaveOccurred())
Expect(fnc.Arguments).To(ContainSubstring("true"))
})
})
})


@@ -488,6 +488,63 @@ func (a *App) ListActions() func(c *fiber.Ctx) error {
}
}
// createToolCallResponse generates a proper tool call response for user-defined actions
func (a *App) createToolCallResponse(id, agentName string, actionState coreTypes.ActionState, conv []openai.ChatCompletionMessage) types.ResponseBody {
// Create tool call ID
toolCallID := fmt.Sprintf("call_%d", time.Now().UnixNano())
// Get function name and arguments
functionName := actionState.Action.Definition().Name.String()
argumentsJSON, err := json.Marshal(actionState.Params)
if err != nil {
xlog.Error("Error marshaling action params for tool call", "error", err)
// Fallback to empty arguments
argumentsJSON = []byte("{}")
}
// Create message object with reasoning
messageObj := types.ResponseMessage{
Type: "message",
ID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
Status: "completed",
Role: "assistant",
Content: []types.MessageContentItem{
{
Type: "output_text",
Text: actionState.Reasoning,
},
},
}
// Create function tool call object
functionToolCall := types.FunctionToolCall{
Arguments: string(argumentsJSON),
CallID: toolCallID,
Name: functionName,
Type: "function_call",
ID: fmt.Sprintf("tool_%d", time.Now().UnixNano()),
Status: "completed",
}
// Create response with both message and tool call in output array
return types.ResponseBody{
ID: id,
Object: "response",
CreatedAt: time.Now().Unix(),
Status: "completed",
Model: agentName,
Output: []interface{}{
messageObj,
functionToolCall,
},
Usage: types.UsageInfo{
InputTokens: 0, // TODO: calculate actual usage
OutputTokens: 0,
TotalTokens: 0,
},
}
}
func (a *App) Responses(pool *state.AgentPool, tracker *conversations.ConversationTracker[string]) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var request types.RequestBody
@@ -507,15 +564,38 @@ func (a *App) Responses(pool *state.AgentPool, tracker *conversations.Conversati
agentName := request.Model
messages := append(conv, request.ToChatCompletionMessages()...)
- a := pool.GetAgent(agentName)
- if a == nil {
+ agent := pool.GetAgent(agentName)
+ if agent == nil {
xlog.Info("Agent not found in pool", c.Params("name"))
return c.Status(http.StatusInternalServerError).JSON(types.ResponseBody{Error: "Agent not found"})
}
- res := a.Ask(
+ // Prepare job options
+ jobOptions := []coreTypes.JobOption{
coreTypes.WithConversationHistory(messages),
- )
+ }
// Add tools if present in the request
if len(request.Tools) > 0 {
builtinTools, userTools := types.SeparateTools(request.Tools)
if len(builtinTools) > 0 {
jobOptions = append(jobOptions, coreTypes.WithBuiltinTools(builtinTools))
xlog.Debug("Adding builtin tools to job", "count", len(builtinTools), "agent", agentName)
}
if len(userTools) > 0 {
jobOptions = append(jobOptions, coreTypes.WithUserTools(userTools))
xlog.Debug("Adding user tools to job", "count", len(userTools), "agent", agentName)
}
}
var choice types.ToolChoice
if err := json.Unmarshal(request.ToolChoice, &choice); err == nil {
if choice.Type == "function" {
jobOptions = append(jobOptions, coreTypes.WithToolChoice(choice.Name))
}
}
res := agent.Ask(jobOptions...)
if res.Error != nil {
xlog.Error("Error asking agent", "agent", agentName, "error", res.Error)
@@ -524,28 +604,44 @@ func (a *App) Responses(pool *state.AgentPool, tracker *conversations.Conversati
xlog.Info("we got a response from the agent", "agent", agentName, "response", res.Response)
}
id := uuid.New().String()
// Check if this is a user-defined tool call
if res.Response == "" && len(res.State) > 0 {
// Get the last action from state
lastAction := res.State[len(res.State)-1]
if coreTypes.IsActionUserDefined(lastAction.Action) {
xlog.Debug("Detected user-defined action, creating tool call response", "action", lastAction.Action.Definition().Name)
// Generate tool call response
response := a.createToolCallResponse(id, agentName, lastAction, conv)
tracker.SetConversation(id, conv) // Save conversation without adding assistant message
return c.JSON(response)
}
}
// Regular text response
conv = append(conv, openai.ChatCompletionMessage{
Role: "assistant",
Content: res.Response,
})
id := uuid.New().String()
tracker.SetConversation(id, conv)
response := types.ResponseBody{
- ID: id,
- Object: "response",
- // "created_at": 1741476542,
+ ID: id,
+ Object: "response",
+ CreatedAt: time.Now().Unix(),
+ Status: "completed",
- Output: []types.ResponseMessage{
- {
+ Model: agentName,
+ Output: []interface{}{
+ types.ResponseMessage{
Type: "message",
ID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
Status: "completed",
Role: "assistant",
Content: []types.MessageContentItem{
- types.MessageContentItem{
+ {
Type: "output_text",
Text: res.Response,
},


@@ -3,16 +3,133 @@ package types
import (
"encoding/json"
coreTypes "github.com/mudler/LocalAGI/core/types"
"github.com/mudler/LocalAGI/pkg/xlog"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
)
// Input represents either a string or a slice of Message
type Input struct {
Text *string `json:"-"`
Messages *[]Message `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for Input
func (i *Input) UnmarshalJSON(data []byte) error {
// Try to unmarshal as string first
var text string
if err := json.Unmarshal(data, &text); err == nil {
i.Text = &text
return nil
}
// Try to unmarshal as []Message
var messages []Message
if err := json.Unmarshal(data, &messages); err == nil {
i.Messages = &messages
return nil
}
return json.Unmarshal(data, &struct{}{}) // fallback to empty struct
}
// MarshalJSON implements custom JSON marshaling for Input
func (i *Input) MarshalJSON() ([]byte, error) {
if i.Text != nil {
return json.Marshal(*i.Text)
}
if i.Messages != nil {
return json.Marshal(*i.Messages)
}
return json.Marshal(nil)
}
// IsText returns true if the input contains text
func (i *Input) IsText() bool {
return i.Text != nil
}
// IsMessages returns true if the input contains messages
func (i *Input) IsMessages() bool {
return i.Messages != nil
}
// GetText returns the text value or empty string
func (i *Input) GetText() string {
if i.Text != nil {
return *i.Text
}
return ""
}
// GetMessages returns the messages value or empty slice
func (i *Input) GetMessages() []Message {
if i.Messages != nil {
return *i.Messages
}
return nil
}
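To make the dual shape concrete, both request forms decode into the same field. A sketch (errors ignored):
var a, b Input
_ = json.Unmarshal([]byte(`"hello"`), &a)
// a.IsText() == true, a.GetText() == "hello"

_ = json.Unmarshal([]byte(`[{"type":"message","role":"user","content":"hi"}]`), &b)
// b.IsMessages() == true, len(b.GetMessages()) == 1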
// Message represents different types of messages in the input
type Message struct {
// Common fields
Type string `json:"type,omitempty"`
// InputMessage fields (when this is a regular chat message)
Role *string `json:"role,omitempty"`
Content *Content `json:"content,omitempty"`
// WebSearchToolCall fields (when type == "web_search_call")
ID *string `json:"id,omitempty"`
Status *string `json:"status,omitempty"`
// Function call and function call output
Arguments *string `json:"arguments,omitempty"`
CallId *string `json:"call_id,omitempty"`
Name *string `json:"name,omitempty"`
Output *string `json:"output,omitempty"`
}
// IsInputMessage returns true if this is a regular chat message
func (m *Message) IsInputMessage() bool {
return m.Role != nil
}
// IsWebSearchCall returns true if this is a web search tool call
func (m *Message) IsWebSearchCall() bool {
return m.Type == "web_search_call"
}
func (m *Message) IsFunctionCall() bool {
return m.Type == "function_call"
}
func (m *Message) IsFunctionCallOutput() bool {
return m.Type == "function_call_output"
}
// ToInputMessage converts to InputMessage if this is a regular message
func (m *Message) ToInputMessage() *InputMessage {
if m.IsInputMessage() && m.Role != nil && m.Content != nil {
content := *m.Content
return &InputMessage{
Role: *m.Role,
Content: content,
}
}
return nil
}
type ToolChoice struct {
Name string `json:"name"`
Type string `json:"type"`
}
// RequestBody represents the request body structure for the OpenAI API
type RequestBody struct {
Model string `json:"model"`
- Input json.RawMessage `json:"input"`
- InputText string `json:"input_text"`
- InputMessages []InputMessage `json:"input_messages"`
+ Input Input `json:"input"`
Include []string `json:"include,omitempty"`
Instructions *string `json:"instructions,omitempty"`
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
@@ -24,92 +141,103 @@ type RequestBody struct {
Stream *bool `json:"stream,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
Text *TextConfig `json:"text,omitempty"`
- ToolChoice interface{} `json:"tool_choice,omitempty"`
- Tools []interface{} `json:"tools,omitempty"`
+ ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
+ Tools []Tool `json:"tools,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
Truncation *string `json:"truncation,omitempty"`
}
func (r *RequestBody) SetInputByType() {
- xlog.Debug("[Parse Request] Set input type", "input", string(r.Input))
- var inputText string
- if err := json.Unmarshal(r.Input, &inputText); err == nil {
- r.InputText = inputText
- return
+ // This method is no longer needed as Input handles unmarshaling automatically
+ if r.Input.IsText() {
+ xlog.Debug("[Parse Request] Set input type as text", "input", r.Input.GetText())
+ } else if r.Input.IsMessages() {
+ xlog.Debug("[Parse Request] Input messages parsed", "messages", r.Input.GetMessages())
}
- var inputMessages []InputMessage
- if err := json.Unmarshal(r.Input, &inputMessages); err != nil {
- xlog.Warn("[Parse Request] Input type not recognized", "input", string(r.Input))
- return
- }
- for _, i := range inputMessages {
- switch content := i.Content.(type) {
- case []ContentItem:
- i.ContentItems = content
- case string:
- i.ContentText = content
- default:
- xlog.Warn("[Parse Request] Input content type not recognized", "content", content)
- }
- r.InputMessages = append(r.InputMessages, i)
- }
- xlog.Debug("[Parse Request] Input messages parsed", "messages", r.InputMessages)
}
func (r *RequestBody) ToChatCompletionMessages() []openai.ChatCompletionMessage {
result := []openai.ChatCompletionMessage{}
for _, m := range r.InputMessages {
content := []openai.ChatMessagePart{}
oneImageWasFound := false
if r.Input.IsMessages() {
for _, m := range r.Input.GetMessages() {
if m.ContentText != "" {
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: m.ContentText,
})
}
for _, c := range m.ContentItems {
switch c.Type {
case "text":
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: c.Text,
})
case "image":
oneImageWasFound = true
content = append(content, openai.ChatMessagePart{
Type: "image",
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
if m.IsFunctionCall() {
result = append(result, openai.ChatCompletionMessage{
Role: "assistant",
ToolCalls: []openai.ToolCall{
{
Type: "function",
ID: *m.CallId,
Function: openai.FunctionCall{
Arguments: *m.Arguments,
Name: *m.Name,
},
},
},
})
}
}
if oneImageWasFound {
result = append(result, openai.ChatCompletionMessage{
Role: m.Role,
MultiContent: content,
})
} else {
for _, c := range content {
if m.IsFunctionCallOutput() {
result = append(result, openai.ChatCompletionMessage{
Role: m.Role,
Content: c.Text,
Role: "tool",
Content: *m.Output,
ToolCallID: *m.CallId,
})
}
if !m.IsInputMessage() {
continue
}
content := []openai.ChatMessagePart{}
oneImageWasFound := false
if m.Content != nil && m.Content.IsText() && m.Content.GetText() != "" {
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: m.Content.GetText(),
})
}
if m.Content != nil && m.Content.IsItems() {
for _, c := range m.Content.GetItems() {
switch c.Type {
case "text":
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: c.Text,
})
case "image":
oneImageWasFound = true
content = append(content, openai.ChatMessagePart{
Type: "image",
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
})
}
}
}
if oneImageWasFound {
result = append(result, openai.ChatCompletionMessage{
Role: *m.Role,
MultiContent: content,
})
} else {
for _, c := range content {
result = append(result, openai.ChatCompletionMessage{
Role: *m.Role,
Content: c.Text,
})
}
}
}
}
- if r.InputText != "" {
+ if r.Input.IsText() && r.Input.GetText() != "" {
result = append(result, openai.ChatCompletionMessage{
Role: "user",
- Content: r.InputText,
+ Content: r.Input.GetText(),
})
}
@@ -148,6 +276,16 @@ type MessageContentItem struct {
Annotations []interface{} `json:"annotations"`
}
// FunctionToolCall represents a function tool call as a top-level object in the output array
type FunctionToolCall struct {
Arguments string `json:"arguments"`
CallID string `json:"call_id"`
Name string `json:"name"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
}
// UsageInfo represents token usage information
type UsageInfo struct {
InputTokens int `json:"input_tokens"`
@@ -174,7 +312,7 @@ type ResponseBody struct {
Instructions interface{} `json:"instructions"`
MaxOutputTokens interface{} `json:"max_output_tokens"`
Model string `json:"model"`
- Output []ResponseMessage `json:"output"`
+ Output []interface{} `json:"output"`
ParallelToolCalls bool `json:"parallel_tool_calls"`
PreviousResponseID interface{} `json:"previous_response_id"`
Reasoning ReasoningConfig `json:"reasoning"`
@@ -182,7 +320,7 @@ type ResponseBody struct {
Temperature float64 `json:"temperature"`
Text TextConfig `json:"text"`
ToolChoice string `json:"tool_choice"`
- Tools []interface{} `json:"tools"`
+ Tools []Tool `json:"tools"`
TopP float64 `json:"top_p"`
Truncation string `json:"truncation"`
Usage UsageInfo `json:"usage"`
@@ -190,12 +328,72 @@ type ResponseBody struct {
Metadata map[string]interface{} `json:"metadata"`
}
// Content represents either a string or a slice of ContentItem
type Content struct {
Text *string `json:"-"`
Items *[]ContentItem `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for Content
func (c *Content) UnmarshalJSON(data []byte) error {
// Try to unmarshal as string first
var text string
if err := json.Unmarshal(data, &text); err == nil {
c.Text = &text
return nil
}
// Try to unmarshal as []ContentItem
var items []ContentItem
if err := json.Unmarshal(data, &items); err == nil {
c.Items = &items
return nil
}
return json.Unmarshal(data, &struct{}{}) // fallback to empty struct
}
// MarshalJSON implements custom JSON marshaling for Content
func (c *Content) MarshalJSON() ([]byte, error) {
if c.Text != nil {
return json.Marshal(*c.Text)
}
if c.Items != nil {
return json.Marshal(*c.Items)
}
return json.Marshal(nil)
}
// IsText returns true if the content contains text
func (c *Content) IsText() bool {
return c.Text != nil
}
// IsItems returns true if the content contains items
func (c *Content) IsItems() bool {
return c.Items != nil
}
// GetText returns the text value or empty string
func (c *Content) GetText() string {
if c.Text != nil {
return *c.Text
}
return ""
}
// GetItems returns the items value or empty slice
func (c *Content) GetItems() []ContentItem {
if c.Items != nil {
return *c.Items
}
return nil
}
// InputMessage represents a user input message
type InputMessage struct {
- Role string `json:"role"`
- Content any `json:"content"`
- ContentText string `json:"content_text"`
- ContentItems []ContentItem `json:"content_items"`
+ Role string `json:"role"`
+ Content Content `json:"content"`
}
// ContentItem represents an item in a content array
@@ -204,3 +402,129 @@ type ContentItem struct {
Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"`
}
// Tool represents a tool that can be called by the assistant
type Tool struct {
Type string `json:"type"`
// Function tool fields (used when type == "function")
Name *string `json:"name,omitempty"`
Description *string `json:"description,omitempty"`
Parameters *jsonschema.Definition `json:"parameters,omitempty"`
Strict *bool `json:"strict,omitempty"`
// Web search tool fields (used when type == "web_search_preview" etc.)
SearchContextSize *string `json:"search_context_size,omitempty"`
UserLocation *UserLocation `json:"user_location,omitempty"`
}
// IsFunction returns true if this is a function tool
func (t *Tool) IsFunction() bool {
return t.Type == "function"
}
// IsWebSearch returns true if this is a web search tool
func (t *Tool) IsWebSearch() bool {
return t.Type == "web_search_preview" || t.Type == "web_search_preview_2025_03_11"
}
// ToActionDefinition converts this tool to an ActionDefinition
func (t *Tool) ToActionDefinition() *coreTypes.ActionDefinition {
if t.IsFunction() && t.Name != nil {
// Regular function tool
properties := make(map[string]jsonschema.Definition)
required := []string{}
if t.Parameters != nil {
properties = t.Parameters.Properties
required = t.Parameters.Required
}
desc := ""
if t.Description != nil {
desc = *t.Description
}
return &coreTypes.ActionDefinition{
Name: coreTypes.ActionDefinitionName(*t.Name),
Description: desc,
Properties: properties,
Required: required,
}
}
if t.IsWebSearch() {
// Convert web search builtin to ActionDefinition
name := "web_search_" + t.Type
desc := "Web search tool for finding relevant information online"
// Create parameters schema for web search options
properties := map[string]jsonschema.Definition{
"search_context_size": {
Type: jsonschema.String,
Enum: []string{"low", "medium", "high"},
Description: "Amount of context window space to use for search",
},
"user_location": {
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"type": {
Type: jsonschema.String,
Enum: []string{"approximate"},
Description: "Type of location approximation",
},
"city": {
Type: jsonschema.String,
Description: "City of the user",
},
"country": {
Type: jsonschema.String,
Description: "Two-letter ISO country code",
},
"region": {
Type: jsonschema.String,
Description: "Region of the user",
},
"timezone": {
Type: jsonschema.String,
Description: "IANA timezone of the user",
},
},
},
}
return &coreTypes.ActionDefinition{
Name: coreTypes.ActionDefinitionName(name),
Description: desc,
Properties: properties,
Required: []string{},
}
}
return nil
}
// SeparateTools separates a slice of Tools into builtin tools and user tools as ActionDefinitions
func SeparateTools(tools []Tool) (builtinTools []coreTypes.ActionDefinition, userTools []coreTypes.ActionDefinition) {
for _, tool := range tools {
if actionDef := tool.ToActionDefinition(); actionDef != nil {
if tool.IsFunction() {
// User-defined function tool
userTools = append(userTools, *actionDef)
} else if tool.IsWebSearch() {
// Builtin tool (web search)
builtinTools = append(builtinTools, *actionDef)
}
}
}
return builtinTools, userTools
}
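For example, a request mixing one function tool and one builtin web search tool splits as follows (sketch; assumes the ptr helper added in this commit is imported here):
tools := []Tool{
    {Type: "function", Name: ptr.To("CreateTask")},
    {Type: "web_search_preview"},
}
builtin, user := SeparateTools(tools)
// user:    one ActionDefinition named "CreateTask"
// builtin: one ActionDefinition named "web_search_web_search_preview",
//          which validateBuiltinTools later matches by its "web_search_" prefix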
// UserLocation represents the user's location for web search
type UserLocation struct {
Type string `json:"type"`
City *string `json:"city,omitempty"`
Country *string `json:"country,omitempty"`
Region *string `json:"region,omitempty"`
Timezone *string `json:"timezone,omitempty"`
}