feat(api): Handle tool calls in responses API

Signed-off-by: Richard Palethorpe <io@richiejp.com>
Author: Richard Palethorpe
Date:   2025-06-12 05:11:43 +01:00
Parent: 9160ca598e
Commit: ff6890c9c1

9 changed files with 985 additions and 96 deletions


@@ -427,6 +427,22 @@ func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction
return conv, nil
}
// getAvailableActionsForJob returns available actions including user-defined ones for a specific job
func (a *Agent) getAvailableActionsForJob(job *types.Job) types.Actions {
// Start with regular available actions
baseActions := a.availableActions()
// Add user-defined actions from the job
userTools := job.GetUserTools()
if len(userTools) > 0 {
userDefinedActions := types.CreateUserDefinedActions(userTools)
baseActions = append(baseActions, userDefinedActions...)
xlog.Debug("Added user-defined actions", "definitions", userTools)
}
return baseActions
}
func (a *Agent) availableActions() types.Actions {
// defaultActions := append(a.options.userActions, action.NewReply())
@@ -493,16 +509,19 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
xlog.Debug("[pickAction] picking action starts", "messages", messages)
// Get available actions including user-defined ones
availableActions := a.getAvailableActionsForJob(job)
// Identify the goal of this conversation
- if !a.options.forceReasoning {
- xlog.Debug("not forcing reasoning")
+ if !a.options.forceReasoning || job.ToolChoice != "" {
+ xlog.Debug("not forcing reasoning", "forceReasoning", a.options.forceReasoning, "ToolChoice", job.ToolChoice)
// We also could avoid to use functions here and get just a reply from the LLM
// and then use the reply to get the action
thought, err := a.decision(job,
messages,
- a.availableActions().ToTools(),
- "",
+ availableActions.ToTools(),
+ job.ToolChoice,
maxRetries)
if err != nil {
return nil, nil, "", err
@@ -512,7 +531,7 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
xlog.Debug("thought message", "message", thought.message)
// Find the action
- chosenAction := a.availableActions().Find(thought.actionName)
+ chosenAction := availableActions.Find(thought.actionName)
if chosenAction == nil || thought.actionName == "" {
xlog.Debug("no answer")


@@ -572,6 +572,66 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
return failedBy == "" && (!hasTriggers || triggeredBy != ""), nil
}
// validateBuiltinTools checks that builtin tools specified by the user can be matched to available actions
func (a *Agent) validateBuiltinTools(job *types.Job) {
builtinTools := job.GetBuiltinTools()
if len(builtinTools) == 0 {
return
}
// Get available actions
availableActions := a.mcpActions
for _, tool := range builtinTools {
functionName := tool.Name
// Check if this is a web search builtin tool
if strings.HasPrefix(string(functionName), "web_search_") {
// Look for a search action
searchAction := availableActions.Find("search")
if searchAction == nil {
xlog.Warn("Web search builtin tool specified but no 'search' action available",
"function_name", functionName,
"agent", a.Character.Name)
} else {
xlog.Debug("Web search builtin tool matched to search action",
"function_name", functionName,
"agent", a.Character.Name)
}
} else {
// For future builtin tools, add more matching logic here
xlog.Warn("Unknown builtin tool specified",
"function_name", functionName,
"agent", a.Character.Name)
}
}
}
// replyWithToolCall handles user-defined actions by recording the action state without setting Response
func (a *Agent) replyWithToolCall(job *types.Job, conv []openai.ChatCompletionMessage, params types.ActionParams, chosenAction types.Action, reasoning string) {
// Record the action state so the webui can detect this is a user-defined action
stateResult := types.ActionState{
ActionCurrentState: types.ActionCurrentState{
Job: job,
Action: chosenAction,
Params: params,
Reasoning: reasoning,
},
ActionResult: types.ActionResult{
Result: reasoning, // The reasoning/message to show to user
},
}
// Add the action state to the job result
job.Result.SetResult(stateResult)
// Set conversation but leave Response empty
// The webui will detect the user-defined action and generate the proper tool call response
job.Result.Conversation = conv
// job.Result.Response remains empty - this signals to webui that it should check State
job.Result.Finish(nil)
}
func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
if err := job.GetContext().Err(); err != nil {
job.Result.Finish(fmt.Errorf("expired"))
@@ -625,6 +685,9 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
// RAG
conv = a.knowledgeBaseLookup(job, conv)
// Validate builtin tools against available actions
a.validateBuiltinTools(job)
var pickTemplate string
var reEvaluationTemplate string
@@ -843,6 +906,13 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
}
if !chosenAction.Definition().Name.Is(action.PlanActionName) {
// Check if this is a user-defined action
if types.IsActionUserDefined(chosenAction) {
xlog.Debug("User-defined action chosen, returning tool call", "action", chosenAction.Definition().Name)
a.replyWithToolCall(job, conv, actionParams, chosenAction, reasoning)
return
}
result, err := a.runAction(job, chosenAction, actionParams)
if err != nil {
result.Result = fmt.Sprintf("Error running tool: %v", err)


@@ -3,6 +3,7 @@ package types
import (
"context"
"encoding/json"
"fmt"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
@@ -93,6 +94,60 @@ type Action interface {
Plannable() bool
}
// UserDefinedChecker interface to identify user-defined actions
type UserDefinedChecker interface {
IsUserDefined() bool
}
// BaseAction provides default implementation for Action interface
// Embed this in action implementations to get the default IsUserDefined behavior
type BaseAction struct{}
func (b *BaseAction) IsUserDefined() bool {
return false // Regular actions are not user-defined
}
// IsActionUserDefined checks if an action is user-defined
func IsActionUserDefined(action Action) bool {
if checker, ok := action.(UserDefinedChecker); ok {
return checker.IsUserDefined()
}
return false // Actions without UserDefinedChecker are not user-defined
}
// UserDefinedAction represents a user-defined function tool
type UserDefinedAction struct {
ActionDef *ActionDefinition
}
func (u *UserDefinedAction) Run(ctx context.Context, sharedState *AgentSharedState, action ActionParams) (ActionResult, error) {
// User-defined actions should not be executed directly
return ActionResult{}, fmt.Errorf("user-defined action '%s' cannot be executed by agent", u.ActionDef.Name)
}
func (u *UserDefinedAction) Definition() ActionDefinition {
return *u.ActionDef
}
func (u *UserDefinedAction) Plannable() bool {
return true // User-defined actions are plannable
}
func (u *UserDefinedAction) IsUserDefined() bool {
return true
}
// CreateUserDefinedActions converts user tools to UserDefinedAction instances
func CreateUserDefinedActions(userTools []ActionDefinition) []Action {
var actions []Action
for _, tool := range userTools {
actions = append(actions, &UserDefinedAction{
ActionDef: &tool,
})
}
return actions
}
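For illustration, a minimal sketch (hypothetical, not part of this commit, assuming it sits in this same types package) of how the dispatch behaves: a UserDefinedAction reports true through UserDefinedChecker, while actions that only embed BaseAction (or implement no checker at all) fall back to false.
// Hypothetical sketch: exampleUserDefinedCheck is not part of this commit.
func exampleUserDefinedCheck() {
    def := ActionDefinition{Name: "CreateTask"}
    ud := &UserDefinedAction{ActionDef: &def}

    // true: UserDefinedAction implements UserDefinedChecker and returns true.
    fmt.Println(IsActionUserDefined(ud))

    // Any action that does not implement UserDefinedChecker, or embeds
    // BaseAction (whose IsUserDefined returns false), yields false here.
}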
type Actions []Action
func (a Actions) ToTools() []openai.Tool {


@@ -20,6 +20,11 @@ type Job struct {
UUID string
Metadata map[string]interface{}
DoneFilter bool
// Tools available for this job
BuiltinTools []ActionDefinition // Built-in tools like web search
UserTools []ActionDefinition // User-defined function tools
ToolChoice string
pastActions []*ActionRequest
nextAction *Action
@@ -45,6 +50,24 @@ func WithConversationHistory(history []openai.ChatCompletionMessage) JobOption {
}
}
func WithBuiltinTools(tools []ActionDefinition) JobOption {
return func(j *Job) {
j.BuiltinTools = tools
}
}
func WithUserTools(tools []ActionDefinition) JobOption {
return func(j *Job) {
j.UserTools = tools
}
}
func WithToolChoice(choice string) JobOption {
return func(j *Job) {
j.ToolChoice = choice
}
}
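Taken together, these options let a caller thread request tools into a job. A rough caller-side sketch; the surrounding variables and the exact Ask call mirror the webui handler later in this commit and are assumptions here:
// Sketch only: messages, userTools, builtinTools and agent are assumed to be in scope.
jobOptions := []JobOption{
    WithConversationHistory(messages),
    WithUserTools(userTools),       // user-defined function tools from the request
    WithBuiltinTools(builtinTools), // e.g. web_search_preview converted to ActionDefinitions
    WithToolChoice("CreateTask"),   // passed through as the tool choice in pickAction
}
res := agent.Ask(jobOptions...)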
func WithReasoningCallback(f func(ActionCurrentState) bool) JobOption {
return func(r *Job) {
r.ReasoningCallback = f
@@ -227,3 +250,21 @@ func (j *Job) IncrementEvaluationLoop() {
currentLoop := j.GetEvaluationLoop()
j.Metadata["evaluation_loop"] = currentLoop + 1
}
// GetBuiltinTools returns the builtin tools for this job
func (j *Job) GetBuiltinTools() []ActionDefinition {
return j.BuiltinTools
}
// GetUserTools returns the user tools for this job
func (j *Job) GetUserTools() []ActionDefinition {
return j.UserTools
}
// GetAllTools returns all tools (builtin + user) for this job
func (j *Job) GetAllTools() []ActionDefinition {
allTools := make([]ActionDefinition, 0, len(j.BuiltinTools)+len(j.UserTools))
allTools = append(allTools, j.BuiltinTools...)
allTools = append(allTools, j.UserTools...)
return allTools
}


@@ -4,18 +4,58 @@ import (
"encoding/json"
"fmt"
"net/http"
"github.com/sashabaranov/go-openai/jsonschema"
)
// UserLocation represents the user's location for web search
type UserLocation struct {
Type string `json:"type"`
City *string `json:"city,omitempty"`
Country *string `json:"country,omitempty"`
Region *string `json:"region,omitempty"`
Timezone *string `json:"timezone,omitempty"`
}
type Tool struct {
Type string `json:"type"`
// Function tool fields (used when type == "function")
Name *string `json:"name,omitempty"`
Description *string `json:"description,omitempty"`
Parameters *jsonschema.Definition `json:"parameters,omitempty"`
// Web search tool fields (used when type == "web_search_preview" etc.)
SearchContextSize *string `json:"search_context_size,omitempty"`
UserLocation *UserLocation `json:"user_location,omitempty"`
}
type ToolChoice struct {
Name string `json:"name"`
Type string `json:"type"`
}
// RequestBody represents the message request to the AI model
type RequestBody struct {
Model string `json:"model"`
Input any `json:"input"`
Temperature *float64 `json:"temperature,omitempty"`
Tools []Tool `json:"tools,omitempty"`
ToolChoice *ToolChoice `json:"tool_choice"`
MaxTokens *int `json:"max_output_tokens,omitempty"`
}
type InputFunctionToolCallOutput struct {
CallID string `json:"call_id"`
Output string `json:"output"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
}
// InputMessage represents a user input message
type InputMessage struct {
Type string `json:"type"`
Role string `json:"role"`
Content any `json:"content"`
}
@@ -29,10 +69,49 @@ type ContentItem struct {
// ResponseBody represents the response from the AI model
type ResponseBody struct {
- CreatedAt int64 `json:"created_at"`
- Status string `json:"status"`
- Error any `json:"error,omitempty"`
- Output []ResponseMessage `json:"output"`
+ CreatedAt int64 `json:"created_at"`
+ Status string `json:"status"`
+ Error any `json:"error,omitempty"`
+ Output []ResponseBase `json:"output"`
+ Tools []Tool `json:"tools"`
}
type ResponseType string
const (
ResponseTypeFunctionToolCall ResponseType = "function_call"
ResponseTypeMessage ResponseType = "message"
)
type ResponseBase json.RawMessage
func (r *ResponseBase) UnmarshalJSON(data []byte) error {
return (*json.RawMessage)(r).UnmarshalJSON(data)
}
func (r *ResponseBase) ToMessage() (msg ResponseMessage, err error) {
err = json.Unmarshal(*r, &msg)
if msg.Type != string(ResponseTypeMessage) {
return ResponseMessage{}, fmt.Errorf("Expected %s, not %s", ResponseTypeMessage, msg.Type)
}
return
}
func (r *ResponseBase) ToFunctionToolCall() (msg ResponseFunctionToolCall, err error) {
err = json.Unmarshal(*r, &msg)
if msg.Type != string(ResponseTypeFunctionToolCall) {
return ResponseFunctionToolCall{}, fmt.Errorf("Expected %s, not %s", ResponseTypeFunctionToolCall, msg.Type)
}
return
}
type ResponseFunctionToolCall struct {
Arguments string `json:"arguments"`
CallID string `json:"call_id"`
Name string `json:"name"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
}
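Client code that cares about tool calls (not just the assistant text, as SimpleAIResponse below does) can dispatch on each raw output item. A small sketch; the two handle* helpers are hypothetical:
for _, out := range response.Output {
    if msg, err := out.ToMessage(); err == nil {
        handleAssistantText(msg) // hypothetical: consume the assistant message content
        continue
    }
    if call, err := out.ToFunctionToolCall(); err == nil {
        handleToolCall(call.Name, call.CallID, call.Arguments) // hypothetical: execute the tool
    }
}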
// ResponseMessage represents a message in the response
@@ -85,7 +164,11 @@ func (c *Client) SimpleAIResponse(agentName, input string) (string, error) {
}
// Extract the text response from the output
- for _, msg := range response.Output {
+ for _, out := range response.Output {
+ msg, err := out.ToMessage()
+ if err != nil {
+ return "", fmt.Errorf("out.ToMessage: %w", err)
+ }
if msg.Role == "assistant" {
for _, content := range msg.Content {
if content.Type == "output_text" {
@@ -113,7 +196,12 @@ func (c *Client) ChatAIResponse(agentName string, messages []InputMessage) (stri
}
// Extract the text response from the output
- for _, msg := range response.Output {
+ for _, out := range response.Output {
+ msg, err := out.ToMessage()
+ if err != nil {
+ return "", fmt.Errorf("out.ToMessage: %w", err)
+ }
if msg.Role == "assistant" {
for _, content := range msg.Content {
if content.Type == "output_text" {

pkg/utils/ptr/ptr.go (new file)

@@ -0,0 +1,5 @@
package ptr
func To[T any](v T) *T {
return &v
}
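The helper only exists so literals can be assigned to the pointer fields on Tool and friends; a usage sketch:
name := ptr.To("CreateTask") // *string, e.g. for Tool.Name
strict := ptr.To(true)       // works for any type via the type parameter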


@@ -1,10 +1,15 @@
package e2e_test
import (
"encoding/json"
"fmt"
"net/http"
"time"
localagi "github.com/mudler/LocalAGI/pkg/client"
"github.com/mudler/LocalAGI/pkg/utils/ptr"
"github.com/mudler/LocalAGI/pkg/xlog"
"github.com/sashabaranov/go-openai/jsonschema"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -15,9 +20,13 @@ var _ = Describe("Agent test", func() {
BeforeEach(func() {
Eventually(func() error {
// test apiURL is working and available
- _, err := http.Get(apiURL + "/readyz")
+ _, err := http.Get(localagiURL + "/readyz")
return err
}, "10m", "10s").ShouldNot(HaveOccurred())
client := localagi.NewClient(localagiURL, "", time.Minute)
err := client.DeleteAgent("testagent")
Expect(err).ToNot(HaveOccurred())
})
It("create agent", func() {
@@ -33,5 +42,187 @@ var _ = Describe("Agent test", func() {
Expect(result).ToNot(BeEmpty())
})
It("tool call", func() {
client := localagi.NewClient(localagiURL, "", 5*time.Minute)
err := client.CreateAgent(&localagi.AgentConfig{
Name: "testagent",
})
Expect(err).ToNot(HaveOccurred())
req := localagi.RequestBody{
Model: "testagent",
Input: "Create an appointment next week on wednesday at 10:00 am for the whole day. The topic is about AI and you include ABC and DEF to the appointment.",
Tools: []localagi.Tool{
{
Type: "function",
Name: ptr.To("CreateTask"),
Description: ptr.To("Write the needed details whenever you're asked to create something like an info, appointment, e-mail or when you're asked to remind of anything or create a reminder. Also use this if you're supposed to answer an e-mail."),
Parameters: ptr.To(jsonschema.Definition{
Type: "object",
Properties: map[string]jsonschema.Definition{
"task": {
Type: "string",
Description: "Look for the name of the task you're supposed to do or create ",
Enum: []string{
"appointment",
"E-mail",
},
},
"subject": {
Type: "string",
Description: "A subject the task is about. Infer this from the given context data and user prompt.",
},
"reply": {
Type: "string",
Description: "A sharp and short reply to the contextual data given. Use a friendly and neutral general greeting.",
},
"recipient": {
Type: "array",
Description: "A list of names and abbreviations to send our task to. Abbreviations always have to match exactly. If the user gives you first names you can deduce the last name.",
Items: &jsonschema.Definition{
Type: "string",
Enum: []string{
"ABC",
"DEF",
},
},
},
"datestart": {
Type: "string",
Description: "The date and time when the task should start. Discard any older dates than today. Use tomorrow as default. Use the format DD/MM/YYYY HH:MM",
},
"dateend": {
Type: "string",
Description: "The date and time when a meeting should end. Default to start date. If the duration of an appointment is given, calculate the end with the start date. Use the format DD/MM/YYYY HH:MM",
},
"datedone": {
Type: "string",
Description: "The date and time when the task should be done. Use the format DD/MM/YYYY HH:MM",
},
"private": {
Type: "boolean",
Description: "Whether the task should be private or not. Default to false.",
},
"includeall": {
Type: "boolean",
Description: "Whether the task should include every mentioned person or not. Default to true. If you find explicitly mentioned people in the prompt whilst ignoring the contextual xml schema you choose false unless it is mentioned that you should include everyone.",
},
"wholedayappointment": {
Type: "boolean",
Description: "Whether the appointment should be done for the whole day. Default to false unless mentioned by the user prompt. Ignore the xml schema for this.",
},
"remainder": {
Type: "boolean",
Description: "Whether you are explicitly supposed to remind of something or not. Default to false. Ignore the xml schema for this.",
},
},
Required: []string{
"task",
"recipient",
"datestart",
"dateend",
"datedone",
"private",
"wholedayappointment",
"remainder",
"subject",
"reply",
"includeall",
},
}),
},
}}
result, err := client.GetAIResponse(&req)
Expect(err).ToNot(HaveOccurred())
Expect(result).ToNot(BeNil())
var call localagi.ResponseFunctionToolCall
var args struct {
Task string `json:"task"`
Subject string `json:"subject"`
Reply string `json:"reply"`
Recipient []string `json:"recipient"`
DateStart string `json:"datestart"`
DateEnd string `json:"dateend"`
DateDone string `json:"datedone"`
Private bool `json:"private"`
IncludeAll bool `json:"includeall"`
WholeDayAppointment bool `json:"wholedayappointment"`
Remainder bool `json:"remainder"`
}
for _, out := range result.Output {
msg, err := out.ToMessage()
if err == nil && msg.Role == "assistant" {
xlog.Info("Agent returned message", "message", msg)
continue
}
fnc, err := out.ToFunctionToolCall()
call = fnc
Expect(err).ToNot(HaveOccurred())
Expect(string(fnc.Type)).To(Equal("function_call"))
Expect(fnc.Name).To(Equal("CreateTask"))
err = json.Unmarshal([]byte(fnc.Arguments), &args)
Expect(err).ToNot(HaveOccurred())
Expect(args.Task).To(Equal("appointment"))
Expect(args.Subject).ToNot(BeEmpty())
Expect(args.Reply).ToNot(BeEmpty())
}
req = localagi.RequestBody{
Model: "testagent",
Input: []any{
localagi.InputMessage{
Type: "message",
Role: "user",
Content: "Create an appointment next week on wednesday at 10:00 am for the whole day. The topic is about AI and you include ABC and DEF to the appointment.",
},
call,
localagi.InputFunctionToolCallOutput{
Type: "function_call_output",
CallID: call.CallID,
Output: fmt.Sprintf("Successfully created %s: %s", args.Task, args.Subject),
},
localagi.InputMessage{
Type: "message",
Role: "user",
Content: "Was the appointment created?",
},
},
Tools: []localagi.Tool{
{
Type: "function",
Name: ptr.To("ChooseAnswer"),
Description: ptr.To("Select Yes or No"),
Parameters: ptr.To(jsonschema.Definition{
Type: "object",
Properties: map[string]jsonschema.Definition{
"answer": {
Type: "boolean",
Description: "Set true for Yes and false for no",
},
},
Required: []string{
"answer",
},
}),
},
},
ToolChoice: &localagi.ToolChoice{
Type: "function",
Name: "ChooseAnswer",
},
}
result, err = client.GetAIResponse(&req)
Expect(err).ToNot(HaveOccurred())
Expect(len(result.Output)).To(BeNumerically(">", 0))
fnc, err := result.Output[len(result.Output)-1].ToFunctionToolCall()
Expect(err).ToNot(HaveOccurred())
Expect(fnc.Arguments).To(ContainSubstring("true"))
})
})
})


@@ -488,6 +488,63 @@ func (a *App) ListActions() func(c *fiber.Ctx) error {
}
}
// createToolCallResponse generates a proper tool call response for user-defined actions
func (a *App) createToolCallResponse(id, agentName string, actionState coreTypes.ActionState, conv []openai.ChatCompletionMessage) types.ResponseBody {
// Create tool call ID
toolCallID := fmt.Sprintf("call_%d", time.Now().UnixNano())
// Get function name and arguments
functionName := actionState.Action.Definition().Name.String()
argumentsJSON, err := json.Marshal(actionState.Params)
if err != nil {
xlog.Error("Error marshaling action params for tool call", "error", err)
// Fallback to empty arguments
argumentsJSON = []byte("{}")
}
// Create message object with reasoning
messageObj := types.ResponseMessage{
Type: "message",
ID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
Status: "completed",
Role: "assistant",
Content: []types.MessageContentItem{
{
Type: "output_text",
Text: actionState.Reasoning,
},
},
}
// Create function tool call object
functionToolCall := types.FunctionToolCall{
Arguments: string(argumentsJSON),
CallID: toolCallID,
Name: functionName,
Type: "function_call",
ID: fmt.Sprintf("tool_%d", time.Now().UnixNano()),
Status: "completed",
}
// Create response with both message and tool call in output array
return types.ResponseBody{
ID: id,
Object: "response",
CreatedAt: time.Now().Unix(),
Status: "completed",
Model: agentName,
Output: []interface{}{
messageObj,
functionToolCall,
},
Usage: types.UsageInfo{
InputTokens: 0, // TODO: calculate actual usage
OutputTokens: 0,
TotalTokens: 0,
},
}
}
func (a *App) Responses(pool *state.AgentPool, tracker *conversations.ConversationTracker[string]) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var request types.RequestBody
@@ -507,15 +564,38 @@ func (a *App) Responses(pool *state.AgentPool, tracker *conversations.Conversati
agentName := request.Model
messages := append(conv, request.ToChatCompletionMessages()...)
- a := pool.GetAgent(agentName)
- if a == nil {
+ agent := pool.GetAgent(agentName)
+ if agent == nil {
xlog.Info("Agent not found in pool", c.Params("name"))
return c.Status(http.StatusInternalServerError).JSON(types.ResponseBody{Error: "Agent not found"})
}
- res := a.Ask(
+ // Prepare job options
+ jobOptions := []coreTypes.JobOption{
coreTypes.WithConversationHistory(messages),
- )
+ }
// Add tools if present in the request
if len(request.Tools) > 0 {
builtinTools, userTools := types.SeparateTools(request.Tools)
if len(builtinTools) > 0 {
jobOptions = append(jobOptions, coreTypes.WithBuiltinTools(builtinTools))
xlog.Debug("Adding builtin tools to job", "count", len(builtinTools), "agent", agentName)
}
if len(userTools) > 0 {
jobOptions = append(jobOptions, coreTypes.WithUserTools(userTools))
xlog.Debug("Adding user tools to job", "count", len(userTools), "agent", agentName)
}
}
var choice types.ToolChoice
if err := json.Unmarshal(request.ToolChoice, &choice); err == nil {
if choice.Type == "function" {
jobOptions = append(jobOptions, coreTypes.WithToolChoice(choice.Name))
}
}
res := agent.Ask(jobOptions...)
if res.Error != nil {
xlog.Error("Error asking agent", "agent", agentName, "error", res.Error)
@@ -524,28 +604,44 @@ func (a *App) Responses(pool *state.AgentPool, tracker *conversations.Conversati
xlog.Info("we got a response from the agent", "agent", agentName, "response", res.Response)
}
id := uuid.New().String()
// Check if this is a user-defined tool call
if res.Response == "" && len(res.State) > 0 {
// Get the last action from state
lastAction := res.State[len(res.State)-1]
if coreTypes.IsActionUserDefined(lastAction.Action) {
xlog.Debug("Detected user-defined action, creating tool call response", "action", lastAction.Action.Definition().Name)
// Generate tool call response
response := a.createToolCallResponse(id, agentName, lastAction, conv)
tracker.SetConversation(id, conv) // Save conversation without adding assistant message
return c.JSON(response)
}
}
// Regular text response
conv = append(conv, openai.ChatCompletionMessage{
Role: "assistant",
Content: res.Response,
})
id := uuid.New().String()
tracker.SetConversation(id, conv)
response := types.ResponseBody{
- ID: id,
- Object: "response",
- // "created_at": 1741476542,
+ ID: id,
+ Object: "response",
+ CreatedAt: time.Now().Unix(),
+ Status: "completed",
- Output: []types.ResponseMessage{
- {
+ Model: agentName,
+ Output: []interface{}{
+ types.ResponseMessage{
Type: "message",
ID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
Status: "completed",
Role: "assistant",
Content: []types.MessageContentItem{
- types.MessageContentItem{
+ {
Type: "output_text",
Text: res.Response,
},


@@ -3,16 +3,133 @@ package types
import (
"encoding/json"
coreTypes "github.com/mudler/LocalAGI/core/types"
"github.com/mudler/LocalAGI/pkg/xlog"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
)
// Input represents either a string or a slice of Message
type Input struct {
Text *string `json:"-"`
Messages *[]Message `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for Input
func (i *Input) UnmarshalJSON(data []byte) error {
// Try to unmarshal as string first
var text string
if err := json.Unmarshal(data, &text); err == nil {
i.Text = &text
return nil
}
// Try to unmarshal as []Message
var messages []Message
if err := json.Unmarshal(data, &messages); err == nil {
i.Messages = &messages
return nil
}
return json.Unmarshal(data, &struct{}{}) // fallback to empty struct
}
// MarshalJSON implements custom JSON marshaling for Input
func (i *Input) MarshalJSON() ([]byte, error) {
if i.Text != nil {
return json.Marshal(*i.Text)
}
if i.Messages != nil {
return json.Marshal(*i.Messages)
}
return json.Marshal(nil)
}
// IsText returns true if the input contains text
func (i *Input) IsText() bool {
return i.Text != nil
}
// IsMessages returns true if the input contains messages
func (i *Input) IsMessages() bool {
return i.Messages != nil
}
// GetText returns the text value or empty string
func (i *Input) GetText() string {
if i.Text != nil {
return *i.Text
}
return ""
}
// GetMessages returns the messages value or empty slice
func (i *Input) GetMessages() []Message {
if i.Messages != nil {
return *i.Messages
}
return nil
}
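To make the dual shape concrete, both request forms decode into the same field. A sketch (errors ignored):
var a, b Input
_ = json.Unmarshal([]byte(`"hello"`), &a)
// a.IsText() == true, a.GetText() == "hello"

_ = json.Unmarshal([]byte(`[{"type":"message","role":"user","content":"hi"}]`), &b)
// b.IsMessages() == true, len(b.GetMessages()) == 1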
// Message represents different types of messages in the input
type Message struct {
// Common fields
Type string `json:"type,omitempty"`
// InputMessage fields (when this is a regular chat message)
Role *string `json:"role,omitempty"`
Content *Content `json:"content,omitempty"`
// WebSearchToolCall fields (when type == "web_search_call")
ID *string `json:"id,omitempty"`
Status *string `json:"status,omitempty"`
// Function call and function call output
Arguments *string `json:"arguments,omitempty"`
CallId *string `json:"call_id,omitempty"`
Name *string `json:"name,omitempty"`
Output *string `json:"output,omitempty"`
}
// IsInputMessage returns true if this is a regular chat message
func (m *Message) IsInputMessage() bool {
return m.Role != nil
}
// IsWebSearchCall returns true if this is a web search tool call
func (m *Message) IsWebSearchCall() bool {
return m.Type == "web_search_call"
}
func (m *Message) IsFunctionCall() bool {
return m.Type == "function_call"
}
func (m *Message) IsFunctionCallOutput() bool {
return m.Type == "function_call_output"
}
// ToInputMessage converts to InputMessage if this is a regular message
func (m *Message) ToInputMessage() *InputMessage {
if m.IsInputMessage() && m.Role != nil && m.Content != nil {
content := *m.Content
return &InputMessage{
Role: *m.Role,
Content: content,
}
}
return nil
}
type ToolChoice struct {
Name string `json:"name"`
Type string `json:"type"`
}
// RequestBody represents the request body structure for the OpenAI API
type RequestBody struct {
Model string `json:"model"`
- Input json.RawMessage `json:"input"`
- InputText string `json:"input_text"`
- InputMessages []InputMessage `json:"input_messages"`
+ Input Input `json:"input"`
Include []string `json:"include,omitempty"`
Instructions *string `json:"instructions,omitempty"`
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
@@ -24,92 +141,103 @@ type RequestBody struct {
Stream *bool `json:"stream,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
Text *TextConfig `json:"text,omitempty"`
- ToolChoice interface{} `json:"tool_choice,omitempty"`
- Tools []interface{} `json:"tools,omitempty"`
+ ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
+ Tools []Tool `json:"tools,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
Truncation *string `json:"truncation,omitempty"`
}
func (r *RequestBody) SetInputByType() {
- xlog.Debug("[Parse Request] Set input type", "input", string(r.Input))
- var inputText string
- if err := json.Unmarshal(r.Input, &inputText); err == nil {
- r.InputText = inputText
- return
+ // This method is no longer needed as Input handles unmarshaling automatically
+ if r.Input.IsText() {
+ xlog.Debug("[Parse Request] Set input type as text", "input", r.Input.GetText())
+ } else if r.Input.IsMessages() {
+ xlog.Debug("[Parse Request] Input messages parsed", "messages", r.Input.GetMessages())
}
- var inputMessages []InputMessage
- if err := json.Unmarshal(r.Input, &inputMessages); err != nil {
- xlog.Warn("[Parse Request] Input type not recognized", "input", string(r.Input))
- return
- }
- for _, i := range inputMessages {
- switch content := i.Content.(type) {
- case []ContentItem:
- i.ContentItems = content
- case string:
- i.ContentText = content
- default:
- xlog.Warn("[Parse Request] Input content type not recognized", "content", content)
- }
- r.InputMessages = append(r.InputMessages, i)
- }
- xlog.Debug("[Parse Request] Input messages parsed", "messages", r.InputMessages)
}
func (r *RequestBody) ToChatCompletionMessages() []openai.ChatCompletionMessage {
result := []openai.ChatCompletionMessage{}
for _, m := range r.InputMessages {
content := []openai.ChatMessagePart{}
oneImageWasFound := false
if r.Input.IsMessages() {
for _, m := range r.Input.GetMessages() {
if m.ContentText != "" {
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: m.ContentText,
})
}
for _, c := range m.ContentItems {
switch c.Type {
case "text":
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: c.Text,
})
case "image":
oneImageWasFound = true
content = append(content, openai.ChatMessagePart{
Type: "image",
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
if m.IsFunctionCall() {
result = append(result, openai.ChatCompletionMessage{
Role: "assistant",
ToolCalls: []openai.ToolCall{
{
Type: "function",
ID: *m.CallId,
Function: openai.FunctionCall{
Arguments: *m.Arguments,
Name: *m.Name,
},
},
},
})
}
}
if oneImageWasFound {
result = append(result, openai.ChatCompletionMessage{
Role: m.Role,
MultiContent: content,
})
} else {
for _, c := range content {
if m.IsFunctionCallOutput() {
result = append(result, openai.ChatCompletionMessage{
Role: m.Role,
Content: c.Text,
Role: "tool",
Content: *m.Output,
ToolCallID: *m.CallId,
})
}
if !m.IsInputMessage() {
continue
}
content := []openai.ChatMessagePart{}
oneImageWasFound := false
if m.Content != nil && m.Content.IsText() && m.Content.GetText() != "" {
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: m.Content.GetText(),
})
}
if m.Content != nil && m.Content.IsItems() {
for _, c := range m.Content.GetItems() {
switch c.Type {
case "text":
content = append(content, openai.ChatMessagePart{
Type: "text",
Text: c.Text,
})
case "image":
oneImageWasFound = true
content = append(content, openai.ChatMessagePart{
Type: "image",
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
})
}
}
}
if oneImageWasFound {
result = append(result, openai.ChatCompletionMessage{
Role: *m.Role,
MultiContent: content,
})
} else {
for _, c := range content {
result = append(result, openai.ChatCompletionMessage{
Role: *m.Role,
Content: c.Text,
})
}
}
}
}
- if r.InputText != "" {
+ if r.Input.IsText() && r.Input.GetText() != "" {
result = append(result, openai.ChatCompletionMessage{
Role: "user",
- Content: r.InputText,
+ Content: r.Input.GetText(),
})
}
@@ -148,6 +276,16 @@ type MessageContentItem struct {
Annotations []interface{} `json:"annotations"`
}
// FunctionToolCall represents a function tool call as a top-level object in the output array
type FunctionToolCall struct {
Arguments string `json:"arguments"`
CallID string `json:"call_id"`
Name string `json:"name"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
}
// UsageInfo represents token usage information
type UsageInfo struct {
InputTokens int `json:"input_tokens"`
@@ -174,7 +312,7 @@ type ResponseBody struct {
Instructions interface{} `json:"instructions"`
MaxOutputTokens interface{} `json:"max_output_tokens"`
Model string `json:"model"`
- Output []ResponseMessage `json:"output"`
+ Output []interface{} `json:"output"`
ParallelToolCalls bool `json:"parallel_tool_calls"`
PreviousResponseID interface{} `json:"previous_response_id"`
Reasoning ReasoningConfig `json:"reasoning"`
@@ -182,7 +320,7 @@ type ResponseBody struct {
Temperature float64 `json:"temperature"`
Text TextConfig `json:"text"`
ToolChoice string `json:"tool_choice"`
- Tools []interface{} `json:"tools"`
+ Tools []Tool `json:"tools"`
TopP float64 `json:"top_p"`
Truncation string `json:"truncation"`
Usage UsageInfo `json:"usage"`
@@ -190,12 +328,72 @@ type ResponseBody struct {
Metadata map[string]interface{} `json:"metadata"`
}
// Content represents either a string or a slice of ContentItem
type Content struct {
Text *string `json:"-"`
Items *[]ContentItem `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for Content
func (c *Content) UnmarshalJSON(data []byte) error {
// Try to unmarshal as string first
var text string
if err := json.Unmarshal(data, &text); err == nil {
c.Text = &text
return nil
}
// Try to unmarshal as []ContentItem
var items []ContentItem
if err := json.Unmarshal(data, &items); err == nil {
c.Items = &items
return nil
}
return json.Unmarshal(data, &struct{}{}) // fallback to empty struct
}
// MarshalJSON implements custom JSON marshaling for Content
func (c *Content) MarshalJSON() ([]byte, error) {
if c.Text != nil {
return json.Marshal(*c.Text)
}
if c.Items != nil {
return json.Marshal(*c.Items)
}
return json.Marshal(nil)
}
// IsText returns true if the content contains text
func (c *Content) IsText() bool {
return c.Text != nil
}
// IsItems returns true if the content contains items
func (c *Content) IsItems() bool {
return c.Items != nil
}
// GetText returns the text value or empty string
func (c *Content) GetText() string {
if c.Text != nil {
return *c.Text
}
return ""
}
// GetItems returns the items value or empty slice
func (c *Content) GetItems() []ContentItem {
if c.Items != nil {
return *c.Items
}
return nil
}
// InputMessage represents a user input message
type InputMessage struct {
- Role string `json:"role"`
- Content any `json:"content"`
- ContentText string `json:"content_text"`
- ContentItems []ContentItem `json:"content_items"`
+ Role string `json:"role"`
+ Content Content `json:"content"`
}
// ContentItem represents an item in a content array
@@ -204,3 +402,129 @@ type ContentItem struct {
Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"`
}
// Tool represents a tool that can be called by the assistant
type Tool struct {
Type string `json:"type"`
// Function tool fields (used when type == "function")
Name *string `json:"name,omitempty"`
Description *string `json:"description,omitempty"`
Parameters *jsonschema.Definition `json:"parameters,omitempty"`
Strict *bool `json:"strict,omitempty"`
// Web search tool fields (used when type == "web_search_preview" etc.)
SearchContextSize *string `json:"search_context_size,omitempty"`
UserLocation *UserLocation `json:"user_location,omitempty"`
}
// IsFunction returns true if this is a function tool
func (t *Tool) IsFunction() bool {
return t.Type == "function"
}
// IsWebSearch returns true if this is a web search tool
func (t *Tool) IsWebSearch() bool {
return t.Type == "web_search_preview" || t.Type == "web_search_preview_2025_03_11"
}
// ToActionDefinition converts this tool to an ActionDefinition
func (t *Tool) ToActionDefinition() *coreTypes.ActionDefinition {
if t.IsFunction() && t.Name != nil {
// Regular function tool
properties := make(map[string]jsonschema.Definition)
required := []string{}
if t.Parameters != nil {
properties = t.Parameters.Properties
required = t.Parameters.Required
}
desc := ""
if t.Description != nil {
desc = *t.Description
}
return &coreTypes.ActionDefinition{
Name: coreTypes.ActionDefinitionName(*t.Name),
Description: desc,
Properties: properties,
Required: required,
}
}
if t.IsWebSearch() {
// Convert web search builtin to ActionDefinition
name := "web_search_" + t.Type
desc := "Web search tool for finding relevant information online"
// Create parameters schema for web search options
properties := map[string]jsonschema.Definition{
"search_context_size": {
Type: jsonschema.String,
Enum: []string{"low", "medium", "high"},
Description: "Amount of context window space to use for search",
},
"user_location": {
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"type": {
Type: jsonschema.String,
Enum: []string{"approximate"},
Description: "Type of location approximation",
},
"city": {
Type: jsonschema.String,
Description: "City of the user",
},
"country": {
Type: jsonschema.String,
Description: "Two-letter ISO country code",
},
"region": {
Type: jsonschema.String,
Description: "Region of the user",
},
"timezone": {
Type: jsonschema.String,
Description: "IANA timezone of the user",
},
},
},
}
return &coreTypes.ActionDefinition{
Name: coreTypes.ActionDefinitionName(name),
Description: desc,
Properties: properties,
Required: []string{},
}
}
return nil
}
// SeparateTools separates a slice of Tools into builtin tools and user tools as ActionDefinitions
func SeparateTools(tools []Tool) (builtinTools []coreTypes.ActionDefinition, userTools []coreTypes.ActionDefinition) {
for _, tool := range tools {
if actionDef := tool.ToActionDefinition(); actionDef != nil {
if tool.IsFunction() {
// User-defined function tool
userTools = append(userTools, *actionDef)
} else if tool.IsWebSearch() {
// Builtin tool (web search)
builtinTools = append(builtinTools, *actionDef)
}
}
}
return builtinTools, userTools
}
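For example, a request mixing one function tool and one builtin web search tool splits as follows (sketch; assumes the ptr helper added in this commit is imported here):
tools := []Tool{
    {Type: "function", Name: ptr.To("CreateTask")},
    {Type: "web_search_preview"},
}
builtin, user := SeparateTools(tools)
// user:    one ActionDefinition named "CreateTask"
// builtin: one ActionDefinition named "web_search_web_search_preview",
//          which validateBuiltinTools later matches by its "web_search_" prefix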
// UserLocation represents the user's location for web search
type UserLocation struct {
Type string `json:"type"`
City *string `json:"city,omitempty"`
Country *string `json:"country,omitempty"`
Region *string `json:"region,omitempty"`
Timezone *string `json:"timezone,omitempty"`
}