diff --git a/core/cli/run.go b/core/cli/run.go
index 38541bd0f..912bd5522 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -80,6 +80,8 @@ type RunCMD struct {
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
+ EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
+ TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
Version bool
@@ -152,6 +154,15 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.DisableRuntimeSettings)
}
+	// Turn API tracing on when requested; the trace buffer size is forwarded
+	// unconditionally.
+	if r.EnableTracing {
+		opts = append(opts, config.EnableTracing)
+	}
+	opts = append(opts, config.WithTracingMaxItems(r.TracingMaxItems))
+
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
xlog.Info("P2P mode enabled")
diff --git a/core/config/application_config.go b/core/config/application_config.go
index fdd8810e3..26b603f53 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -19,6 +19,8 @@ type ApplicationConfig struct {
UploadLimitMB, Threads, ContextSize int
F16 bool
Debug bool
+ EnableTracing bool
+ TracingMaxItems int
GeneratedContentDir string
UploadDir string
@@ -97,6 +99,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
AgentJobRetentionDays: 30, // Default: 30 days
LRUEvictionMaxRetries: 30, // Default: 30 retries
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
+ TracingMaxItems: 1024,
PathWithoutAuth: []string{
"/static/",
"/generated-audio/",
@@ -165,6 +168,10 @@ var EnableWatchDog = func(o *ApplicationConfig) {
o.WatchDog = true
}
+// EnableTracing is an option function that sets ApplicationConfig.EnableTracing
+// to true.
+var EnableTracing = func(o *ApplicationConfig) {
+	o.EnableTracing = true
+}
+
var EnableWatchDogIdleCheck = func(o *ApplicationConfig) {
o.WatchDog = true
o.WatchDogIdle = true
@@ -418,6 +425,12 @@ func WithDebug(debug bool) AppOption {
}
}
+// WithTracingMaxItems returns an option that sets the maximum number of API
+// traces kept in memory.
+//
+// Values below 1 are ignored, leaving the current setting (1024 by default)
+// in place: the circular buffer that holds the traces cannot be created with
+// a capacity smaller than one.
+func WithTracingMaxItems(items int) AppOption {
+	return func(o *ApplicationConfig) {
+		if items < 1 {
+			return
+		}
+		o.TracingMaxItems = items
+	}
+}
+
func WithGeneratedContentDir(generatedContentDir string) AppOption {
return func(o *ApplicationConfig) {
o.GeneratedContentDir = generatedContentDir
@@ -543,6 +556,8 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
contextSize := o.ContextSize
f16 := o.F16
debug := o.Debug
+ tracingMaxItems := o.TracingMaxItems
+ enableTracing := o.EnableTracing
cors := o.CORS
csrf := o.CSRF
corsAllowOrigins := o.CORSAllowOrigins
@@ -599,6 +614,8 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
ContextSize: &contextSize,
F16: &f16,
Debug: &debug,
+ TracingMaxItems: &tracingMaxItems,
+ EnableTracing: &enableTracing,
CORS: &cors,
CSRF: &csrf,
CORSAllowOrigins: &corsAllowOrigins,
@@ -713,6 +730,12 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
if settings.Debug != nil {
o.Debug = *settings.Debug
}
+ if settings.EnableTracing != nil {
+ o.EnableTracing = *settings.EnableTracing
+ }
+ if settings.TracingMaxItems != nil {
+ o.TracingMaxItems = *settings.TracingMaxItems
+ }
if settings.CORS != nil {
o.CORS = *settings.CORS
}
diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go
index 93783511d..1a7f6db81 100644
--- a/core/config/runtime_settings.go
+++ b/core/config/runtime_settings.go
@@ -32,10 +32,12 @@ type RuntimeSettings struct {
LRUEvictionRetryInterval *string `json:"lru_eviction_retry_interval,omitempty"` // Interval between retries when waiting for busy models (e.g., 1s, 2s) (default: 1s)
// Performance settings
- Threads *int `json:"threads,omitempty"`
- ContextSize *int `json:"context_size,omitempty"`
- F16 *bool `json:"f16,omitempty"`
- Debug *bool `json:"debug,omitempty"`
+ Threads *int `json:"threads,omitempty"`
+ ContextSize *int `json:"context_size,omitempty"`
+ F16 *bool `json:"f16,omitempty"`
+ Debug *bool `json:"debug,omitempty"`
+ EnableTracing *bool `json:"enable_tracing,omitempty"`
+ TracingMaxItems *int `json:"tracing_max_items,omitempty"`
// Security/CORS settings
CORS *bool `json:"cors,omitempty"`
diff --git a/core/http/middleware/trace.go b/core/http/middleware/trace.go
new file mode 100644
index 000000000..aa63ba349
--- /dev/null
+++ b/core/http/middleware/trace.go
@@ -0,0 +1,156 @@
+package middleware
+
+import (
+	"bytes"
+	"io"
+	"mime"
+	"net/http"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/emirpasic/gods/v2/queues/circularbuffer"
+	"github.com/labstack/echo/v4"
+	"github.com/mudler/LocalAI/core/application"
+	"github.com/mudler/xlog"
+)
+
+// APIExchangeRequest is the request half of a recorded API exchange.
+//
+// Headers and Body are pointers; presumably this keeps the struct comparable
+// for the generic circular buffer that stores exchanges — TODO confirm.
+// encoding/json serializes the []byte body as a base64 string.
+type APIExchangeRequest struct {
+	Method  string       `json:"method"`  // HTTP method of the request
+	Path    string       `json:"path"`    // value of echo's Context.Path() for the request
+	Headers *http.Header `json:"headers"` // copy of the request headers
+	Body    *[]byte      `json:"body"`    // copy of the raw request body
+}
+
+// APIExchangeResponse is the response half of a recorded API exchange.
+//
+// encoding/json serializes the []byte body as a base64 string.
+type APIExchangeResponse struct {
+	Status  int          `json:"status"`  // HTTP status code of the response
+	Headers *http.Header `json:"headers"` // copy of the response headers
+	Body    *[]byte      `json:"body"`    // bytes written to the response, as captured by bodyWriter
+}
+
+// APIExchange is one traced request/response pair.
+type APIExchange struct {
+	Timestamp time.Time           `json:"timestamp"` // taken just before the downstream handler is invoked
+	Request   APIExchangeRequest  `json:"request"`
+	Response  APIExchangeResponse `json:"response"`
+}
+
+// Package-level tracing state shared by the middleware and the accessors.
+var (
+	// traceBuffer stores the recorded exchanges. It is nil until
+	// TraceMiddleware allocates it.
+	traceBuffer *circularbuffer.Queue[APIExchange]
+
+	// mu serializes operations on the contents of traceBuffer.
+	mu sync.Mutex
+
+	// logChan carries exchanges from request handlers to the goroutine that
+	// enqueues them; it buffers up to 100 entries.
+	logChan = make(chan APIExchange, 100)
+)
+
+// bodyWriter wraps an http.ResponseWriter and keeps a copy of every byte
+// written through it.
+type bodyWriter struct {
+	http.ResponseWriter               // the wrapped writer; all writes are forwarded to it
+	body                *bytes.Buffer // accumulates a copy of the written bytes
+}
+
+// Write stores a copy of b in the capture buffer, then forwards b to the
+// wrapped ResponseWriter and returns that writer's result.
+func (w *bodyWriter) Write(b []byte) (int, error) {
+	// bytes.Buffer.Write always returns a nil error, so its result is dropped.
+	_, _ = w.body.Write(b)
+
+	written, err := w.ResponseWriter.Write(b)
+	return written, err
+}
+
+// Flush forwards to the wrapped ResponseWriter when it implements
+// http.Flusher; otherwise it does nothing.
+func (w *bodyWriter) Flush() {
+	flusher, supported := w.ResponseWriter.(http.Flusher)
+	if !supported {
+		return
+	}
+	flusher.Flush()
+}
+
+// defaultTraceBufferSize is the capacity used when the configured
+// TracingMaxItems is not positive.
+const defaultTraceBufferSize = 1024
+
+// traceCollectorOnce ensures the trace buffer and its consumer goroutine are
+// set up at most once.
+var traceCollectorOnce sync.Once
+
+// startTraceCollector allocates the trace buffer and starts the goroutine
+// that moves exchanges from logChan into it. Only the first call has any
+// effect, so it is safe to call on every traced request.
+func startTraceCollector(maxItems int) {
+	traceCollectorOnce.Do(func() {
+		// circularbuffer.New panics when asked for fewer than one slot.
+		if maxItems < 1 {
+			maxItems = defaultTraceBufferSize
+		}
+		buffer := circularbuffer.New[APIExchange](maxItems)
+
+		// Publish under mu so readers that check traceBuffer while holding
+		// the lock observe a fully constructed buffer.
+		mu.Lock()
+		traceBuffer = buffer
+		mu.Unlock()
+
+		// logChan is not closed anywhere in this file, so this goroutine
+		// runs for the lifetime of the process.
+		go func() {
+			for exchange := range logChan {
+				mu.Lock()
+				traceBuffer.Enqueue(exchange)
+				mu.Unlock()
+			}
+		}()
+	})
+}
+
+// isJSONContentType reports whether a Content-Type header value names the
+// application/json media type, regardless of parameters such as charset.
+func isJSONContentType(contentType string) bool {
+	// The error is deliberately ignored: ParseMediaType still returns the
+	// media type when only the parameters are malformed, and returns an empty
+	// string for every other failure.
+	mediaType, _, _ := mime.ParseMediaType(contentType)
+	return mediaType == "application/json"
+}
+
+// TraceMiddleware returns an echo middleware that records JSON API requests
+// together with their responses.
+//
+// The EnableTracing setting is read on every request. Requests are passed
+// through untouched when tracing is disabled or when the request's
+// Content-Type is not application/json. Exchanges whose handler returns an
+// error are not recorded. Recorded exchanges are handed to a background
+// goroutine through a bounded channel; when the channel is full the trace is
+// dropped with a warning instead of blocking the request.
+//
+// NOTE(review): request and response headers are stored verbatim, including
+// any Authorization value, and are returned by GetTraces.
+func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
+	if cfg := app.ApplicationConfig(); cfg.EnableTracing {
+		startTraceCollector(cfg.TracingMaxItems)
+	}
+
+	return func(next echo.HandlerFunc) echo.HandlerFunc {
+		return func(c echo.Context) error {
+			cfg := app.ApplicationConfig()
+			if !cfg.EnableTracing {
+				return next(c)
+			}
+
+			if !isJSONContentType(c.Request().Header.Get("Content-Type")) {
+				return next(c)
+			}
+
+			// Tracing may have been switched on after the middleware was
+			// built; make sure the buffer and its consumer exist.
+			startTraceCollector(cfg.TracingMaxItems)
+
+			body, err := io.ReadAll(c.Request().Body)
+			if err != nil {
+				xlog.Error("Failed to read request body", "error", err)
+				return err
+			}
+
+			// Restore the body for downstream handlers
+			c.Request().Body = io.NopCloser(bytes.NewReader(body))
+
+			startTime := time.Now()
+
+			// Wrap the response writer to capture the body, and put the
+			// original writer back on every exit path.
+			resBody := new(bytes.Buffer)
+			originalWriter := c.Response().Writer
+			c.Response().Writer = &bodyWriter{
+				ResponseWriter: originalWriter,
+				body:           resBody,
+			}
+			defer func() { c.Response().Writer = originalWriter }()
+
+			if err := next(c); err != nil {
+				return err
+			}
+
+			// Copy everything that goes into the trace so later mutations of
+			// the live request/response cannot alter the stored exchange.
+			requestHeaders := c.Request().Header.Clone()
+			requestBody := make([]byte, len(body))
+			copy(requestBody, body)
+			responseHeaders := c.Response().Header().Clone()
+			responseBody := make([]byte, resBody.Len())
+			copy(responseBody, resBody.Bytes())
+
+			exchange := APIExchange{
+				Timestamp: startTime,
+				Request: APIExchangeRequest{
+					Method:  c.Request().Method,
+					Path:    c.Path(),
+					Headers: &requestHeaders,
+					Body:    &requestBody,
+				},
+				Response: APIExchangeResponse{
+					Status:  c.Response().Status,
+					Headers: &responseHeaders,
+					Body:    &responseBody,
+				},
+			}
+
+			// Non-blocking send: never stall a request because of tracing.
+			select {
+			case logChan <- exchange:
+			default:
+				xlog.Warn("Trace channel full, dropping trace")
+			}
+
+			return nil
+		}
+	}
+}
+
+// GetTraces returns a snapshot of the recorded API exchanges ordered by
+// request start time, oldest first.
+//
+// When the trace buffer has never been allocated (tracing was not enabled)
+// it returns an empty, non-nil slice, so callers get "[]" from JSON encoding
+// rather than a nil-pointer panic.
+func GetTraces() []APIExchange {
+	mu.Lock()
+	if traceBuffer == nil {
+		mu.Unlock()
+		return []APIExchange{}
+	}
+	traces := traceBuffer.Values()
+	mu.Unlock()
+
+	// Exchanges are enqueued after the handler finishes, so buffer order is
+	// not necessarily request start order.
+	sort.Slice(traces, func(i, j int) bool {
+		return traces[i].Timestamp.Before(traces[j].Timestamp)
+	})
+
+	return traces
+}
+
+// ClearTraces removes every recorded exchange from the in-memory buffer.
+// It is a no-op when the buffer has never been allocated (tracing was not
+// enabled).
+func ClearTraces() {
+	mu.Lock()
+	defer mu.Unlock()
+
+	if traceBuffer == nil {
+		return
+	}
+	traceBuffer.Clear()
+}
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
index 921582a6b..93fed71db 100644
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -14,16 +14,18 @@ func RegisterOpenAIRoutes(app *echo.Echo,
re *middleware.RequestExtractor,
application *application.Application) {
// openAI compatible API endpoint
+ traceMiddleware := middleware.TraceMiddleware(application)
// realtime
// TODO: Modify/disable the API key middleware for this endpoint to allow ephemeral keys created by sessions
app.GET("/v1/realtime", openai.Realtime(application))
- app.POST("/v1/realtime/sessions", openai.RealtimeTranscriptionSession(application))
- app.POST("/v1/realtime/transcription_session", openai.RealtimeTranscriptionSession(application))
+ app.POST("/v1/realtime/sessions", openai.RealtimeTranscriptionSession(application), traceMiddleware)
+ app.POST("/v1/realtime/transcription_session", openai.RealtimeTranscriptionSession(application), traceMiddleware)
// chat
chatHandler := openai.ChatEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
chatMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
@@ -41,6 +43,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// edit
editHandler := openai.EditEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
editMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EDIT)),
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -59,6 +62,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// completion
completionHandler := openai.CompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
completionMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_COMPLETION)),
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -78,6 +82,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// MCPcompletion
mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
mcpCompletionMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
@@ -95,6 +100,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// embeddings
embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
embeddingMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)),
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -113,6 +119,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
audioHandler := openai.TranscriptEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
audioMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
@@ -130,6 +137,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
audioSpeechHandler := localai.TTSEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
audioSpeechMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
}
@@ -140,6 +148,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// images
imageHandler := openai.ImageEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
imageMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
// Default: use the first available image generation model
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_IMAGE)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -164,6 +173,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// videos (OpenAI-compatible endpoints mapped to LocalAI video handler)
videoHandler := openai.VideoEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
videoMiddleware := []echo.MiddlewareFunc{
+ traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index 4b852875f..da6f5d1ee 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -317,4 +317,24 @@ func RegisterUIRoutes(app *echo.Echo,
// Render index
return c.Render(200, "views/tts", summary)
})
+
+	// Traces UI: renders the "views/traces" template.
+	app.GET("/traces", func(c echo.Context) error {
+		summary := map[string]interface{}{
+			"Title":   "LocalAI - Traces",
+			"BaseURL": middleware.BaseURL(c),
+			"Version": internal.PrintableVersion(),
+		}
+		return c.Render(200, "views/traces", summary)
+	})
+
+	// Returns the recorded exchanges as a bare JSON array.
+	// NOTE(review): GET /api/traces and POST /api/traces/clear are also
+	// registered in RegisterUIAPIRoutes (ui_api.go), where the list is wrapped
+	// in {"traces": ...} and guarded by an EnableTracing check. Presumably only
+	// one registration per route should remain — confirm which response shape
+	// the traces view consumes.
+	app.GET("/api/traces", func(c echo.Context) error {
+		return c.JSON(200, middleware.GetTraces())
+	})
+
+	// Empties the trace buffer and answers 204 No Content.
+	// NOTE(review): there is no EnableTracing check on this route or the one
+	// above; verify the middleware accessors cope with tracing being disabled.
+	app.POST("/api/traces/clear", func(c echo.Context) error {
+		middleware.ClearTraces()
+		return c.NoContent(204)
+	})
+
}
diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go
index 36194d5c0..84af2e32f 100644
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -16,6 +16,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
+ "github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
@@ -947,4 +948,24 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance))
app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance))
}
+
+	// Traces API
+	//
+	// Both endpoints answer 503 while tracing is disabled: in that state the
+	// trace buffer may never have been allocated.
+	respondTracingDisabled := func(c echo.Context) error {
+		return c.JSON(503, map[string]any{
+			"error": "Tracing disabled",
+		})
+	}
+
+	// GET /api/traces returns the recorded exchanges as {"traces": [...]}.
+	app.GET("/api/traces", func(c echo.Context) error {
+		if !appConfig.EnableTracing {
+			return respondTracingDisabled(c)
+		}
+		return c.JSON(200, map[string]any{
+			"traces": middleware.GetTraces(),
+		})
+	})
+
+	// POST /api/traces/clear drops every recorded exchange.
+	app.POST("/api/traces/clear", func(c echo.Context) error {
+		if !appConfig.EnableTracing {
+			return respondTracingDisabled(c)
+		}
+		middleware.ClearTraces()
+		return c.JSON(200, map[string]any{
+			"message": "Traces cleared",
+		})
+	})
}
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index 2afdfa36e..cbc092c41 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -37,6 +37,9 @@
Agent Jobs
+
+ Traces
+
API
@@ -94,6 +97,9 @@
Agent Jobs
+
+ Traces
+
API
diff --git a/core/http/views/settings.html b/core/http/views/settings.html
index 1ad960430..97587f0e3 100644
--- a/core/http/views/settings.html
+++ b/core/http/views/settings.html
@@ -10,7 +10,7 @@