feat(loader): enhance single active backend to support LRU eviction (#7535)
* feat(loader): refactor single active backend support to LRU This changeset introduces LRU management of loaded backends. Users can now set a maximum number of models to be loaded concurrently; when LocalAI is configured in single active backend mode, the LRU limit is set to 1 for backward compatibility. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore: add tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Update docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
c141a40e00
commit
fc5b9ebfcc
@@ -29,7 +29,7 @@ type Application struct {
|
||||
func newApplication(appConfig *config.ApplicationConfig) *Application {
|
||||
return &Application{
|
||||
backendLoader: config.NewModelConfigLoader(appConfig.SystemState.Model.ModelsPath),
|
||||
modelLoader: model.NewModelLoader(appConfig.SystemState, appConfig.SingleBackend),
|
||||
modelLoader: model.NewModelLoader(appConfig.SystemState),
|
||||
applicationConfig: appConfig,
|
||||
templatesEvaluator: templates.NewEvaluator(appConfig.SystemState.Model.ModelsPath),
|
||||
}
|
||||
|
||||
@@ -191,7 +191,8 @@ type runtimeSettings struct {
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
|
||||
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited, 1 = single backend mode)
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
Threads *int `json:"threads,omitempty"`
|
||||
ContextSize *int `json:"context_size,omitempty"`
|
||||
@@ -224,6 +225,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
|
||||
envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
|
||||
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
|
||||
envMaxActiveBackends := appConfig.MaxActiveBackends == startupAppConfig.MaxActiveBackends
|
||||
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
|
||||
envThreads := appConfig.Threads == startupAppConfig.Threads
|
||||
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
|
||||
@@ -275,8 +277,19 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
|
||||
}
|
||||
}
|
||||
if settings.SingleBackend != nil && !envSingleBackend {
|
||||
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
|
||||
if settings.MaxActiveBackends != nil && !envMaxActiveBackends {
|
||||
appConfig.MaxActiveBackends = *settings.MaxActiveBackends
|
||||
// For backward compatibility, also set SingleBackend if MaxActiveBackends == 1
|
||||
appConfig.SingleBackend = (*settings.MaxActiveBackends == 1)
|
||||
} else if settings.SingleBackend != nil && !envSingleBackend {
|
||||
// Legacy: SingleBackend maps to MaxActiveBackends = 1
|
||||
appConfig.SingleBackend = *settings.SingleBackend
|
||||
if *settings.SingleBackend {
|
||||
appConfig.MaxActiveBackends = 1
|
||||
} else {
|
||||
appConfig.MaxActiveBackends = 0
|
||||
}
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil && !envParallelRequests {
|
||||
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
|
||||
|
||||
@@ -224,7 +224,8 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
|
||||
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
|
||||
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"`
|
||||
SingleBackend *bool `json:"single_backend,omitempty"` // Deprecated: use MaxActiveBackends = 1 instead
|
||||
MaxActiveBackends *int `json:"max_active_backends,omitempty"` // Maximum number of active backends (0 = unlimited)
|
||||
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
|
||||
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
||||
}
|
||||
@@ -280,9 +281,21 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.SingleBackend != nil {
|
||||
// Handle MaxActiveBackends (new) and SingleBackend (deprecated)
|
||||
if settings.MaxActiveBackends != nil {
|
||||
// Only apply if current value is default (0), suggesting it wasn't set from env var
|
||||
if options.MaxActiveBackends == 0 {
|
||||
options.MaxActiveBackends = *settings.MaxActiveBackends
|
||||
// For backward compatibility, also set SingleBackend if MaxActiveBackends == 1
|
||||
options.SingleBackend = (*settings.MaxActiveBackends == 1)
|
||||
}
|
||||
} else if settings.SingleBackend != nil {
|
||||
// Legacy: SingleBackend maps to MaxActiveBackends = 1
|
||||
if !options.SingleBackend {
|
||||
options.SingleBackend = *settings.SingleBackend
|
||||
if *settings.SingleBackend {
|
||||
options.MaxActiveBackends = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
if settings.ParallelBackendRequests != nil {
|
||||
@@ -307,15 +320,25 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
|
||||
// initializeWatchdog initializes the watchdog with current ApplicationConfig settings
|
||||
func initializeWatchdog(application *Application, options *config.ApplicationConfig) {
|
||||
if options.WatchDog {
|
||||
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
|
||||
lruLimit := options.GetEffectiveMaxActiveBackends()
|
||||
|
||||
// Create watchdog if enabled OR if LRU limit is set
|
||||
if options.WatchDog || lruLimit > 0 {
|
||||
wd := model.NewWatchDog(
|
||||
application.ModelLoader(),
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
options.WatchDogIdle,
|
||||
lruLimit)
|
||||
application.ModelLoader().SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
|
||||
// Start watchdog goroutine only if busy/idle checks are enabled
|
||||
if options.WatchDogBusy || options.WatchDogIdle {
|
||||
go wd.Run()
|
||||
}
|
||||
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
|
||||
@@ -20,21 +20,29 @@ func (a *Application) StopWatchdog() error {
|
||||
func (a *Application) startWatchdog() error {
|
||||
appConfig := a.ApplicationConfig()
|
||||
|
||||
// Create new watchdog if enabled
|
||||
if appConfig.WatchDog {
|
||||
// Get effective max active backends (considers both MaxActiveBackends and deprecated SingleBackend)
|
||||
lruLimit := appConfig.GetEffectiveMaxActiveBackends()
|
||||
|
||||
// Create watchdog if enabled OR if LRU limit is set
|
||||
// LRU eviction requires watchdog infrastructure even without busy/idle checks
|
||||
if appConfig.WatchDog || lruLimit > 0 {
|
||||
wd := model.NewWatchDog(
|
||||
a.modelLoader,
|
||||
appConfig.WatchDogBusyTimeout,
|
||||
appConfig.WatchDogIdleTimeout,
|
||||
appConfig.WatchDogBusy,
|
||||
appConfig.WatchDogIdle)
|
||||
appConfig.WatchDogIdle,
|
||||
lruLimit)
|
||||
a.modelLoader.SetWatchDog(wd)
|
||||
|
||||
// Create new stop channel
|
||||
a.watchdogStop = make(chan bool, 1)
|
||||
|
||||
// Start watchdog goroutine
|
||||
go wd.Run()
|
||||
// Start watchdog goroutine only if busy/idle checks are enabled
|
||||
// LRU eviction doesn't need the Run() loop - it's triggered on model load
|
||||
if appConfig.WatchDogBusy || appConfig.WatchDogIdle {
|
||||
go wd.Run()
|
||||
}
|
||||
|
||||
// Setup shutdown handler
|
||||
go func() {
|
||||
@@ -48,7 +56,7 @@ func (a *Application) startWatchdog() error {
|
||||
}
|
||||
}()
|
||||
|
||||
log.Info().Msg("Watchdog started with new settings")
|
||||
log.Info().Int("lruLimit", lruLimit).Bool("busyCheck", appConfig.WatchDogBusy).Bool("idleCheck", appConfig.WatchDogIdle).Msg("Watchdog started with new settings")
|
||||
} else {
|
||||
log.Info().Msg("Watchdog disabled")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user