feat: refactor build process, drop embedded backends (#5875)

* feat: split remaining backends and drop embedded backends

- Drop the silero-vad, huggingface, and stores backends from the
  embedded binaries
- Refactor the Makefile and Dockerfile to avoid building gRPC backends
- Drop the Go code that was used to embed backends
- Simplify the build by using GoReleaser (see the illustrative sketch below)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(gallery): be specific with llama-cpp backend templates

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(docs): update

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(ci): minor fixes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: drop all ffmpeg references

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: run protogen-go

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Always enable p2p mode

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update goreleaser file

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(stores): do not always load

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix linting issues

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* macOS fixup

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
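
For illustration only, a minimal GoReleaser configuration of the kind this switch implies could look roughly like the sketch below. This is a hypothetical example, not the project's actual .goreleaser.yaml; the binary name and target list are assumptions.

version: 2
builds:
  - id: local-ai
    main: .
    binary: local-ai
    env:
      # Matches the CGO_ENABLED=0 build used elsewhere in this PR's CI changes.
      - CGO_ENABLED=0
    goos:
      - linux
      - darwin
    goarch:
      - amd64
      - arm64

A local snapshot build (what the new "make dev-dist" target appears to wrap) can be produced with "goreleaser release --snapshot --clean", while the release workflow in this diff runs "goreleaser release --clean" via goreleaser/goreleaser-action.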
Commit 98e5291afc (parent e29b2c3aff)
Author: Ettore Di Giacinto
Date: 2025-07-22 16:31:04 +02:00
Committed by: GitHub
118 changed files with 631 additions and 1339 deletions

@@ -43,7 +43,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -55,7 +55,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -67,7 +67,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -79,7 +79,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -91,7 +91,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -104,7 +104,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -116,7 +116,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -128,7 +128,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -140,7 +140,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -152,7 +152,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -165,7 +165,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -177,7 +177,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -189,7 +189,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -201,7 +201,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -213,7 +213,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -226,7 +226,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -238,7 +238,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -250,7 +250,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -262,7 +262,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-bark'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -274,7 +274,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -287,7 +287,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-rerankers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -299,7 +299,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -311,7 +311,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-vllm'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -323,7 +323,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-transformers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -335,7 +335,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-diffusers'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -348,7 +348,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-kokoro'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -360,7 +360,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -372,7 +372,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-coqui'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -384,7 +384,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-rocm-hipblas-bark'
runs-on: 'ubuntu-latest'
base-image: "rocm/dev-ubuntu-22.04:6.1"
@@ -397,7 +397,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -409,7 +409,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -421,7 +421,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -433,7 +433,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -445,7 +445,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -457,7 +457,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -469,7 +469,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -481,7 +481,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -493,7 +493,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -506,7 +506,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -518,7 +518,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -530,7 +530,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -542,7 +542,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -554,7 +554,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -566,7 +566,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -578,7 +578,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -590,7 +590,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -603,7 +603,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-piper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -616,7 +616,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-bark-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -628,7 +628,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-cpu-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -652,7 +652,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-llama-cpp'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -665,7 +665,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-cpu-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -677,7 +677,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -689,7 +689,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -701,7 +701,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -713,7 +713,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -725,7 +725,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -749,8 +749,8 @@ jobs:
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -762,7 +762,7 @@ jobs:
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -774,7 +774,7 @@ jobs:
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -786,7 +786,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -798,7 +798,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
@@ -810,7 +810,7 @@ jobs:
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-vulkan-whisper'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
@@ -842,6 +842,45 @@ jobs:
backend: "whisper"
dockerfile: "./backend/Dockerfile.go"
context: "./"
#silero-vad
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-silero-vad'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "silero-vad"
dockerfile: "./backend/Dockerfile.go"
context: "./"
# local-store
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-local-store'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "local-store"
dockerfile: "./backend/Dockerfile.go"
context: "./"
# huggingface
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-huggingface'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "huggingface"
dockerfile: "./backend/Dockerfile.go"
context: "./"
llama-cpp-darwin:
runs-on: macOS-14
strategy:
@@ -866,7 +905,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar
@@ -954,7 +993,7 @@ jobs:
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
make build
export PLATFORMARCH=darwin/amd64
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar

.github/workflows/build-test.yaml (new file, 23 lines)

@@ -0,0 +1,23 @@
name: Build test
on:
push:
branches:
- master
pull_request:
jobs:
build-test:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Run GoReleaser
run: |
make dev-dist

@@ -31,7 +31,7 @@ jobs:
make protogen-go
- name: Build api
run: |
CGO_ENABLED=0 make build-api
CGO_ENABLED=0 make build
- name: rm
uses: appleboy/ssh-action@v1.2.2
with:

@@ -14,7 +14,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -40,8 +39,7 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
ffmpeg: 'true'
tag-suffix: '-gpu-nvidia-cuda12'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
@@ -49,7 +47,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@@ -59,15 +56,13 @@ jobs:
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
tag-suffix: 'sycl-f16'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
tag-suffix: '-vulkan-core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"

@@ -18,7 +18,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -40,7 +39,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas'
ffmpeg: 'true'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
@@ -52,7 +50,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -76,7 +73,6 @@ jobs:
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: ''
ffmpeg: 'true'
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
@@ -88,7 +84,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda11'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
@@ -100,7 +95,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda12'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@@ -110,7 +104,6 @@ jobs:
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-vulkan'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
@@ -122,7 +115,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f16'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f16"
@@ -132,7 +124,6 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f32'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f32"
@@ -142,7 +133,6 @@ jobs:
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
@@ -167,7 +157,6 @@ jobs:
platforms: 'linux/arm64'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64'
ffmpeg: 'true'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
makeflags: "--jobs=4 --output-sync=target"

@@ -37,10 +37,6 @@ on:
description: 'Tag suffix'
default: ''
type: string
ffmpeg:
description: 'FFMPEG'
default: ''
type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
@@ -236,7 +232,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
@@ -264,7 +259,6 @@ jobs:
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target

@@ -96,7 +96,7 @@ jobs:
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.1

@@ -1,399 +1,26 @@
name: Build and Release
name: goreleaser
on:
push:
branches:
- master
tags:
- 'v*'
pull_request:
env:
GRPC_VERSION: v1.65.0
permissions:
contents: write
concurrency:
group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
# TODO: temporary disable linux-arm64 build
# build-linux-arm:
# runs-on: ubuntu-24.04-arm
# steps:
# - name: Free Disk Space (Ubuntu)
# uses: jlumbroso/free-disk-space@main
# with:
# # this might remove tools that are actually needed,
# # if set to "true" but frees about 6 GB
# tool-cache: true
# # all of these default to true, but feel free to set to
# # "false" if necessary for your workflow
# android: true
# dotnet: true
# haskell: true
# large-packages: true
# docker-images: true
# swap-storage: true
# - name: Release space from worker
# run: |
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# df -h
# echo
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
# sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
# sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
# sudo rm -rf /usr/local/lib/android
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
# sudo rm -rf /usr/share/dotnet
# sudo apt-get remove -y '^mono-.*' || true
# sudo apt-get remove -y '^ghc-.*' || true
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
# sudo apt-get remove -y 'php.*' || true
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
# sudo apt-get remove -y '^google-.*' || true
# sudo apt-get remove -y azure-cli || true
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
# sudo apt-get remove -y '^gfortran-.*' || true
# sudo apt-get remove -y microsoft-edge-stable || true
# sudo apt-get remove -y firefox || true
# sudo apt-get remove -y powershell || true
# sudo apt-get remove -y r-base-core || true
# sudo apt-get autoremove -y
# sudo apt-get clean
# echo
# echo "Listing top largest packages"
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
# head -n 30 <<< "${pkgs}"
# echo
# sudo rm -rfv build || true
# sudo rm -rf /usr/share/dotnet || true
# sudo rm -rf /opt/ghc || true
# sudo rm -rf "/usr/local/share/boost" || true
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
# df -h
# - name: Force Install GIT latest
# run: |
# sudo apt-get update \
# && sudo apt-get install -y software-properties-common \
# && sudo apt-get update \
# && sudo add-apt-repository -y ppa:git-core/ppa \
# && sudo apt-get update \
# && sudo apt-get install -y git
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - uses: actions/setup-go@v5
# with:
# go-version: '1.21.x'
# cache: false
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
# make install-go-tools
# - name: Install CUDA Dependencies
# run: |
# curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
# sudo dpkg -i cuda-keyring_1.1-1_all.deb
# sudo apt-get update
# sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
# env:
# CUDA_VERSION: 12-5
# - name: Cache grpc
# id: cache-grpc
# uses: actions/cache@v4
# with:
# path: grpc
# key: ${{ runner.os }}-grpc-arm64-${{ env.GRPC_VERSION }}
# - name: Build grpc
# if: steps.cache-grpc.outputs.cache-hit != 'true'
# run: |
# git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
# cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
# cd cmake/build && cmake -DgRPC_INSTALL=ON \
# -DgRPC_BUILD_TESTS=OFF \
# ../.. && sudo make --jobs 5 --output-sync=target
# - name: Install gRPC
# run: |
# cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
# - name: Build
# id: build
# run: |
# go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
# go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
# export PATH=$PATH:$GOPATH/bin
# export PATH=/usr/local/cuda/bin:$PATH
# sudo cp /lib64/ld-linux-aarch64.so.1 ld.so
# BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/aarch64-linux-gnu/libdl.so.2 /usr/lib/aarch64-linux-gnu/librt.so.1 /usr/lib/aarch64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
# make -j4 dist
# - uses: actions/upload-artifact@v4
# with:
# name: LocalAI-linux-arm64
# path: release/
# - name: Release
# uses: softprops/action-gh-release@v2
# if: startsWith(github.ref, 'refs/tags/')
# with:
# files: |
# release/*
# - name: Setup tmate session if tests fail
# if: ${{ failure() }}
# uses: mxschmitt/action-tmate@v3.22
# with:
# detached: true
# connect-timeout-seconds: 180
# limit-access-to-actor: true
build-linux:
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Clone
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
make install-go-tools
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt update
sudo apt install -y intel-basekit
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
go-version: 1.23
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v6
with:
version: v2.11.0
args: release --clean
env:
CUDA_VERSION: 12-5
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1"
run: |
set -ex
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
sudo apt update
wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
sudo apt update
sudo amdgpu-install --usecase=rocm
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
- name: Build
id: build
run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
make -j4 dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-linux
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-x86_64:
runs-on: macos-13
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc
make install-go-tools
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-x86_64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-macOS-arm64:
runs-on: macos-14
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
brew install protobuf grpc libomp llvm
make install-go-tools
- name: Build
id: build
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
export CC=/opt/homebrew/opt/llvm/bin/clang
make dist
- uses: actions/upload-artifact@v4
with:
name: LocalAI-MacOS-arm64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

@@ -75,7 +75,6 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Dependencies
run: |
@@ -103,7 +102,7 @@ jobs:
make -C backend/python/transformers
make backends/llama-cpp backends/piper backends/whisper backends/stablediffusion-ggml
make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
env:
CUDA_VERSION: 12-4
- name: Test
@@ -164,11 +163,10 @@ jobs:
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install github.com/GeertJohan/go.rice/rice@latest
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
PATH="$PATH:$HOME/go/bin" make backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
@@ -199,11 +197,10 @@ jobs:
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
go install github.com/GeertJohan/go.rice/rice@latest
- name: Build llama-cpp-darwin
run: |
make protogen-go
make build-api
make build
bash scripts/build-llama-cpp-darwin.sh
ls -la build/darwin.tar
mv build/darwin.tar build/llama-cpp.tar