The example for Aquiles-image has been added (#1420)

* The example for Aquiles-image has been added * Fix * Fix print statement formatting in aquiles_image_server.py * fix with ruff * Fix formatting with ruff format
2025-12-09 16:48:44 -06:00
parent 7cf8ec79a0
commit a9e3688861
1 changed files with 267 additions and 0 deletions
--- a/misc/aquiles_image_server.py
+++ b/misc/aquiles_image_server.py
@@ -0,0 +1,267 @@
+# Run Aquiles-Image API server with FLUX.1-Krea-dev on Modal
+
+"""
+[Aquiles-Image](https://github.com/Aquiles-ai/Aquiles-Image) is a production-ready API server that brings state-of-the-art image generation
+models to your applications. Built on FastAPI and Diffusers, it provides an OpenAI-compatible
+interface for generating and editing images using models like FLUX, Stable Diffusion 3.5, and more.
+
+This example shows how to deploy an Aquiles-Image server on Modal using the FLUX.1-Krea-dev model,
+providing a simple REST API for generating images from text prompts on Modal's GPU infrastructure.
+"""
+
+import os
+
+import modal
+
+# ## Set up the container image
+#
+# Our first order of business is to define the environment our server will run in:
+# the [container `Image`](https://modal.com/docs/guide/custom-container).
+#
+# We start with an NVIDIA CUDA base image and install the necessary dependencies:
+# - Git and build tools for installing packages from source
+# - PyTorch 2.8
+# - Diffusers
+# - Transformers and tokenizers for text processing
+# - Aquiles-Image from GitHub for the optimized API server
+#
+# Aquiles-Image provides 3x faster inference compared to vanilla implementations
+# through advanced optimizations and efficient model loading strategies.
+
+aquiles_image = (
+    modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.12")
+    .apt_install(
+        "git",
+        "curl",
+        "build-essential",
+    )
+    .entrypoint([])
+    .run_commands(
+        "python -m pip install --upgrade pip",
+        "python -m pip install --upgrade setuptools wheel",
+    )
+    .uv_pip_install(
+        "torch==2.8",
+        "git+https://github.com/huggingface/diffusers.git",
+        "transformers==4.57.3",
+        "tokenizers==0.22.1",
+        "git+https://github.com/Aquiles-ai/Aquiles-Image.git",
+    )
+    .env(
+        {
+            "HF_XET_HIGH_PERFORMANCE": "1",  # faster model transfers from Hugging Face
+            "HF_TOKEN": os.getenv(
+                "Hugging_face_token_for_deploy", ""
+            ),  # HuggingFace token to download the models if you don't have them available in Modal secrets
+        }
+    )
+)
+
+# ## Select the model
+#
+# We'll be running the FLUX.1-Krea-dev model from Black Forest Labs.
+# This is a powerful text-to-image diffusion model that produces high-quality images.
+#
+# You can swap this model out for any other model that Aquiles-Image supports by changing the string.
+#
+# Note: Larger models may require more VRAM. A single H100 GPU has 80GB of VRAM,
+# which is sufficient for most of the supported models.
+
+# Passed to `aquiles-image serve --model` and used as the `model` of the test request.
+MODEL_NAME = "black-forest-labs/FLUX.1-Krea-dev"
+
+# ## Cache model weights and configuration
+#
+# Although Aquiles-Image will download weights from Hugging Face on-demand,
+# we want to cache them so we don't do it every time our server starts.
+# We'll use [Modal Volumes](https://modal.com/docs/guide/volumes) for our cache.
+#
+# Modal Volumes are essentially a "shared disk" that all Modal Functions can access
+# like it's a regular disk. For more on storing model weights on Modal, see
+# [this guide](https://modal.com/docs/guide/model-weights).
+
+# Mounted at `/root/.cache/huggingface` in the `serve` function.
+hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
+# Mounted at `/root/.local/share` in the `serve` function.
+aquiles_config_vol = modal.Volume.from_name("aquiles-cache", create_if_missing=True)
+
+# ## Build the Aquiles-Image server and serve it
+#
+# The function below spawns an Aquiles-Image instance listening at port 5500.
+# We wrap it in the [`@modal.web_server` decorator](https://modal.com/docs/guide/webhooks#non-asgi-web-servers)
+# to connect it to the Internet.
+#
+# The server runs in an independent process, via `subprocess.Popen`, and only starts
+# accepting requests once the model is loaded and the `serve` function returns.
+
+# The Modal App that `serve` and the `test` local entrypoint are registered on.
+app = modal.App("aquiles-image-server")
+
+N_GPU = 1  # GPU count used in the `gpu=f"H100:{N_GPU}"` request for `serve`
+MINUTES = 60  # seconds
+AQUILES_PORT = 5500  # port given to `aquiles-image serve --port` and to `@modal.web_server`
+
+
+@app.function(
+    image=aquiles_image,
+    secrets=[modal.Secret.from_name("huggingface-secret")],
+    gpu=f"H100:{N_GPU}",
+    scaledown_window=6 * MINUTES,  # how long should we stay up with no requests?
+    timeout=10 * MINUTES,  # how long should we wait for container start?
+    volumes={
+        "/root/.cache/huggingface": hf_cache_vol,
+        "/root/.local/share": aquiles_config_vol,
+    },
+)
+@modal.concurrent(  # how many requests can one replica handle? tune carefully!
+    max_inputs=5
+)
+@modal.web_server(port=AQUILES_PORT, startup_timeout=10 * MINUTES)
+def serve():
+    import subprocess
+
+    # Configure the Aquiles-Image server command with:
+    # - Host and port settings for network access
+    # - Model name and inference steps
+    # - API key for authentication
+    # - Device map to use CUDA GPU
+    cmd = [
+        "aquiles-image",
+        "serve",
+        "--host",
+        "0.0.0.0",
+        "--port",
+        str(AQUILES_PORT),
+        "--model",
+        MODEL_NAME,
+        "--set-steps",
+        "35",  # number of diffusion steps (higher = better quality, slower)
+        "--api-key",
+        "dummy-api-key",  # set your own API key for production
+        "--device-map",
+        "cuda",  # use GPU acceleration
+    ]
+
+    print(f"Starting Aquiles-Image with the model: {MODEL_NAME}")
+    print(f"Command: {' '.join(cmd)}")
+
+    subprocess.Popen(" ".join(cmd), shell=True)
+
+
+# ## Deploy the server
+#
+# To deploy the API on Modal, just run:
+# ```bash
+# modal deploy aquiles_image_server.py
+# ```
+#
+# This will create a new app on Modal, build the container image for it
+# if it hasn't been built yet, and deploy the app.
+
+# ## Interact with the server
+#
+# Once it is deployed, you'll see a URL appear in the command line,
+# something like `https://your-workspace-name--aquiles-image-server-serve.modal.run`.
+#
+# The server provides an **OpenAI-compatible API**, making it a drop-in replacement for
+# OpenAI's image generation endpoints. You can use the official `openai` library
+# to interact with it, making integration seamless with existing code.
+#
+# ### Using the OpenAI Python library
+#
+# ```python
+# # pip install openai
+# from openai import OpenAI
+# import base64
+#
+# client = OpenAI(
+#     base_url="https://your-workspace-name--aquiles-image-server-serve.modal.run",
+#     api_key="dummy-api-key"  # use the same key configured in the server
+# )
+#
+# prompt = "A vast futuristic city curving upward into the sky, its buildings bending and connecting overhead in a continuous loop."
+#
+# result = client.images.generate(
+#     model="black-forest-labs/FLUX.1-Krea-dev",
+#     prompt=prompt,
+#     size="1024x1024",
+#     response_format="b64_json"
+# )
+#
+# # Save the generated image
+# image_bytes = base64.b64decode(result.data[0].b64_json)
+# with open("output.png", "wb") as f:
+#     f.write(image_bytes)
+# ```
+#
+# ### Using curl
+#
+# You can also send POST requests directly using curl:
+# ```bash
+# curl -X POST https://your-workspace-name--aquiles-image-server-serve.modal.run/images/generations \
+#   -H "Content-Type: application/json" \
+#   -H "Authorization: Bearer dummy-api-key" \
+#   -d '{
+#     "model": "black-forest-labs/FLUX.1-Krea-dev",
+#     "prompt": "A beautiful sunset over mountains",
+#     "size": "1024x1024"
+#   }'
+# ```
+#
+# ### Additional Endpoints
+#
+# Aquiles-Image provides multiple endpoints depending on your use case:
+# - `/images/generations` - Generate new images from text prompts
+# - `/images/edits` - Edit existing images with text guidance
+# - `/videos` - Generate videos from text prompts (experimental, Wan2.2 model)
+#
+# For full API documentation, visit the `/docs` route of your deployed server:
+# `https://your-workspace-name--aquiles-image-server-serve.modal.run/docs`
+
+# ## Testing the server
+#
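+# To make it easier to test the server setup, we include a `local_entrypoint`
+# that generates a test image and saves it locally as `output.png`.
+# It uses the `openai` Python package, so install it on your local machine first
+# (`pip install openai`).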
+#
+# If you execute the command:
+# ```bash
+# modal run aquiles_image_server.py
+# ```
+#
+# a fresh replica of the server will be spun up on Modal while
+# the code executes on your local machine.
+#
+# Think of this like writing simple tests inside of the `if __name__ == "__main__"`
+# block of a Python script, but for cloud deployments!
+
+
+@app.local_entrypoint()
+async def test():
+    import base64
+
+    from openai import OpenAI
+
+    url = serve.get_web_url()
+
+    print(f"Server is available at: {url}\n")
+
+    # Create OpenAI client pointing to our Modal server
+    client = OpenAI(base_url=url, api_key="dummy-api-key")
+
+    prompt = """A vast futuristic city curving upward into the sky, its buildings bending
+        and connecting overhead in a continuous loop. Gravity shifts seamlessly along
+        the curve, with sunlight streaming across inverted skyscrapers. The scene feels
+        serene and awe-inspiring—earthlike fields and rivers running along the inner
+        surface of a colossal rotating structure."""
+
+    print(f"Generating image with prompt:\n{prompt}\n")
+
+    # Generate image using OpenAI-compatible API
+    result = client.images.generate(
+        model=MODEL_NAME, prompt=prompt, size="1024x1024", response_format="b64_json"
+    )
+
+    print("Downloading image...\n")
+
+    # Save the generated image
+    image_bytes = base64.b64decode(result.data[0].b64_json)
+    with open("output.png", "wb") as f:
+        f.write(image_bytes)
+
+    print("Image saved successfully as 'output.png'!")