Remove modal.NetworkFileSystem model loading usage (pt-2) (#383)

Authored by Jonathon Belotti on 2023-07-30 13:53:49 -04:00; committed by GitHub
parent ddc1cb71df
commit 32c111007a
4 changed files with 119 additions and 80 deletions

View File

@@ -59,7 +59,7 @@ if stub.is_inside():
 # For temporary storage and sharing of downloaded movie clips, we use a network file system.
-stub.sv = modal.NetworkFileSystem.new()
+stub.net_file_system = modal.NetworkFileSystem.new()
 
 # ### Face detection function
 #
@@ -73,7 +73,9 @@ stub.sv = modal.NetworkFileSystem.new()
 # and stores the resulting video back to the shared storage.
-@stub.function(network_file_systems={"/clips": stub.sv}, timeout=600)
+@stub.function(
+    network_file_systems={"/clips": stub.net_file_system}, timeout=600
+)
 def detect_faces(fn, start, stop):
     # Extract the subclip from the video
     clip = moviepy.editor.VideoFileClip(fn).subclip(start, stop)
@@ -106,7 +108,7 @@ def detect_faces(fn, start, stop):
 # 3. Stitch the results back into a new video
-@stub.function(network_file_systems={"/clips": stub.sv}, retries=1)
+@stub.function(network_file_systems={"/clips": stub.net_file_system}, retries=1)
 def process_video(url):
     print(f"Downloading video from '{url}'")
     yt = pytube.YouTube(url)
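
For reference, the renamed attribute is used exactly like the old `stub.sv`: an ephemeral `NetworkFileSystem` is attached to the stub and mounted into functions by path, so every function that mounts it sees the same files. A minimal sketch of the pattern (the `write_clip` function is illustrative, not from this commit):

import modal

stub = modal.Stub("nfs-sketch")
stub.net_file_system = modal.NetworkFileSystem.new()  # ephemeral: lives as long as the app

@stub.function(network_file_systems={"/clips": stub.net_file_system})
def write_clip(name: str, data: bytes):
    # Functions mounting the same NetworkFileSystem share these files.
    with open(f"/clips/{name}", "wb") as f:
        f.write(data)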

View File

@@ -38,27 +38,41 @@ from fastapi.staticfiles import StaticFiles
 from modal import (
     Image,
     Mount,
-    Secret,
-    NetworkFileSystem,
     Stub,
     method,
     asgi_app,
 )
 
-# We mainly need to install [transformers](https://github.com/huggingface/transformers)
+# We need to install [transformers](https://github.com/huggingface/transformers)
 # which is a package Huggingface uses for all their models, but also
 # [Pillow](https://python-pillow.org/) which lets us work with images from Python,
 # and a system font for drawing.
 #
+# This example uses the `facebook/detr-resnet-50` pre-trained model, which is downloaded
+# once at image build time using the `download_model` function and saved into the image.
+# 'Baking' models into the `modal.Image` at build time provides the fastest cold starts.
+
+model_repo_id = "facebook/detr-resnet-50"
+
+
+def download_model():
+    from huggingface_hub import snapshot_download
+
+    snapshot_download(repo_id=model_repo_id, cache_dir="/cache")
+
+
 stub = Stub("example-webcam-object-detection")
 image = (
     Image.debian_slim()
     .pip_install(
+        "huggingface-hub==0.16.4",
         "Pillow",
         "timm",
         "transformers",
     )
     .apt_install("fonts-freefont-ttf")
+    .run_function(download_model)
 )
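
Since `run_function` just executes a regular Python function while the image is being built, the same mechanism can bake in, or sanity-check, any asset. A minimal sketch under that assumption (`verify_model_files` is a hypothetical helper, not part of this commit):

import os

import modal

model_repo_id = "facebook/detr-resnet-50"

def download_model():
    from huggingface_hub import snapshot_download

    snapshot_download(repo_id=model_repo_id, cache_dir="/cache")

def verify_model_files():
    # Hypothetical: runs at image build time, after download_model, so the
    # snapshot is already part of the image filesystem.
    assert any(os.scandir("/cache")), "expected model files baked into /cache"

image = (
    modal.Image.debian_slim()
    .pip_install("huggingface-hub==0.16.4")
    .run_function(download_model)
    .run_function(verify_model_files)
)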
@@ -69,8 +83,8 @@ image = (
 # * There's a container initialization step in the `__enter__` method, which
 #   runs on every container start. This lets us load the model only once per
 #   container, so that it's reused for subsequent function calls.
-# * We store the model in a network file system. This lets us download the model only
-#   the first time the function is ever called.
+# * Above we stored the model in the container image. This lets us download the model only
+#   when the image is (re)built, and not every time the function is called.
 # * We're running it on multiple CPUs for extra performance
 #
 # Note that the function takes an image and returns a new image.
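
To make the lifecycle point concrete: `__enter__` runs once when a container boots, while `@method()` calls are served repeatedly by that warm container. A minimal sketch of the pattern, reusing the names defined above (the `Detector`/`detect` names are illustrative):

@stub.cls(cpu=4, image=image)
class Detector:
    def __enter__(self):
        # Once per container start: load weights from the baked /cache dir.
        from transformers import DetrForObjectDetection

        self.model = DetrForObjectDetection.from_pretrained(
            model_repo_id, cache_dir="/cache"
        )

    @method()
    def detect(self, image_bytes: bytes):
        ...  # every call served by this container reuses self.model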
@@ -82,21 +96,19 @@ image = (
 @stub.cls(
     cpu=4,
-    network_file_systems={"/cache": NetworkFileSystem.new()},
     image=image,
-    secret=Secret.from_dict(
-        {"TORCH_HOME": "/cache", "TRANSFORMERS_CACHE": "/cache"}
-    ),
 )
 class ObjectDetection:
     def __enter__(self):
-        from transformers import DetrFeatureExtractor, DetrForObjectDetection
+        from transformers import DetrImageProcessor, DetrForObjectDetection
 
-        self.feature_extractor = DetrFeatureExtractor.from_pretrained(
-            "facebook/detr-resnet-50"
+        self.feature_extractor = DetrImageProcessor.from_pretrained(
+            model_repo_id,
+            cache_dir="/cache",
         )
         self.model = DetrForObjectDetection.from_pretrained(
-            "facebook/detr-resnet-50"
+            model_repo_id,
+            cache_dir="/cache",
         )
 
     @method()
@@ -113,8 +125,12 @@ class ObjectDetection:
         inputs = self.feature_extractor(image, return_tensors="pt")
         outputs = self.model(**inputs)
         img_size = torch.tensor([tuple(reversed(image.size))])
-        processed_outputs = self.feature_extractor.post_process(
-            outputs, img_size
+        processed_outputs = (
+            self.feature_extractor.post_process_object_detection(
+                outputs=outputs,
+                target_sizes=img_size,
+                threshold=0,
+            )
         )
         output_dict = processed_outputs[0]
@@ -131,7 +147,7 @@ class ObjectDetection:
         )
         output_image = Image.new("RGBA", (image.width, image.height))
         output_image_draw = ImageDraw.Draw(output_image)
-        for score, box, label in zip(scores, boxes, labels):
+        for _score, box, label in zip(scores, boxes, labels):
            color = colors[label % len(colors)]
            text = self.model.config.id2label[label]
            box = tuple(map(int, box))

View File

@@ -16,7 +16,7 @@ import io
 import os
 from typing import Optional
 
-from modal import Image, Secret, NetworkFileSystem, Stub, web_endpoint
+from modal import Image, Secret, Stub, web_endpoint
 
 # All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
 # the application. Let's give it a friendly name.
@@ -40,58 +40,69 @@ stub = Stub("example-stable-diff-bot")
#
# ![create a huggingface token](./huggingface_token.png)
#
# ### Model cache
# ### Model caching
#
# The `diffusers` library downloads the weights for a pre-trained model to a local
# directory, if those weights don't already exist. To decrease start-up time, we want
# this download to happen just once, even across separate function invocations.
# To accomplish this, we use a [`NetworkFileSystem`](/docs/guide/shared-volumes), a
# writable volume that can be attached to Modal functions and persisted across function runs.
volume = NetworkFileSystem.persisted("stable-diff-model-vol")
# ### The actual function
#
# Now that we have our token and `NetworkFileSystem` set up, we can put everything together.
#
# Let's define a function that takes a text prompt and an optional channel name
# (so we can post results to Slack if the value is set) and runs stable diffusion.
# The `@stub.function()` decorator declares all the resources this function will
# use: we configure it to use a GPU, run on an image that has all the packages we
# need to run the model, mount the `NetworkFileSystem` to a path of our choice, and
# also provide it the secret that contains the token we created above.
#
# By setting the `cache_dir` argument for the model to the mount path of our
# `NetworkFileSystem`, we ensure that the model weights are downloaded only once.
# To accomplish this, we use simple function that will run at image build time and save the model into
# the image's filesystem.
CACHE_PATH = "/root/model_cache"
@stub.function(
gpu="A10G",
image=(
Image.debian_slim()
.run_commands(
"pip install torch --extra-index-url https://download.pytorch.org/whl/cu117"
)
.pip_install("diffusers", "transformers", "scipy", "ftfy", "accelerate")
),
network_file_systems={CACHE_PATH: volume},
secret=Secret.from_name("huggingface-secret"),
)
async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
def fetch_model(local_files_only: bool = False):
from diffusers import StableDiffusionPipeline
from torch import float16
pipe = StableDiffusionPipeline.from_pretrained(
return StableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
revision="fp16",
variant="fp16",
torch_dtype=float16,
cache_dir=CACHE_PATH,
device_map="auto",
cache_dir=CACHE_PATH, # reads model saved in the modal.Image's filesystem.
local_files_only=local_files_only,
)
image = (
Image.debian_slim()
.run_commands(
"pip install torch --extra-index-url https://download.pytorch.org/whl/cu117"
)
.pip_install(
"diffusers",
"huggingface-hub",
"safetensors",
"transformers",
"scipy",
"ftfy",
"accelerate",
)
.run_function(fetch_model, secret=Secret.from_name("huggingface-secret"))
)
# ### The actual function
#
# Now that we have our token and `modal.Image` set up, we can put everything together.
#
# Let's define a function that takes a text prompt and an optional channel name
# (so we can post results to Slack if the value is set) and runs stable diffusion.
# The `@stub.function()` decorator declares all the resources this function will
# use: we configure it to use a GPU, run on an image that has all the packages and files we
# need to run the model, and
# also provide it the secret that contains the token we created above.
@stub.function(
gpu="A10G",
image=image,
secret=Secret.from_name("huggingface-secret"),
)
async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
pipe = fetch_model(local_files_only=True)
image = pipe(prompt, num_inference_steps=100).images[0]
# Convert PIL Image to PNG byte array.
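
Note that `fetch_model` does double duty here: at build time, `run_function(..., secret=...)` runs it with the Hugging Face token in the environment and the download lands in the image filesystem; at runtime it is called again with `local_files_only=True`, so `from_pretrained` reads the baked files and fails fast rather than reaching for the network. A condensed sketch of this two-phase pattern (the model and function names are illustrative, not from this commit):

from modal import Image, Stub

stub = Stub("two-phase-cache-sketch")

def load(local_files_only: bool = False):
    from transformers import AutoModel

    return AutoModel.from_pretrained(
        "bert-base-uncased",                 # illustrative public model, no token needed
        cache_dir="/root/model_cache",
        local_files_only=local_files_only,   # True at runtime: never hits the network
    )

image = (
    Image.debian_slim()
    .pip_install("transformers", "torch")
    .run_function(load)  # build time: weights are written into the image
)

@stub.function(image=image)
def predict():
    model = load(local_files_only=True)  # runtime: loads from the baked cache
    return model.config.model_type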

View File

@@ -13,21 +13,41 @@ from typing import List
 
 import modal
 
-stub = modal.Stub(name="example-news-summarizer")
+# ## Building Images and Downloading Pre-trained Model
+#
+# We start by defining our images. In Modal, each function can use a different
+# image. This is powerful because you add only the dependencies you need for
+# each function.
 
-MODEL_NAME = "google/pegasus-xsum"
-CACHE_DIR = "/cache"
+stub = modal.Stub(name="example-news-summarizer")
 
 # The first image contains dependencies for running our model. We also download the
-# pre-trained model into the image using the `huggingface` API. This caches the model so that
-# we don't have to download it on every function call.
-stub["deep_learning_image"] = modal.Image.debian_slim().pip_install(
-    "transformers==4.16.2", "torch", "sentencepiece"
-)
+# pre-trained model into the image using the `from_pretrained` method.
+# This caches the model so that we don't have to download it on every function call.
+# The model will be saved at `/cache` when this function is called at image build time;
+# subsequent calls of this function at runtime will then load the model from `/cache`.
+def fetch_model(local_files_only: bool = False):
+    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
+
+    tokenizer = PegasusTokenizer.from_pretrained(
+        "google/pegasus-xsum",
+        cache_dir="/cache",
+        local_files_only=local_files_only,
+    )
+    model = PegasusForConditionalGeneration.from_pretrained(
+        "google/pegasus-xsum",
+        cache_dir="/cache",
+        local_files_only=local_files_only,
+    )
+    return model, tokenizer
+
+
+stub["deep_learning_image"] = (
+    modal.Image.debian_slim()
+    .pip_install("transformers==4.16.2", "torch", "sentencepiece")
+    .run_function(fetch_model)
+)
 
 # Defining the scraping image is very similar. This image only contains the packages required
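
The per-function-images comment above is the organizing idea of this example: the scraping function and the summarization function each carry only their own dependencies. A tiny self-contained sketch of that idea (function bodies and names are illustrative):

import modal

stub = modal.Stub("per-function-images-sketch")
scraping_img = modal.Image.debian_slim().pip_install("requests", "beautifulsoup4", "lxml")
ml_img = modal.Image.debian_slim().pip_install("transformers==4.16.2", "torch", "sentencepiece")

@stub.function(image=scraping_img)
def scrape(url: str) -> str:
    import requests  # present only in scraping_img

    return requests.get(url).text

@stub.function(image=ml_img)
def summarize(text: str) -> str:
    from transformers import PegasusTokenizer  # present only in ml_img

    ...  # tokenize and generate, as in the real example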
@@ -36,19 +56,6 @@ stub["scraping_image"] = modal.Image.debian_slim().pip_install(
"requests", "beautifulsoup4", "lxml"
)
volume = modal.NetworkFileSystem.persisted("pegasus-modal-vol")
# We will also instantiate the model and tokenizer globally so its available for all functions that use this image.
if stub.is_inside(stub["deep_learning_image"]):
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
TOKENIZER = PegasusTokenizer.from_pretrained(
MODEL_NAME, cache_dir=CACHE_DIR
)
MODEL = PegasusForConditionalGeneration.from_pretrained(
MODEL_NAME, cache_dir=CACHE_DIR
)
if stub.is_inside(stub["scraping_image"]):
import requests
@@ -145,18 +152,21 @@ def scrape_nyc_article(url: str) -> str:
@stub.function(
image=stub["deep_learning_image"],
gpu=False,
network_file_systems={CACHE_DIR: volume},
memory=4096,
)
def summarize_article(text: str) -> str:
print(f"Summarizing text with {len(text)} characters.")
# `local_files_only` is set to `True` because we expect to read the model
# files saved in the image.
model, tokenizer = fetch_model(local_files_only=True)
# summarize text
batch = TOKENIZER(
batch = tokenizer(
[text], truncation=True, padding="longest", return_tensors="pt"
).to("cpu")
translated = MODEL.generate(**batch)
summary = TOKENIZER.batch_decode(translated, skip_special_tokens=True)[0]
translated = model.generate(**batch)
summary = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
return summary
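
To exercise the summarizer end to end, the usual pattern at this point in Modal's API is a local entrypoint that triggers the remote functions. A hedged sketch, assuming the `.call()` remote-invocation style of this era of Modal (the entrypoint and URL are illustrative, not part of this commit):

@stub.local_entrypoint()
def main():
    url = "https://www.nytimes.com/..."  # illustrative article URL
    text = scrape_nyc_article.call(url)  # runs remotely in the scraping image
    print(summarize_article.call(text))  # runs remotely in the deep-learning image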