Remove modal.NetworkFileSystem model loading usage (pt-2) (#383)
@@ -59,7 +59,7 @@ if stub.is_inside():

 # For temporary storage and sharing of downloaded movie clips, we use a network file system.
-stub.sv = modal.NetworkFileSystem.new()
+stub.net_file_system = modal.NetworkFileSystem.new()

 # ### Face detection function
 #
@@ -73,7 +73,9 @@ stub.sv = modal.NetworkFileSystem.new()
 # and stores the resulting video back to the shared storage.


-@stub.function(network_file_systems={"/clips": stub.sv}, timeout=600)
+@stub.function(
+    network_file_systems={"/clips": stub.net_file_system}, timeout=600
+)
 def detect_faces(fn, start, stop):
     # Extract the subclip from the video
     clip = moviepy.editor.VideoFileClip(fn).subclip(start, stop)
@@ -106,7 +108,7 @@ def detect_faces(fn, start, stop):
 # 3. Stitch the results back into a new video


-@stub.function(network_file_systems={"/clips": stub.sv}, retries=1)
+@stub.function(network_file_systems={"/clips": stub.net_file_system}, retries=1)
 def process_video(url):
     print(f"Downloading video from '{url}'")
     yt = pytube.YouTube(url)
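This first file keeps its `NetworkFileSystem` usage (only the attribute is renamed), since the clips genuinely need to be shared between functions. A minimal sketch of that pattern, using the era's `Stub`/`NetworkFileSystem.new()` API; function names here are illustrative:

```python
import modal

stub = modal.Stub("clip-sharing-sketch")
stub.net_file_system = modal.NetworkFileSystem.new()


@stub.function(network_file_systems={"/clips": stub.net_file_system})
def write_clip(name: str, data: bytes):
    # Any function mounting the same NetworkFileSystem sees this file.
    with open(f"/clips/{name}", "wb") as f:
        f.write(data)


@stub.function(network_file_systems={"/clips": stub.net_file_system})
def read_clip(name: str) -> bytes:
    with open(f"/clips/{name}", "rb") as f:
        return f.read()
```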
@@ -38,27 +38,41 @@ from fastapi.staticfiles import StaticFiles
 from modal import (
     Image,
     Mount,
     Secret,
-    NetworkFileSystem,
     Stub,
     method,
     asgi_app,
 )

-# We mainly need to install [transformers](https://github.com/huggingface/transformers)
+# We need to install [transformers](https://github.com/huggingface/transformers)
 # which is a package Huggingface uses for all their models, but also
 # [Pillow](https://python-pillow.org/) which lets us work with images from Python,
 # and a system font for drawing.
 #
+# This example uses the `facebook/detr-resnet-50` pre-trained model, which is downloaded
+# once at image build time using the `download_model` function and saved into the image.
+# 'Baking' models into the `modal.Image` at build time provided the fastest cold start.
+
+model_repo_id = "facebook/detr-resnet-50"
+
+
+def download_model():
+    from huggingface_hub import snapshot_download
+
+    snapshot_download(repo_id=model_repo_id, cache_dir="/cache")
+

 stub = Stub("example-webcam-object-detection")
 image = (
     Image.debian_slim()
     .pip_install(
+        "huggingface-hub==0.16.4",
         "Pillow",
         "timm",
         "transformers",
     )
     .apt_install("fonts-freefont-ttf")
+    .run_function(download_model)
 )
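The effect of `.run_function(download_model)` is that `snapshot_download` executes during the image build, so the weights under `/cache` ship inside the image and no download happens at container start. A hypothetical sanity check (not part of the commit) that reuses the `stub` and `image` defined above:

```python
import os


@stub.function(image=image)
def check_baked_cache() -> list:
    # Lists the snapshot written by download_model() at image build time;
    # no network access is needed when the container starts.
    return os.listdir("/cache")
```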
@@ -69,8 +83,8 @@ image = (
 # * There's a container initialization step in the `__enter__` method, which
 # runs on every container start. This lets us load the model only once per
 # container, so that it's reused for subsequent function calls.
-# * We store the model in a network file system. This lets us download the model only
-# the first time the function is ever called.
+# * Above we stored the model in the container image. This lets us download the model only
+# when the image is (re)built, and not every time the function is called.
 # * We're running it on multiple CPUs for extra performance
 #
 # Note that the function takes an image and returns a new image.
@@ -82,21 +96,19 @@ image = (
 @stub.cls(
     cpu=4,
-    network_file_systems={"/cache": NetworkFileSystem.new()},
     image=image,
     secret=Secret.from_dict(
         {"TORCH_HOME": "/cache", "TRANSFORMERS_CACHE": "/cache"}
     ),
 )
 class ObjectDetection:
     def __enter__(self):
-        from transformers import DetrFeatureExtractor, DetrForObjectDetection
+        from transformers import DetrImageProcessor, DetrForObjectDetection

-        self.feature_extractor = DetrFeatureExtractor.from_pretrained(
-            "facebook/detr-resnet-50"
+        self.feature_extractor = DetrImageProcessor.from_pretrained(
+            model_repo_id,
+            cache_dir="/cache",
         )
-        self.model = DetrForObjectDetection.from_pretrained(
-            "facebook/detr-resnet-50"
+        self.model = DetrForObjectDetection.from_pretrained(
+            model_repo_id,
+            cache_dir="/cache",
         )

     @method()
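A reduced sketch of the container-lifecycle pattern `ObjectDetection` relies on: `__enter__` runs once per container, so the weights baked into the image are loaded once and reused across calls served by that container. The class name and the `load_model` helper below are illustrative, not part of the commit:

```python
from modal import Stub, method

stub = Stub("lifecycle-sketch")


@stub.cls(cpu=4)
class Predictor:
    def __enter__(self):
        # One-time, per-container setup: load weights already present in the
        # image (e.g. under /cache) instead of downloading them.
        self.model = load_model("/cache")  # hypothetical helper

    @method()
    def predict(self, x):
        # Later calls routed to this container reuse self.model.
        return self.model(x)
```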
@@ -113,8 +125,12 @@ class ObjectDetection:
         inputs = self.feature_extractor(image, return_tensors="pt")
         outputs = self.model(**inputs)
         img_size = torch.tensor([tuple(reversed(image.size))])
-        processed_outputs = self.feature_extractor.post_process(
-            outputs, img_size
+        processed_outputs = (
+            self.feature_extractor.post_process_object_detection(
+                outputs=outputs,
+                target_sizes=img_size,
+                threshold=0,
+            )
         )
         output_dict = processed_outputs[0]
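For reference, the replacement API outside Modal: `DetrImageProcessor` supersedes the deprecated `DetrFeatureExtractor`, and `post_process_object_detection` takes `target_sizes` as (height, width) pairs. A standalone sketch, assuming a local `photo.jpg` exists:

```python
import torch
from PIL import Image
from transformers import DetrForObjectDetection, DetrImageProcessor

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

image = Image.open("photo.jpg")  # hypothetical input file
inputs = processor(image, return_tensors="pt")
outputs = model(**inputs)

# PIL reports (width, height); the processor wants (height, width).
target_sizes = torch.tensor([tuple(reversed(image.size))])
result = processor.post_process_object_detection(
    outputs=outputs, target_sizes=target_sizes, threshold=0
)[0]
for score, label, box in zip(result["scores"], result["labels"], result["boxes"]):
    print(model.config.id2label[label.item()], round(score.item(), 3), box.tolist())
```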
@@ -131,7 +147,7 @@ class ObjectDetection:
         )
         output_image = Image.new("RGBA", (image.width, image.height))
         output_image_draw = ImageDraw.Draw(output_image)
-        for score, box, label in zip(scores, boxes, labels):
+        for _score, box, label in zip(scores, boxes, labels):
             color = colors[label % len(colors)]
             text = self.model.config.id2label[label]
             box = tuple(map(int, box))
@@ -16,7 +16,7 @@ import io
 import os
 from typing import Optional

-from modal import Image, Secret, NetworkFileSystem, Stub, web_endpoint
+from modal import Image, Secret, Stub, web_endpoint

 # All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
 # the application. Let's give it a friendly name.
@@ -40,58 +40,69 @@ stub = Stub("example-stable-diff-bot")
 #
 # ![create a slack app](./wizard_flow.png)
 #
-# ### Model cache
+# ### Model caching
 #
 # The `diffusers` library downloads the weights for a pre-trained model to a local
 # directory, if those weights don't already exist. To decrease start-up time, we want
 # this download to happen just once, even across separate function invocations.
-# To accomplish this, we use a [`NetworkFileSystem`](/docs/guide/shared-volumes), a
-# writable volume that can be attached to Modal functions and persisted across function runs.
-
-volume = NetworkFileSystem.persisted("stable-diff-model-vol")
-
-# ### The actual function
-#
-# Now that we have our token and `NetworkFileSystem` set up, we can put everything together.
-#
-# Let's define a function that takes a text prompt and an optional channel name
-# (so we can post results to Slack if the value is set) and runs stable diffusion.
-# The `@stub.function()` decorator declares all the resources this function will
-# use: we configure it to use a GPU, run on an image that has all the packages we
-# need to run the model, mount the `NetworkFileSystem` to a path of our choice, and
-# also provide it the secret that contains the token we created above.
-#
-# By setting the `cache_dir` argument for the model to the mount path of our
-# `NetworkFileSystem`, we ensure that the model weights are downloaded only once.
+# To accomplish this, we use a simple function that will run at image build time and save the model into
+# the image's filesystem.

 CACHE_PATH = "/root/model_cache"


-@stub.function(
-    gpu="A10G",
-    image=(
-        Image.debian_slim()
-        .run_commands(
-            "pip install torch --extra-index-url https://download.pytorch.org/whl/cu117"
-        )
-        .pip_install("diffusers", "transformers", "scipy", "ftfy", "accelerate")
-    ),
-    network_file_systems={CACHE_PATH: volume},
-    secret=Secret.from_name("huggingface-secret"),
-)
-async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
+def fetch_model(local_files_only: bool = False):
     from diffusers import StableDiffusionPipeline
     from torch import float16

-    pipe = StableDiffusionPipeline.from_pretrained(
+    return StableDiffusionPipeline.from_pretrained(
         "runwayml/stable-diffusion-v1-5",
         use_auth_token=os.environ["HUGGINGFACE_TOKEN"],
-        revision="fp16",
+        variant="fp16",
         torch_dtype=float16,
-        cache_dir=CACHE_PATH,
-        device_map="auto",
+        cache_dir=CACHE_PATH,  # reads model saved in the modal.Image's filesystem.
+        local_files_only=local_files_only,
     )


+image = (
+    Image.debian_slim()
+    .run_commands(
+        "pip install torch --extra-index-url https://download.pytorch.org/whl/cu117"
+    )
+    .pip_install(
+        "diffusers",
+        "huggingface-hub",
+        "safetensors",
+        "transformers",
+        "scipy",
+        "ftfy",
+        "accelerate",
+    )
+    .run_function(fetch_model, secret=Secret.from_name("huggingface-secret"))
+)
+
+# ### The actual function
+#
+# Now that we have our token and `modal.Image` set up, we can put everything together.
+#
+# Let's define a function that takes a text prompt and an optional channel name
+# (so we can post results to Slack if the value is set) and runs stable diffusion.
+# The `@stub.function()` decorator declares all the resources this function will
+# use: we configure it to use a GPU, run on an image that has all the packages and
+# files we need to run the model, and also provide it the secret that contains
+# the token we created above.
+
+
+@stub.function(
+    gpu="A10G",
+    image=image,
+    secret=Secret.from_name("huggingface-secret"),
+)
+async def run_stable_diffusion(prompt: str, channel_name: Optional[str] = None):
+    pipe = fetch_model(local_files_only=True)
+
     image = pipe(prompt, num_inference_steps=100).images[0]

     # Convert PIL Image to PNG byte array.
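The shape of this refactor generalizes: one `fetch_model` function both bakes the weights at image build time (via `run_function`, with the Hugging Face secret available during the build) and reloads them at runtime with `local_files_only=True`, so nothing is silently re-downloaded. A schematic sketch; `load_weights` and `base_image` are stand-ins, not real APIs:

```python
def fetch_model(local_files_only: bool = False):
    # Build time (local_files_only=False): download into CACHE_PATH, which is
    # then snapshotted into the image. Runtime (True): read the baked files,
    # failing fast rather than re-downloading.
    return load_weights(cache_dir=CACHE_PATH, local_files_only=local_files_only)


image = base_image.run_function(
    fetch_model, secret=Secret.from_name("huggingface-secret")
)


@stub.function(gpu="A10G", image=image)
def generate(prompt: str):
    pipe = fetch_model(local_files_only=True)  # loads from the image filesystem
    return pipe(prompt).images[0]
```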
@@ -13,21 +13,41 @@ from typing import List

 import modal

-stub = modal.Stub(name="example-news-summarizer")
-
 # ## Building Images and Downloading Pre-trained Model
 #
 # We start by defining our images. In Modal, each function can use a different
 # image. This is powerful because you add only the dependencies you need for
 # each function.

+stub = modal.Stub(name="example-news-summarizer")
 MODEL_NAME = "google/pegasus-xsum"
 CACHE_DIR = "/cache"

 # The first image contains dependencies for running our model. We also download the
-# pre-trained model into the image using the `huggingface` API. This caches the model so that
-# we don't have to download it on every function call.
-stub["deep_learning_image"] = modal.Image.debian_slim().pip_install(
-    "transformers==4.16.2", "torch", "sentencepiece"
-)
+# pre-trained model into the image using the `from_pretrained` method.
+# This caches the model so that we don't have to download it on every function call.
+# The model will be saved at `/cache` when this function is called at image build time;
+# subsequent calls of this function at runtime will then load the model from `/cache`.
+
+
+def fetch_model(local_files_only: bool = False):
+    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
+
+    tokenizer = PegasusTokenizer.from_pretrained(
+        "google/pegasus-xsum",
+        cache_dir="/cache",
+        local_files_only=local_files_only,
+    )
+    model = PegasusForConditionalGeneration.from_pretrained(
+        "google/pegasus-xsum",
+        cache_dir="/cache",
+        local_files_only=local_files_only,
+    )
+    return model, tokenizer
+
+
+stub["deep_learning_image"] = (
+    modal.Image.debian_slim()
+    .pip_install("transformers==4.16.2", "torch", "sentencepiece")
+    .run_function(fetch_model)
+)

 # Defining the scraping image is very similar. This image only contains the packages required
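A compact sketch of the per-function-image idea the summarizer example is built on, using the era's `stub[...]` image registry; the function bodies below are illustrative:

```python
import modal

stub = modal.Stub("per-function-images-sketch")
stub["scraping_image"] = modal.Image.debian_slim().pip_install(
    "requests", "beautifulsoup4", "lxml"
)
stub["deep_learning_image"] = modal.Image.debian_slim().pip_install(
    "transformers==4.16.2", "torch", "sentencepiece"
)


@stub.function(image=stub["scraping_image"])
def scrape(url: str) -> str:
    import requests  # only installed in the scraping image

    return requests.get(url).text


@stub.function(image=stub["deep_learning_image"])
def summarize(text: str) -> str:
    import torch  # only installed in the deep-learning image

    return text[:200]  # placeholder; real code runs the model here
```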
@@ -36,19 +56,6 @@ stub["scraping_image"] = modal.Image.debian_slim().pip_install(
     "requests", "beautifulsoup4", "lxml"
 )

-volume = modal.NetworkFileSystem.persisted("pegasus-modal-vol")
-
-# We will also instantiate the model and tokenizer globally so it's available for all functions that use this image.
-if stub.is_inside(stub["deep_learning_image"]):
-    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
-
-    TOKENIZER = PegasusTokenizer.from_pretrained(
-        MODEL_NAME, cache_dir=CACHE_DIR
-    )
-    MODEL = PegasusForConditionalGeneration.from_pretrained(
-        MODEL_NAME, cache_dir=CACHE_DIR
-    )
-

 if stub.is_inside(stub["scraping_image"]):
     import requests
@@ -145,18 +152,21 @@ def scrape_nyc_article(url: str) -> str:
 @stub.function(
     image=stub["deep_learning_image"],
     gpu=False,
-    network_file_systems={CACHE_DIR: volume},
     memory=4096,
 )
 def summarize_article(text: str) -> str:
     print(f"Summarizing text with {len(text)} characters.")

+    # `local_files_only` is set to `True` because we expect to read the model
+    # files saved in the image.
+    model, tokenizer = fetch_model(local_files_only=True)
+
     # summarize text
-    batch = TOKENIZER(
+    batch = tokenizer(
         [text], truncation=True, padding="longest", return_tensors="pt"
     ).to("cpu")
-    translated = MODEL.generate(**batch)
-    summary = TOKENIZER.batch_decode(translated, skip_special_tokens=True)[0]
+    translated = model.generate(**batch)
+    summary = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

     return summary
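One plausible way to exercise the updated `summarize_article` end to end, assuming the Modal client of this era (where `@stub.local_entrypoint()` and `Function.call()` were current); the URL is a placeholder:

```python
@stub.local_entrypoint()
def main(url: str = "https://example.com/some-article"):  # placeholder URL
    text = scrape_nyc_article.call(url)  # runs in the scraping image
    print(summarize_article.call(text))  # runs in the deep-learning image
```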