Shababo/build run example timeout fix (#1320)
* Set the optimum version back to 1.26.1, since 1.27 has been released and breaks the dependency on transformers
* Update bpy for the Blender example
* Update the timeout to handle longer-running examples
* Allow a timeout parameter to be passed to run_example::main
This commit is contained in:
2
.github/workflows/build-and-run-example.yml
vendored
2
.github/workflows/build-and-run-example.yml
vendored
@@ -26,4 +26,4 @@ jobs:
|
||||
|
||||
- name: Run a random example with MODAL_IGNORE_CACHE set
|
||||
run: |
|
||||
MODAL_IGNORE_CACHE=1 python3 -m internal.run_example
|
||||
MODAL_IGNORE_CACHE=1 python3 -m internal.run_example --timeout 1800
|
||||
|
||||
@@ -39,7 +39,7 @@ app = modal.App("example-blender-video")
|
||||
rendering_image = (
|
||||
modal.Image.debian_slim(python_version="3.11")
|
||||
.apt_install("xorg", "libxkbcommon0") # X11 (Unix GUI) dependencies
|
||||
.pip_install("bpy==4.1.0") # Blender as a Python package
|
||||
.pip_install("bpy==4.5.0") # Blender as a Python package
|
||||
)
|
||||
|
||||
# ## Rendering a single frame
|
||||
|
||||
@@ -104,9 +104,10 @@ infinity_image = (
|
||||
"hf_transfer==0.1.9", # for fast huggingface data download
|
||||
"huggingface_hub[hf_xet]==0.33.2",
|
||||
"tqdm==4.67.1", # progress bar for dataset download
|
||||
"infinity_emb[all]==0.0.76", # for Infinity inference lib
|
||||
"sentencepiece==0.2.0", # for this particular chosen model
|
||||
"torchvision==0.22.1", # for fast image loading
|
||||
"infinity_emb[all]==0.0.76", # for Infinity inference lib
|
||||
"optimum==1.26.1", # need to pin this because newer version requires
|
||||
]
|
||||
)
|
||||
.env(
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
@@ -7,18 +8,20 @@ import time
|
||||
from . import utils
|
||||
|
||||
MINUTES = 60
|
||||
TIMEOUT = 12 * MINUTES
|
||||
DEFAULT_TIMEOUT = 12 * MINUTES
|
||||
|
||||
|
||||
def run_script(example):
|
||||
def run_script(example, timeout=DEFAULT_TIMEOUT):
|
||||
t0 = time.time()
|
||||
|
||||
print(f"Running example {example.stem} with timeout {timeout}s")
|
||||
|
||||
try:
|
||||
print(f"cli args: {example.cli_args}")
|
||||
process = subprocess.run(
|
||||
[str(x) for x in example.cli_args],
|
||||
env=os.environ | example.env | {"MODAL_SERVE_TIMEOUT": "5.0"},
|
||||
timeout=TIMEOUT,
|
||||
timeout=timeout,
|
||||
)
|
||||
total_time = time.time() - t0
|
||||
if process.returncode == 0:
|
||||
@@ -31,32 +34,37 @@ def run_script(example):
|
||||
returncode = process.returncode
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
print(f"Past timeout of {TIMEOUT}s :(")
|
||||
print(f"Past timeout of {timeout}s :(")
|
||||
returncode = 999
|
||||
|
||||
return returncode
|
||||
|
||||
|
||||
def run_single_example(stem):
|
||||
def run_single_example(stem, timeout=DEFAULT_TIMEOUT):
|
||||
examples = utils.get_examples()
|
||||
for example in examples:
|
||||
if stem == example.stem and example.metadata.get("lambda-test", True):
|
||||
return run_script(example)
|
||||
return run_script(example, timeout=timeout)
|
||||
else:
|
||||
print(f"Could not find example name {stem}")
|
||||
return 0
|
||||
|
||||
|
||||
def run_random_example():
|
||||
def run_random_example(timeout=DEFAULT_TIMEOUT):
|
||||
examples = filter(
|
||||
lambda ex: ex.metadata and ex.metadata.get("lambda-test", True),
|
||||
utils.get_examples(),
|
||||
)
|
||||
return run_script(random.choice(list(examples)))
|
||||
return run_script(random.choice(list(examples)), timeout=timeout)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) > 1:
|
||||
sys.exit(run_single_example(sys.argv[1]))
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("example", nargs="?", default=None)
|
||||
parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT)
|
||||
args = parser.parse_args()
|
||||
print(args)
|
||||
if args.example:
|
||||
sys.exit(run_single_example(args.example, timeout=args.timeout))
|
||||
else:
|
||||
sys.exit(run_random_example())
|
||||
sys.exit(run_random_example(timeout=args.timeout))
|
||||
|
||||
Reference in New Issue
Block a user