diff --git a/02_building_containers/screenshot.py b/02_building_containers/screenshot.py index 75d26f46..125db6f3 100644 --- a/02_building_containers/screenshot.py +++ b/02_building_containers/screenshot.py @@ -2,25 +2,24 @@ # integration-test: false # output-directory: "/tmp/screenshots" # --- -# # Screenshot with headless Chromium +# # Screenshot with Chromium -# In this example, we use Modal functions and the `playwright` package to screenshot websites from a list of urls in parallel. -# Please also see our [introductory guide](/docs/guide/web-scraper) for another example of a web-scraper, with more in-depth examples. +# In this example, we use Modal functions and the `playwright` package to take screenshots +# of websites from a list of URLs in parallel. # -# You can run this example on the command line, like this: +# You can run this example on the command line with # # ``` # python 02_building_containers/screenshot.py 'https://www.youtube.com/watch?v=dQw4w9WgXcQ' # ``` # -# This should take a few seconds then write a `/tmp/screenshots/screenshot.png` file. -# This is what the file should look like: +# This should take a few seconds then create a `/tmp/screenshots/screenshot.png` file, shown below. # # ![screenshot](./screenshot.png) # # ## Setup # -# First we import the `modal` client library: +# First we import the Modal client library. import pathlib import sys @@ -29,7 +28,7 @@ import modal stub = modal.Stub("example-screenshot") -# ## Defining a custom image +# ## Define a custom image # # We need an image with the `playwright` Python package as well as its `chromium` plugin pre-installed. # This requires intalling a few Debian packages, as well as setting up a new Debian repository. @@ -48,7 +47,7 @@ image = modal.Image.debian_slim().run_commands( ], ) -# ## Defining the screenshot function +# ## The screenshot function # # Next, the scraping function which runs headless Chromium, goes to a website, and takes a screenshot. 
# This is a Modal function which runs inside the remote container. @@ -82,3 +81,6 @@ if __name__ == "__main__": with open(filename, "wb") as f: f.write(data) print(f"wrote {len(data)} bytes to {filename}") + +# And we're done! Please also see our [introductory guide](/docs/guide/web-scraper) for another +# example of a web scraper, with more in-depth logic. diff --git a/03_scaling_out/basic_grid_search.py b/03_scaling_out/basic_grid_search.py index 168146b5..954c9e65 100644 --- a/03_scaling_out/basic_grid_search.py +++ b/03_scaling_out/basic_grid_search.py @@ -1,20 +1,23 @@ -# # Grid search +# # Hyperparameter search +# +# This example showcases a simple grid search in one dimension, where we try different +# parameters for a model and pick the one with the best results on a holdout set. # -# This example showcases a very simplistic grid search in 1-D where we try different parameters for a model -# and pick the one with the best results on a holdout set. - -import modal - # ## Defining the image # # First, let's build a custom image and install scikit-learn in it. -stub = modal.Stub("example-basic-grid-search", image=modal.Image.debian_slim().pip_install(["scikit-learn"])) +import modal + +stub = modal.Stub( + "example-basic-grid-search", + image=modal.Image.debian_slim().pip_install(["scikit-learn"]), +) # ## The Modal function # # Next, define the function. Note that we use the custom image with scikit-learn in it. -# We also take the hyperparameter `k` which is how many nearest neighbors we use. +# We also take the hyperparameter `k`, which is how many nearest neighbors we use. 
@stub.function @@ -33,10 +36,10 @@ def fit_knn(k): return score, k -# ## Hyperparameter search +# ## Parallel search # -# To do a hyperparameter search, let's map over this function with lots of different values -# for `k`, and then pick whichever `k` has the best score on the holdout set: +# To do a hyperparameter search, let's map over this function with different values +# for `k`, and then select for the best score on the holdout set: if __name__ == "__main__": with stub.run(): diff --git a/03_scaling_out/fetch_stock_prices.py b/03_scaling_out/fetch_stock_prices.py index b067c370..91b0b958 100644 --- a/03_scaling_out/fetch_stock_prices.py +++ b/03_scaling_out/fetch_stock_prices.py @@ -33,7 +33,9 @@ import modal stub = modal.Stub( "example-fetch-stock-prices", - image=modal.Image.debian_slim().pip_install(["requests", "yfinance", "beautifulsoup4", "matplotlib"]), + image=modal.Image.debian_slim().pip_install( + ["requests", "yfinance", "beautifulsoup4", "matplotlib"], + ), ) # ## Fetch a list of tickers @@ -48,7 +50,10 @@ def get_stocks(): import bs4 import requests - headers = {"user-agent": "curl/7.55.1", "referer": "https://finance.yahoo.com/"} + headers = { + "user-agent": "curl/7.55.1", + "referer": "https://finance.yahoo.com/", + } url = "https://finance.yahoo.com/etfs/?count=100&offset=0" res = requests.get(url, headers=headers) soup = bs4.BeautifulSoup(res.text, "html.parser") diff --git a/03_scaling_out/youtube_face_detection.py b/03_scaling_out/youtube_face_detection.py index 8c74a5de..c5fecdc8 100644 --- a/03_scaling_out/youtube_face_detection.py +++ b/03_scaling_out/youtube_face_detection.py @@ -2,7 +2,7 @@ # deploy: true # output-directory: "/tmp" # --- -# # Face detection on Youtube videos +# # Face detection on YouTube videos # # This is an example that uses # [OpenCV](https://github.com/opencv/opencv-python) @@ -44,13 +44,16 @@ FACE_CASCADE_FN = "haarcascade_frontalface_default.xml" image = ( modal.Image.debian_slim() - .run_commands(["apt-get 
install -y libgl1-mesa-glx libglib2.0-0 wget"]) .run_commands( - [f"wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/{FACE_CASCADE_FN} -P /root"] + [ + "apt-get install -y libgl1-mesa-glx libglib2.0-0 wget", + f"wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/{FACE_CASCADE_FN} -P /root", + ] ) .pip_install(["pytube", "opencv-python", "moviepy"]) ) stub = modal.Stub("example-youtube-face-detection", image=image) + if stub.is_inside(): import cv2 import moviepy.editor @@ -100,7 +103,7 @@ def detect_faces(fn, start, stop): # # The "entrypoint" into Modal controls the main flow of the program: # -# 1. Download the video from Youtube +# 1. Download the video from YouTube # 2. Fan-out face detection of individual 1s clips # 3. Stitch the results back into a new video @@ -138,7 +141,7 @@ def run(url): # # The code we run locally to fire up the Modal job is quite simple # -# * Take a Youtube URL on the command line +# * Take a YouTube URL on the command line # * Run the Modal function # * Store the output data diff --git a/data/covid_datasette.py b/data/covid_datasette.py index f4c19c68..d13cb22b 100644 --- a/data/covid_datasette.py +++ b/data/covid_datasette.py @@ -58,7 +58,7 @@ DB_PATH = pathlib.Path(CACHE_DIR, "covid-19.db") # ## Getting a dataset # -# Johns Hopkins has been publishing up-to-date COVID-19 pandemic data on Github since early February 2020, and +# Johns Hopkins has been publishing up-to-date COVID-19 pandemic data on GitHub since early February 2020, and # as of late September 2022 daily reporting is still rolling in. Their dataset is what this example will use to # show off Modal and Datasette's capabilities. 
# diff --git a/data/meltano.py b/data/meltano.py index 1b3a75ee..2d470ecc 100644 --- a/data/meltano.py +++ b/data/meltano.py @@ -32,7 +32,7 @@ stub = modal.Stub( ) -# For this example to work, the secret provides a valid Github personal access token +# For this example to work, the secret must provide a valid GitHub personal access token # under the key `TAP_GITHUB_ACCESS_TOKEN`. You may provide any other plugin configs that you wish via modal's Secrets. @stub.function( secrets=[