From 1d6c266f9cd9dde41713f73dfa61ef2a767852fe Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 27 Jan 2026 21:33:05 +0000
Subject: [PATCH] Add retry logic to webscraper get_links function

Add Modal's built-in Retries configuration to the get_links
@app.function decorator to handle transient network failures and
Playwright timeouts when scraping external URLs. Uses exponential
backoff with a maximum of 3 retries, consistent with the approach used
in potus_speech_qanda.py.

https://claude.ai/code/session_017voJx76EsKYvwHVHpkbSSh
---
 10_integrations/webscraper.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/10_integrations/webscraper.py b/10_integrations/webscraper.py
index 7da4ee35..0c5d9a9d 100644
--- a/10_integrations/webscraper.py
+++ b/10_integrations/webscraper.py
@@ -127,8 +127,15 @@ playwright_image = modal.Image.debian_slim(python_version="3.10").run_commands(
 # machine since this will all run in Modal. We can now modify our `get_links`
 # function to make use of the new tools.
 
+# Since we're fetching from external servers, we use Modal's built-in
+# [`Retries`](https://modal.com/docs/reference/modal.Retries) to handle transient
+# network failures or server timeouts with exponential backoff.
 
-@app.function(image=playwright_image)
+
+@app.function(
+    image=playwright_image,
+    retries=modal.Retries(max_retries=3, backoff_coefficient=2.0),
+)
 async def get_links(cur_url: str) -> list[str]:
     from playwright.async_api import async_playwright
 
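
For reference, Modal applies these retries per remote call, so callers need no
extra error handling for transient failures. Below is a minimal sketch of
driving the retried function from a local entrypoint, assuming the `app` object
defined earlier in webscraper.py; the entrypoint name and default URL are
illustrative rather than taken from the example.

    @app.local_entrypoint()
    def main(url: str = "http://example.com"):
        # Each .remote() call is retried by Modal up to 3 times with
        # exponential backoff before any error is surfaced here.
        links = get_links.remote(url)
        for link in links:
            print(link)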