From 1d6c266f9cd9dde41713f73dfa61ef2a767852fe Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 27 Jan 2026 21:33:05 +0000
Subject: [PATCH] Add retry logic to webscraper get_links function

Add Modal's built-in Retries configuration to the get_links
@app.function decorator to handle transient network failures and
Playwright timeouts when scraping external URLs. Uses exponential
backoff with a maximum of 3 retries, consistent with the approach used
in potus_speech_qanda.py.

https://claude.ai/code/session_017voJx76EsKYvwHVHpkbSSh
---
 10_integrations/webscraper.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/10_integrations/webscraper.py b/10_integrations/webscraper.py
index 7da4ee35..0c5d9a9d 100644
--- a/10_integrations/webscraper.py
+++ b/10_integrations/webscraper.py
@@ -127,8 +127,15 @@ playwright_image = modal.Image.debian_slim(python_version="3.10").run_commands(
 # machine since this will all run in Modal. We can now modify our `get_links`
 # function to make use of the new tools.
 
+# Since we're fetching from external servers, we use Modal's built-in
+# [`Retries`](https://modal.com/docs/reference/modal.Retries) to handle transient
+# network failures or server timeouts with exponential backoff.
 
-@app.function(image=playwright_image)
+
+@app.function(
+    image=playwright_image,
+    retries=modal.Retries(max_retries=3, backoff_coefficient=2.0),
+)
 async def get_links(cur_url: str) -> list[str]:
     from playwright.async_api import async_playwright
 
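
For reference, Modal applies these retries per remote call, so callers need no
extra error handling for transient failures. Below is a minimal sketch of
driving the retried function from a local entrypoint, assuming the `app` object
defined earlier in webscraper.py; the entrypoint name and default URL are
illustrative rather than taken from the example.

    @app.local_entrypoint()
    def main(url: str = "http://example.com"):
        # Each .remote() call is retried by Modal up to 3 times with
        # exponential backoff before any error is surfaced here.
        links = get_links.remote(url)
        for link in links:
            print(link)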