diff --git a/doc/source/conf.py b/doc/source/conf.py
index 9072b264195a..d29fa0b11713 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -242,7 +242,6 @@ def __init__(self, version: str):
     # Other misc files (overviews, console-only examples, etc)
     "ray-overview/examples/llamafactory-llm-fine-tune/README.ipynb",
     "ray-overview/examples/llamafactory-llm-fine-tune/**/*.ipynb",
-    "serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb",
     "serve/tutorials/video-analysis/*.ipynb",
     # Legacy/backward compatibility
     "ray-overview/examples/**/README.md",
diff --git a/doc/source/serve/examples.yml b/doc/source/serve/examples.yml
index f035effa63c0..cccd8ff6afcc 100644
--- a/doc/source/serve/examples.yml
+++ b/doc/source/serve/examples.yml
@@ -153,7 +153,7 @@ examples:
     skill_level: advanced
     use_cases:
       - generative ai
-    link: tutorials/asynchronous-inference/content/README
+    link: tutorials/asynchronous-inference/content/asynchronous-inference
     related_technology: integrations
   - title: Video Analysis Inference Pipeline
     skill_level: advanced
@@ -163,5 +163,5 @@
     related_technology: ml applications
   - title: Integrate with MLflow Model Registry
     skill_level: intermediate
-    link: mlflow-serving-intig
+    link: model-registries
     related_technology: integrations
diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/README.md b/doc/source/serve/tutorials/asynchronous-inference/content/README.md
index fb85c632099b..26e1602b7c0c 100644
--- a/doc/source/serve/tutorials/asynchronous-inference/content/README.md
+++ b/doc/source/serve/tutorials/asynchronous-inference/content/README.md
@@ -1,6 +1,3 @@
----
-orphan: true
----
 # Asynchronous Inference with Ray Serve
 
 **⏱️ Time to complete:** 30 minutes
@@ -57,14 +54,14 @@ Redis serves as both the message broker (task queue) and result backend.
 
 **Install and start Redis (Google Colab compatible):**
 
-```bash
+```python
 # Install and start Redis server
-sudo apt-get update -qq
-sudo apt-get install -y redis-server
-redis-server --port 6399 --save "" --appendonly no --daemonize yes
+!sudo apt-get update -qq
+!sudo apt-get install -y redis-server
+!redis-server --port 6399 --save "" --appendonly no --daemonize yes
 
 # Verify Redis is running
-redis-cli -p 6399 ping
+!redis-cli -p 6399 ping
 ```
 
 **Alternative methods:**
@@ -73,11 +70,16 @@
 - **Docker:** `docker run -d -p 6379:6379 redis:latest`
 - **Other platforms:** [Official Redis Installation Guide](https://redis.io/docs/getting-started/installation/)
 
+If you're using a hosted Redis instance, ensure that your Ray Serve cluster can access it. For example, when using AWS ElastiCache for Redis:
+
+- Launch the ElastiCache instance in the same VPC that's attached to your Anyscale cloud.
+- Attach IAM roles with read/write access to ElastiCache to your cluster instances.
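+
+As a quick sanity check that the endpoint is reachable from your cluster, you can ping it with the `redis` Python client (installed as part of `celery[redis]` in the next step). The following snippet is only a sketch; the endpoint URL is a placeholder that you should replace with your actual hosted Redis address:
+
+```python
+import redis
+
+# Placeholder endpoint; substitute your ElastiCache primary endpoint and port.
+hosted_redis_url = "rediss://your-cache-endpoint.example.com:6379/0"
+
+# ping() returns True when the instance is reachable from this node.
+client = redis.Redis.from_url(hosted_redis_url)
+print(client.ping())
+```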
+
 ## Step 2: Install Dependencies
 
 ```python
-pip install -q ray[serve-async-inference]>=2.50.0 requests>=2.31.0 PyPDF2>=3.0.0 celery[redis]
+!pip install -q ray[serve-async-inference]>=2.50.0 requests>=2.31.0 PyPDF2>=3.0.0 celery[redis]
 ```
 
 ## Step 3: Start the Ray Serve Application
@@ -368,15 +370,28 @@ def get_task_status(task_id: str) -> Dict[str, Any]:
     response.raise_for_status()
     return response.json()
 
+def wait_for_task_completion(task_id: str, timeout: int = 120, poll_interval: float = 2.0) -> Dict[str, Any]:
+    """Poll for task completion with timeout."""
+    start_time = time.time()
+    while time.time() - start_time < timeout:
+        result = get_task_status(task_id)
+        status = result.get("status")
+        if status in ("SUCCESS", "FAILURE"):
+            return result
+        print(f" ⏳ Status: {status}, waiting...")
+        time.sleep(poll_interval)
+    raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds")
+
 for i, (task_id, url) in enumerate(task_ids, 1):
     print(f"\nTask {i} ({url.split('/')[-1]}):")
-    result = get_task_status(task_id)
+    result = wait_for_task_completion(task_id)
     res = result.get("result")
     if res:
         print(f" ✓ Complete: {res.get('page_count')} pages, {res.get('word_count')} words")
         print(f" ✓ Processing time: {res.get('processing_time_seconds')}s")
     else:
-        print(" ✗ No result payload found in response.")
+        error = result.get("error")
+        print(f" ✗ Task failed: {error}" if error else " ✗ No result payload found in response.")
 ```
 
 ## Deploy to Anyscale
diff --git a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb
index 4ff5a163e0d1..2164a388accd 100644
--- a/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb
+++ b/doc/source/serve/tutorials/asynchronous-inference/content/asynchronous-inference.ipynb
@@ -129,12 +129,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "```note\n",
     "If you're using a hosted Redis instance, ensure that your Ray Serve cluster can access it. For example, when using AWS ElastiCache for Redis:\n",
     "\n",
     "- Launch the ElastiCache instance in the same VPC that's attached to your Anyscale cloud.\n",
-    "- Attach IAM roles with read/write access to ElastiCache to your cluster instances.\n",
-    "```"
+    "- Attach IAM roles with read/write access to ElastiCache to your cluster instances."
    ]
   },
   {
@@ -591,16 +589,7 @@
    "name": "python3"
   },
  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.12"
+   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,