diff --git a/.azdignore b/.azdignore new file mode 100644 index 0000000..38c064a --- /dev/null +++ b/.azdignore @@ -0,0 +1,8 @@ +.git +.github +__pycache__ +*.pyc +.ipynb_checkpoints +.env +node_modules +dist \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2528afa..785f746 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -sandbox/ \ No newline at end of file +sandbox/ +.azure diff --git a/README.md b/README.md index 2bd1215..b546bb5 100644 --- a/README.md +++ b/README.md @@ -1,149 +1,334 @@ # Healthcare AI Examples +Healthcare AI Examples is a comprehensive collection of code samples, templates, and solution patterns that demonstrate how to deploy and use Microsoft's healthcare AI models across diverse medical scenarios, from basic model deployment to advanced multimodal healthcare applications. -## Introduction +This repository contains everything you need to get started with Microsoft's cutting-edge healthcare AI models. -Welcome to the Healthcare AI Examples repository! This repository is designed to help you get started with Microsoft's healthcare AI models. Whether you are a researcher, data scientist, or developer, you will find a variety of examples and solution templates that showcase how to leverage these powerful models for different healthcare scenarios. From basic deployment and usage patterns to advanced solutions addressing real-world medical problems, this repository aims to provide you with the tools and knowledge to build and implement healthcare AI solutions using Microsoft AI ecosystem effectively. +> [!IMPORTANT] > Healthcare AI Examples is a code sample collection intended for research and model development exploration only. The models, code and examples are not designed or intended to be deployed in clinical settings as-is nor for use in the diagnosis or treatment of any health or medical condition, and the individual models' performances for such purposes have not been established. By using the Healthcare AI Examples, you are acknowledging that you bear sole responsibility and liability for any use of these models and code, including verification of outputs and incorporation into any product or service intended for a medical purpose or to inform clinical decision-making, compliance with applicable healthcare laws and regulations, and obtaining any necessary clearances or approvals. -**Disclaimer**: _The Microsoft healthcare AI models, code and examples are intended for research and model development exploration. The models, code and examples are not designed or intended to be deployed in clinical settings as-is nor for use in the diagnosis or treatment of any health or medical condition, and the individual models' performances for such purposes have not been established. You bear sole responsibility and liability for any use of the healthcare AI models, code and examples, including verification of outputs and incorporation into any product or service intended for a medical purpose or to inform clinical decision-making, compliance with applicable healthcare laws and regulations, and obtaining any necessary clearances or approvals._ +## Features -## What's in the repository?
+- **Model Deployment Patterns**: Programmatic deployment examples for key healthcare AI models including MedImageInsight, MedImageParse, and CXRReportGen +- **Basic Usage Examples**: Zero-shot classification, image segmentation, and foundational calling patterns for medical imaging +- **Advanced Solutions**: Multimodal analysis, outlier detection, exam parameter extraction, and 3D image search capabilities +- **Production-Ready Code**: Concurrent calling patterns, batch processing, and efficient image preprocessing for scalable healthcare AI systems +- **Fine-tuning Templates**: Complete workflows for adapter training and model fine-tuning using AzureML pipelines +- **Real-World Applications**: Cancer survival prediction, radiology-pathology analysis, and clinical decision support scenarios +- **Integrated Toolkit**: Helper utilities and model libraries through the `healthcareai_toolkit` package +- **Azure Integration**: Seamless deployment using Azure Developer CLI (azd) and Azure Machine Learning -In this repository you will find examples and solution templates that will help you get started with multimodal Healthcare AI models available in Microsoft AI Foundry. This is what is available: +## What's Available -### Deployment samples and basic usage examples +### 🚀 Deployment Samples and Basic Usage Examples These notebooks show how to programmatically deploy some of the models available in the catalog: -* [MedImageInsight](https://aka.ms/healthcare-ai-examples-mi2-deploy) -* [MedImageParse](https://aka.ms/healthcare-ai-examples-mip-deploy) -* [CXRReportGen](https://aka.ms/healthcare-ai-examples-cxr-deploy) +* **[MedImageInsight](https://aka.ms/healthcare-ai-examples-mi2-deploy)** [MI2] - Image and text embedding foundation model deployment +* **[MedImageParse](https://aka.ms/healthcare-ai-examples-mip-deploy)** [MIP] - Medical image segmentation model deployment +* **[CXRReportGen](https://aka.ms/healthcare-ai-examples-cxr-deploy)** [CXR] - Chest X-ray report generation model deployment +* **Prov-GigaPath** [PGP] - Embedding model specifically for histopathology -### Basic usage examples and patterns +### 📋 Basic Usage Examples and Patterns -These notebooks show basic patterns that require very little specialized knowledge about medical data or implementation specifics. +These notebooks show basic patterns that require very little specialized knowledge about medical data or implementation specifics: -* [MedImageParse call patterns](./azureml/medimageparse/medimageparse_segmentation_demo.ipynb) - a collection of snippets showcasing how to send various image types to MedImageParse and retrieve segmentation masks. See how to read and package xrays, ophthalmology images, CT scans, pathology patches, and more. -* [Zero shot classification with MedImageInsight](./azureml/medimageinsight/zero-shot-classification.ipynb) - learn how to use MedImageInsight to perform zero-shot classification of medical images using its text or image encoding abilities. -* [Training adapters using MedImageInsight](./azureml/medimageinsight/adapter-training.ipynb) - build on top of zero shot pattern and learn how to train simple task adapters for MedImageInsight to create classification models out of this powerful image encoder.
For additional thoughts on when you would use this and the zero shot patterns as well as considerations on fine tuning, [read our blog on Microsoft Techcommunity Hub](https://techcommunity.microsoft.com/blog/healthcareandlifesciencesblog/unlocking-the-magic-of-embedding-models-practical-patterns-for-healthcare-ai/4358000). -* [Advanced calling patterns](./azureml/medimageinsight/advanced-call-example.ipynb) - no production implementation is complete without understanding how to deal with concurrent calls, batches, efficient image preprocessing, and deep understanding of parallelism. This notebook contains snippets that will help you write more efficient code to build your cloud-based healthcare AI systems. -* [Fine-tuning MedImageInsight with AzureML Pipelines](./azureml/medimageinsight/finetuning/mi2-finetuning.ipynb) - This detailed notebook demonstrates how to fine-tune the MedImageInsight model using AzureML pipelines. It guides you through setting up prerequisites, data preprocessing, GPU-accelerated training, model deployment, and performance validation, paving the way for state-of-the-art image analysis. +* **[MedImageParse call patterns](./azureml/medimageparse/medimageparse_segmentation_demo.ipynb)** [MIP] - a collection of snippets showcasing how to send various image types to MedImageParse and retrieve segmentation masks. See how to read and package xrays, ophthalmology images, CT scans, pathology patches, and more. +* **[Zero shot classification](./azureml/medimageinsight/zero-shot-classification.ipynb)** [MI2] - learn how to use MedImageInsight to perform zero-shot classification of medical images using its text or image encoding abilities. +* **[Training adapters](./azureml/medimageinsight/adapter-training.ipynb)** [MI2] - build on top of the zero-shot pattern and learn how to train simple task adapters for MedImageInsight to create classification models out of this powerful image encoder. For additional thoughts on when to use this versus the zero-shot pattern, as well as considerations on fine-tuning, [read our blog on Microsoft Techcommunity Hub](https://techcommunity.microsoft.com/blog/healthcareandlifesciencesblog/unlocking-the-magic-of-embedding-models-practical-patterns-for-healthcare-ai/4358000). +* **[Advanced calling patterns](./azureml/medimageinsight/advanced-call-example.ipynb)** [MI2] - no production implementation is complete without understanding how to handle concurrent calls, batches, efficient image preprocessing, and parallelism. This notebook contains snippets that will help you write more efficient code to build your cloud-based healthcare AI systems. +* **[Fine-tuning MedImageInsight with AzureML Pipelines](./azureml/medimageinsight/finetuning/mi2-finetuning.ipynb)** [MI2] - comprehensive guide through prerequisites, data preprocessing, GPU-accelerated training, model deployment, and performance validation. Read [our blog](https://aka.ms/MedImageFinetuning) for additional insights on fine-tuning strategies. -### Advanced examples and Solution templates +### 🏥 Advanced Examples and Solution Templates -These examples take a closer look at certain solutions and patterns of usage for the multimodal healthcare AI models to address real world medical problems.
+These examples take a closer look at certain solutions and patterns of usage for the multimodal healthcare AI models to address real-world medical problems: -* [Detecting outliers in MedImageInsight](./azureml/medimageinsight/outlier-detection-demo.ipynb) - go beyond encoding single image instances and learn how to use MedImageInsight to encode CT/MR series and studies, and detect outliers in image collections. -* [Exam Parameter Detection](./azureml/medimageinsight/exam-parameter-demo/exam-parameter-detection.ipynb) - dealing with entire MRI imaging series, this notebook explores an approach to a common problem in radiological imaging - normalizing and understanding image acquisition parameters. Surprisingly (or not), in many cases DICOM metadata can not be relied upon to retrieve exam parameters. Take a look inside this notebook to understand how you can build a computationally efficient exam parameter detection system using an embedding model like MedImageInsight. -* [Multimodal image analysis using radiology and pathology imaging](./azureml/advanced_demos/radpath/rad_path_survival_demo.ipynb) - can foundational models be connected together to build systems that understand multiple modalities? This notebook shows a way this can be done using the problem of predicting cancer hazard score via a combination of MRI studies and digital pathology slides. Also [read our blog](https://techcommunity.microsoft.com/blog/healthcareandlifesciencesblog/cancer-survival-with-radiology-pathology-analysis-and-healthcare-ai-models-in-az/4366241) that goes into more depth on this topic. -* [Image Search Series Pt 1: Searching for similar XRay images](./azureml/advanced_demos/image_search/2d_image_search.ipynb) - an opener in the series on image-based search. How do you use foundation models to build en efficient system to look up similar Xrays? Read [our blog](https://techcommunity.microsoft.com/blog/healthcareandlifesciencesblog/image-search-series-part-1-chest-x-ray-lookup-with-medimageinsight/4372736) for more details. -* [Image Search Series Pt 2: 3D Image Search with MedImageInsight (MI2)](./azureml/advanced_demos/image_search/3d_image_search.ipynb) - expanding on the image-based search topics we look at 3D images. How do you use foundation models to build a system to search the archive of CT scans for those with similar lesions in the pancreas? Read [our blog](https://aka.ms/healthcare-ai-examples-mi2-3d-image-search-blog) for more details. +* **[Detecting outliers in MedImageInsight](./azureml/medimageinsight/outlier-detection-demo.ipynb)** [MI2] - go beyond encoding single image instances and learn how to use MedImageInsight to encode CT/MR series and studies, and detect outliers in image collections. Learn more in our [detailed resource guide](https://aka.ms/HLSOutlierDetection). +* **[Exam Parameter Detection](./azureml/medimageinsight/exam-parameter-demo/exam-parameter-detection.ipynb)** [MI2, GPT*] - dealing with entire MRI imaging series, this notebook explores an approach to a common problem in radiological imaging - normalizing and understanding image acquisition parameters. Surprisingly (or not), in many cases DICOM metadata cannot be relied upon to retrieve exam parameters. Take a look inside this notebook to understand how you can build a computationally efficient exam parameter detection system using an embedding model like MedImageInsight.
+* **[Multimodal image analysis using radiology and pathology imaging](./azureml/advanced_demos/radpath/rad_path_survival_demo.ipynb)** [MI2, PGP] - can foundational models be connected together to build systems that understand multiple modalities? This notebook shows a way this can be done using the problem of predicting cancer hazard score via a combination of MRI studies and digital pathology slides. Also [read our blog](https://techcommunity.microsoft.com/blog/healthcareandlifesciencesblog/cancer-survival-with-radiology-pathology-analysis-and-healthcare-ai-models-in-az/4366241) that goes into more depth on this topic. +* **[Image Search Series Pt 1: Searching for similar XRay images](./azureml/advanced_demos/image_search/2d_image_search.ipynb)** [MI2] - an opener in the series on image-based search. How do you use foundation models to build an efficient system to look up similar Xrays? Read [our blog](https://techcommunity.microsoft.com/blog/healthcareandlifesciencesblog/image-search-series-part-1-chest-x-ray-lookup-with-medimageinsight/4372736) for more details. +* **[Image Search Series Pt 2: 3D Image Search with MedImageInsight](./azureml/advanced_demos/image_search/3d_image_search.ipynb)** [MI2] - expanding on the image-based search topic, we look at 3D images. How do you use foundation models to build a system to search the archive of CT scans for those with similar lesions in the pancreas? Read [our blog](https://aka.ms/3DImageSearch) for more details. ## Getting Started -To get started with this project, follow these steps: +To get started with our healthcare AI models and examples, follow the instructions below to set up your environment and run the sample applications. -### 1. Clone the repository +### Prerequisites + +> [!IMPORTANT] +> Follow the steps in order. Each step builds on the previous ones, and jumping ahead may require restarting deployments that can take significant time to complete. Detailed documentation is linked for each step if you need additional context. + +- **Azure Subscription** with access to: + - Azure Machine Learning workspace _or_ permissions to create one. + - See [required permissions](#required-permissions) for details and [Step 3](#step-3-deploy-healthcare-ai-models) for deployment options. + - Models deployed or permissions to deploy them into a subscription or AzureML workspace. + - See [Step 3](#step-3-deploy-healthcare-ai-models) for deployment options and tips on selecting models. + - GPU compute resource availability (quota) for model deployments. + - See [Step 1](#step-1-verify-prerequisites-quota) for details. + - **Optional**: Azure OpenAI access for GPT models (limited use in examples). +- **Tools**: + - **For running examples**: + - [AzCopy](https://learn.microsoft.com/en-us/azure/storage/common/storage-ref-azcopy) for downloading sample data + - Python `>=3.9.0,<3.12` and pip `>=21.3` (for running locally) + - **For deploying models**: + - [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) + - [Azure Developer CLI](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/install-azd?tabs=winget-windows%2Cbrew-mac%2Cscript-linux&pivots=os-linux) + + +### Step 1: Verify Prerequisites (Quota) + +Before deploying, verify your Azure subscription has sufficient quota and your account has the necessary permissions to avoid resource limitations during deployment.
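If you prefer a command-line first pass, the sketch below lists regional vCPU quota and usage for GPU families. Note that this reports the Microsoft.Compute provider's quotas, while Azure ML managed online endpoints draw from separate Machine Learning quota, so treat it only as a rough signal and confirm the numbers in the quota pages linked below; the region name is a placeholder.

```sh
# Rough CLI pre-check: regional vCPU quota/usage for NC-series GPU families.
# AML managed endpoints use separate Machine Learning quota, so verify the
# exact numbers in the AML quota page linked below.
az vm list-usage --location eastus --output table | grep -i "NC"
```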
+ +**Azure Quota Management Tools** +- [Azure Machine Learning Quota Management](https://ml.azure.com/quota) - For GPU compute quota +- [Azure AI Management Center](https://ai.azure.com/managementCenter/quota) - For Azure OpenAI quota management + +**Quota Requirements** + +You need quota for **one or more** of the following: + +| Model | VM Family | Instance Type | Instance Count | Cores per Instance | Min Cores Needed | +|-------|-----------|---------------|----------------|-------------------|---------------------| +| **MedImageInsight** | NCasT4_v3 | `Standard_NC4as_T4_v3` | 2 | 4 | **8 cores** | +| **MedImageParse** | NCadsH100_v5 | `Standard_NC40ads_H100_v5` | 1 | 40 | **40 cores** | +| **CXRReportGen** | NCadsH100_v5 | `Standard_NC40ads_H100_v5` | 1 | 40 | **40 cores** | +| **Prov-GigaPath*** | NCv3 | `Standard_NC6s_v3` | 1 | 6 | **6 cores** | +| **GPT-4o or GPT-4.1** (optional) | GlobalStandard | GPT-4o or GPT-4.1 | - | - | **50K-100K TPM** | + +*Used in advanced demos only + +> [!TIP] +> **Healthcare AI Models**: All healthcare AI models (MedImageInsight, MedImageParse, CXRReportGen, Prov-GigaPath) require GPU compute quota as shown above. +> +> **GPT Models (Optional)**: GPT models are deployed to Azure AI Services with Tokens Per Minute (TPM) capacity instead of compute cores. GPT deployment is completely optional and can be skipped by leaving the `AZURE_GPT_MODEL` environment variable empty. +> +> **Quota Management**: We recommend requesting quota for **all models** and requesting **2-3x+ the minimum cores** shown above. Requesting quota does not incur any charges - you only pay for what you actually use. Having extra quota available prevents deployment delays and allows for scaling when needed. +> +> **⚠️ ONGOING COSTS**: Online model endpoints bill continuously while deployed, even when not actively processing requests. Monitor your usage in the Azure portal and use cleanup procedures when finished (see deployment-specific instructions in the deploy folders). + +#### Required Permissions + +**To run examples only**: If models are already deployed by your admin, you only need access to the deployed model endpoints. Your admin can provide you with relevant information and authentication credentials. + +**To deploy models yourself**: +- **Fresh deployment**: Requires **Owner** role OR **User Access Administrator + Contributor** roles on the Azure subscription to create resource groups, workspaces, and configure role-based access controls. +- **Existing deployment**: Requires **Contributor** role on the resource group containing your existing Azure ML workspace. + +> [!TIP] +> If you lack deployment permissions, ask your IT administrator to either grant you the appropriate access in a resource group or deploy the models for you and provide the endpoint details. + +### Step 2: Clone the Repository ```sh git clone https://github.com/microsoft/healthcareai-examples.git cd healthcareai-examples ``` -### 2. Set up your environment +### Step 3: Deploy Healthcare AI Models -#### Prerequisites +The examples in this repository require AI model endpoints to be deployed. We provide several deployment methods to accommodate different workflows and preferences. -To run most examples, you will need to download the data and have an appropriate endpoint deployed. +> [!WARNING] +> **⚠️ COST ALERT**: Deploying these models will create Azure resources that **incur charges**. Online model endpoints **continue billing even when idle**.
Review the [quota requirements table](#step-1-verify-prerequisites-quota) to understand compute costs before proceeding. See deployment-specific cleanup instructions in the respective deploy folders when finished. -#### Download data +> [!TIP] +> **Not all models are required:** you can deploy only the subset you need for specific notebooks. +> **For basic examples**: Deploy only the specific model you want to explore: +> - `cxrreportgen/` notebooks → **CXRReportGen** model +> - `medimageinsight/` notebooks → **MedImageInsight** model +> - `medimageparse/` notebooks → **MedImageParse** model +> +> **For advanced demos**: You'll need **MedImageInsight** + **Prov-GigaPath** (plus others depending on the specific demo). +> See [Model Selection](./docs/deployment-guide.md#model-selection) in the deployment guide to select specific models. -The sample data used by the examples is located in our Blob Storage account. +#### Automatic Deployment - Recommended -Use the following command to download the dataset with samples into your data folder located at `/home/azureuser/data/healthcare-ai/`, or substitute it with your folder of choice. Note that you will need to use [azcopy tool](https://learn.microsoft.com/en-us/azure/storage/common/storage-ref-azcopy): +The Azure Developer CLI provides automated infrastructure provisioning and configuration. See the [Deployment Guide](docs/deployment-guide.md) for more information. You can use the [Quickstart Deployment](docs/deployment-guide.md#quick-start) or choose the option that matches your situation: -```sh -azcopy copy --recursive https://azuremlexampledata.blob.core.windows.net/data/healthcare-ai/ /home/azureuser/data/ -``` +> [!TIP] +> **Authentication Issues?** If the standard login commands fail (especially in constrained network environments or when using certain authentication methods), try using device code authentication instead: +> - `az login --use-device-code` +> - `azd auth login --use-device-code` +> +> This opens a browser-based authentication flow that can work around common login issues. -This command will download the entire dataset used by all examples. If you are only interested in a subset of data, the individual examples will reference subfolders that you can download by appending the subfolder name to the source URL. +**Deploy into your existing Azure ML workspace:** -#### Deploy and configure an endpoint +*Recommended if:* +- ✅ You're running from an Azure ML workspace compute instance +- ✅ You have an existing workspace you want to use +- ✅ You have Contributor permissions (no role assignment permissions needed) -To run the examples you will need to access to a Azure-deployed endpoints. You can use the the SDK to programmatically deploy the endpoints: + *Quick Start* + ```bash + cd deploy/existing + az login + azd auth login + azd env new + # Auto-configure environment from current AML compute instance: + ./setup_azd_env_for_aml.sh + azd env set AZURE_GPT_LOCATION "southcentralus" + azd env set AZURE_GPT_MODEL "gpt-4.1;2025-04-14" + azd up + ``` + See [Existing Deployment Guide](deploy/existing/README.md) for more details. -* [MedImageInsight deployment](https://aka.ms/healthcare-ai-examples-mi2-deploy) -* [MedImageParse deployment](https://aka.ms/healthcare-ai-examples-mip-deploy) -* [CXRReportGen deployment](https://aka.ms/healthcare-ai-examples-cxr-deploy) -#### Set up .env file +**Create a Fresh AML Environment**: -You need to set up your environment variables by creating a `.env` file.
The environment variables define parameters like endpoint paths, keys, etc. You don't need to set them all up upfront, each notebook will describe which values it relies upon. An example file named `env.example` is provided in the repository. Copy this file to create your own `.env` file: +Creates a new resource group and Azure ML workspace from scratch. -```sh -cp env.example .env -``` +*Recommended if:* +- ✅ You want to run the examples locally +- ✅ You need a completely new workspace setup +- ✅ You have Owner or User Access Administrator permissions +- ⚠️ Note: May be slower if you don't have a stable connection + + *Quick Start* + ```bash + cd deploy/fresh + az login + azd auth login + azd env new + azd env set AZURE_LOCATION <location> + azd env set AZURE_GPT_LOCATION <location> # if different from AZURE_LOCATION + azd env set AZURE_GPT_MODEL "gpt-4.1;2025-04-14" + azd up + ``` + See [Fresh Deployment Guide](deploy/fresh/README.md) for more details. + +> [!TIP] +> **GPT Model Integration**: Both deployment options now support optional GPT model deployment (GPT-4o or GPT-4.1) alongside healthcare AI models. This enables multimodal workflows combining medical imaging AI with language models. See the deployment guides for configuration details. + +> [!NOTE] +> **For Admins**: You can deploy resources on behalf of another user by setting `AZURE_PRINCIPAL_ID` to their Azure AD object ID during deployment. This grants the target user access to the deployed resources while you maintain the deployment permissions. This is useful when deploying fresh infrastructure where role assignments are created. + +#### Manual Deployment Methods + +For users who prefer other deployment approaches, we provide instructions for: + +- **[Complete Deployment Guide](docs/deployment-guide.md)** - Comprehensive guide covering all deployment options with troubleshooting. +- **[Manual Deployment](docs/manual-deployment.md)** - Portal and SDK deployment methods. + +### Step 4: Setup your local environment -> [!CAUTION] +> If you followed the automatic deployment steps, you might currently be in either the `deploy/fresh/` or `deploy/existing/` directory. You should move back to the repository root level. -#### Healthcare AI Toolkit Installation +Now that you have deployed the models, you need to configure your local environment to use them effectively. This involves three key tasks: verifying your environment configuration, installing the required toolkit, and downloading sample data. -A lot of useful functions that facilitate working with endpoints, DICOM files, etc, have been organized into a simple package called **healthcareai_toolkit** that goes alongside this repository to make the code inside the notebooks cleaner. In order to install it, follow the steps below: +#### Verify Your Environment File -##### Package Prerequisites -* Python version: `>=3.9.0,<3.12` -* pip version: `>=21.3` +After deployment, verify that your root level `.env` file contains the necessary environment variables for connecting to your deployed models. Each automatic deployment method will configure this file with the appropriate settings for your chosen approach. -Many examples in this repository require the `healthcareai_toolkit` package. Install it by running the following command in the repository root: +> [!IMPORTANT] +> Check the value of `DATA_ROOT` in your `.env` file to ensure it's appropriate for your setup. The default value is `/home/azureuser/data/`, but you may need to modify it based on your environment.
If you change the `DATA_ROOT` value, you'll also need to update the destination path in the azcopy command in the following step. -Many examples in this repository require the `healthcareai_toolkit` package. Install it by running the following command in the repository root: +> [!NOTE] +> If you used a manual deployment method you will have to configure this file yourself; see [Manual Deployment](docs/manual-deployment.md) for more information. + +#### Download Sample Data + +The sample data used by the examples is located in our Blob Storage account. Use [azcopy tool](https://learn.microsoft.com/en-us/azure/storage/common/storage-ref-azcopy) to download: ```sh -pip install ./package/ +azcopy copy --recursive https://azuremlexampledata.blob.core.windows.net/data/healthcare-ai/ /home/azureuser/data/ ``` -If you wish to edit the package easily, you can also install it in editable mode using the `-e` flag: +> [!TIP] +> This downloads the entire dataset. For specific examples, you can download subsets by appending the subfolder name to the source URL. + +#### Install Healthcare AI Toolkit + +Install the helper toolkit that facilitates working with endpoints, DICOM files, and medical imaging: ```sh +# Standard installation +pip install ./package/ +``` +_or_ +```sh +# Editable installation for development pip install -e ./package/ ``` -### 3. Examples and Sample Code +After installation, you can test your endpoint connectivity: + +```sh +# Test all configured endpoints +healthcareai-test + +# Test specific model endpoints quietly
healthcareai-test --models cxr,pgp --quiet +``` + +### Step 5: Explore Examples + +Now you're ready to explore the notebooks! Start with one of these paths: + +**🎯 Beginners**: Try **[zero-shot classification](./azureml/medimageinsight/zero-shot-classification.ipynb)** and **[adapter training](./azureml/medimageinsight/adapter-training.ipynb)**. + +**🔍 Image Segmentation**: Try **[segmentation patterns](./azureml/medimageparse/medimageparse_segmentation_demo.ipynb)**. + +**📋 Report Generation**: See example usage in **[CXRReportGen deployment](./azureml/cxrreportgen/cxr-deploy.ipynb)**. + +**🚀 Advanced**: Explore **[image search](./azureml/advanced_demos/image_search/2d_image_search.ipynb)**, **[outlier detection](./azureml/medimageinsight/outlier-detection-demo.ipynb)**, or **[multimodal analysis](./azureml/advanced_demos/radpath/rad_path_survival_demo.ipynb)**.
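Before diving into a notebook, you can also exercise a deployed endpoint directly over HTTPS. A minimal sketch, assuming `MI2_MODEL_ENDPOINT` in your `.env` holds the scoring URI and a hypothetical `MI2_MODEL_KEY` variable holds its key; the request body schema is model-specific, so build `payload.json` from the corresponding notebook:

```sh
# Minimal smoke test against a deployed managed online endpoint.
# MI2_MODEL_KEY is a placeholder name; use whatever variable holds your key.
source .env
curl -X POST "$MI2_MODEL_ENDPOINT" \
  -H "Authorization: Bearer $MI2_MODEL_KEY" \
  -H "Content-Type: application/json" \
  -d @payload.json
```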
+ +## Project Structure + +``` +healthcareai-examples/ +├── azureml/ # Core notebooks and examples +│ ├── cxrreportgen/ # Chest X-ray report generation examples +│ ├── medimageinsight/ # Medical image embedding and analysis +│ ├── medimageparse/ # Medical image segmentation examples +│ └── advanced_demos/ # Advanced multimodal solutions +├── package/ # Healthcare AI Toolkit +│ ├── healthcareai_toolkit/ # Helper utilities and functions +│ └── model_library/ # Pre-defined models and utilities +├── deploy/ # Infrastructure as Code (Bicep templates) +├── docs/ # Additional documentation +└── tests/ # Test suites and validation notebooks +``` + +### Key Components + +* **azureml**: Contains Jupyter notebooks and scripts for deploying and using AI models with Azure Machine Learning + * **cxrreportgen**: Notebooks for deploying and examples using CXRReportGen + * **medimageinsight**: Notebooks for deploying and examples using MedImageInsight + * **medimageparse**: Notebooks for deploying and examples using MedImageParse + * **advanced_demos**: Complex multimodal healthcare applications +* **package**: Contains the helper toolkit and model libraries + * **healthcareai_toolkit**: Helper utilities and functions to run the examples + * **model_library**: Useful pre-defined models and related utilities + +## See Also -Now you are ready to explore the notebooks in the `azureml` directory to see various examples of how to use the healthcare ai models! +- **[Healthcare Model Studio](https://aka.ms/healthcaremodelstudio)** - AI Foundry Healthcare Model Catalog +- **[CXRReportGen Model Card](https://aka.ms/cxrreportgenmodelcard)** - Model card for CXRReportGen, a chest X-ray report generation model +- **[MedImageParse Model Card](https://aka.ms/medimageparsemodelcard)** - Model card for MedImageParse, a model for medical image segmentation +- **[MedImageInsight Model Card](https://aka.ms/mi2modelcard)** - Model card for MedImageInsight, an image and text embedding foundation model -## Folder Structure +## Resources -* **azureml**: Contains Jupyter notebooks and scripts for deploying and using AI models with Azure Machine Learning. Inside you will find various folders with sample notebooks such as - * **cxrreportgen**: Notebooks for deploying and and examples using CXRReportGen. - * **medimageinsight**: Notebooks for deploying and examples using the MedImageInsight. - * **medimageparse**: Notebooks for deploying and and examples using MedImageParse. - * and many more as this repository grows! -* **package**: Contains the helper toolkit and model libraries. - * **healthcareai_toolkit**: Helper utilities and functions for to run the examples. - * **model_library**: Useful pre-defined models and related utilities. +### External Documentation -## Contributing +- [Foundation models for healthcare AI](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/healthcare-ai/healthcare-ai-models) +- [Azure Machine Learning](https://learn.microsoft.com/azure/machine-learning/) +- [Azure AI Services](https://learn.microsoft.com/azure/ai-services/) +- [Generative AI For Beginners](https://github.com/microsoft/generative-ai-for-beginners) -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution.
For details, visit [https://cla.opensource.microsoft.com](https://cla.opensource.microsoft.com). ## How to Contribute -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. +We welcome contributions to improve this project! Please see our [Contribution Guide](./CONTRIBUTING.md) for information on how to get started with contributing code, documentation, or other improvements. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [https://cla.opensource.microsoft.com](https://cla.opensource.microsoft.com). -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. -Read [CONTRIBUTING.md](./CONTRIBUTING.md) for more details. +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. ## License This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details. -## Trademarks +## Authorized Use -This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). -Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. -Any use of third-party trademarks or logos are subject to those third-party's policies. - \ No newline at end of file +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies.
diff --git a/azureml/medimageinsight/exam-parameter-demo/exam-parameter-detection.ipynb b/azureml/medimageinsight/exam-parameter-demo/exam-parameter-detection.ipynb index 1c3f1ee..c25e40a 100644 --- a/azureml/medimageinsight/exam-parameter-demo/exam-parameter-detection.ipynb +++ b/azureml/medimageinsight/exam-parameter-demo/exam-parameter-detection.ipynb @@ -75,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a4ad3518", "metadata": { "gather": { @@ -113,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "3c7ccf50", "metadata": { "gather": { @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "a1c94fef", "metadata": { "gather": { @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "d6980df2", "metadata": { "gather": { @@ -185,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "151c0b61", "metadata": { "gather": { @@ -220,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "09e7361b", "metadata": { "gather": { @@ -264,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "ee8ed6a7", "metadata": { "gather": { @@ -302,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "1e9deaf2", "metadata": { "gather": { @@ -317,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "625b6c27", "metadata": { "gather": { @@ -372,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "f1ebf912", "metadata": { "gather": { @@ -408,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "66474571", "metadata": { "gather": { @@ -453,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "63fdcfc9", "metadata": { "gather": { @@ -490,7 +490,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "e8786ead", "metadata": { "gather": { @@ -575,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "b0c15cdc", "metadata": { "gather": { @@ -586,7 +586,7 @@ "source": [ "# We will call this function to submit system prompt and user prompt to our GPT4 deployment\n", "def get_gpt_label(client, obj):\n", - " deployment = \"gpt-4o\"\n", + " deployment = settings.AZURE_OPENAI_MODEL_NAME\n", "\n", " response = client.chat.completions.create(\n", " model=deployment,\n", @@ -615,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "29e0c8c0", "metadata": { "gather": { @@ -640,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "a2a34f68", "metadata": { "gather": { @@ -683,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "bf190e7a", "metadata": {}, "outputs": [], @@ -718,7 +718,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "e4ce8ec9", "metadata": { "gather": { @@ -757,7 +757,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "c5b7a205-e56a-4f5d-965a-dd0cda0f7d34", "metadata": { "gather": { @@ -850,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "a9792029", "metadata": {}, "outputs": [], 
@@ -869,7 +869,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "c3635bbb", "metadata": { "scrolled": true @@ -924,7 +924,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "f8107a01", "metadata": {}, "outputs": [ @@ -954,7 +954,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "b8c1f30e", "metadata": {}, "outputs": [ @@ -989,7 +989,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "a6fadd1a", "metadata": {}, "outputs": [ @@ -1019,7 +1019,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "b6ca17ff", "metadata": { "scrolled": true @@ -1052,7 +1052,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "456126dc", "metadata": {}, "outputs": [], @@ -1081,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "1a689d61", "metadata": {}, "outputs": [], @@ -1101,7 +1101,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "f57b042d", "metadata": {}, "outputs": [ @@ -1141,7 +1141,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "a2b8e181", "metadata": {}, "outputs": [ @@ -1162,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "f8837c33", "metadata": {}, "outputs": [ @@ -1191,7 +1191,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "b0545b0d", "metadata": {}, "outputs": [ @@ -1215,7 +1215,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "d5e036a5", "metadata": {}, "outputs": [ @@ -1236,7 +1236,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "33f24c63", "metadata": { "scrolled": true @@ -1259,7 +1259,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "df7ef760", "metadata": {}, "outputs": [ @@ -1315,7 +1315,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "20ef8a5b", "metadata": { "scrolled": false @@ -1483,7 +1483,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "303aef34", "metadata": { "scrolled": false @@ -1634,7 +1634,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "50c95a7d", "metadata": { "scrolled": false @@ -1795,7 +1795,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "02289077", "metadata": { "scrolled": false @@ -1938,7 +1938,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "9c7054ec", "metadata": { "scrolled": false @@ -2093,7 +2093,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "16632486", "metadata": { "scrolled": false @@ -2251,7 +2251,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "e507eceb", "metadata": { "scrolled": false @@ -2417,7 +2417,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "87ac6e49", "metadata": { "scrolled": false @@ -2575,7 +2575,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "42d991ed", "metadata": { "scrolled": false @@ -2733,7 +2733,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "id": "3bc083ec", "metadata": { "scrolled": 
false @@ -2891,7 +2891,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "dd3b4310", "metadata": { "scrolled": false diff --git a/azureml/medimageinsight/exam-parameter-demo/exam_parameter_helpers.py b/azureml/medimageinsight/exam-parameter-demo/exam_parameter_helpers.py index 612ea1e..c4819ef 100644 --- a/azureml/medimageinsight/exam-parameter-demo/exam_parameter_helpers.py +++ b/azureml/medimageinsight/exam-parameter-demo/exam_parameter_helpers.py @@ -89,7 +89,7 @@ def create_oai_assistant(client): """Creates assistant to keep track of prior responses""" # Assistant API example: https://github.com/openai/openai-python/blob/main/examples/assistant.py # Available in limited regions - deployment = "gpt-4o" + deployment = settings.AZURE_OPENAI_MODEL_NAME assistant = client.beta.assistants.create( name="Math Tutor", instructions="You are a categorizer. For each question answered, extract entities related to people's names and " diff --git a/deploy/existing/README.md b/deploy/existing/README.md new file mode 100644 index 0000000..38bf49c --- /dev/null +++ b/deploy/existing/README.md @@ -0,0 +1,166 @@ +# Deploy to Existing Environment + +This deployment option deploys AI model endpoints into your existing Azure ML workspace. Use this option when you already have a workspace set up and want to add healthcare AI models to it. + +## Prerequisites + +- Azure CLI and Azure Developer CLI (azd) installed +- Existing Azure ML workspace with appropriate permissions +- Contributor or equivalent role on the target resource group or subscription (sufficient for deploying to existing infrastructure) +- Required quota for the models you plan to deploy (see main README Step 1) + + +## What Gets Deployed + +### Healthcare AI Models +- **Model Endpoints**: AI models deployed as managed online endpoints in your existing workspace +- **Compute Resources**: Dedicated compute instances for each model +- **No Infrastructure Changes**: Your existing workspace, storage, and other resources remain untouched + +### GPT Integration (when enabled) +- **Azure AI Services**: New AI Services account in your existing resource group +- **GPT Model Endpoints**: Ready for use alongside healthcare AI models + +> [!TIP] +> **Workspace Permissions**: Ensure you have appropriate permissions in the target workspace to create endpoints and compute resources. + +## Deployment Steps + +> [!NOTE] +> See [Deployment Configuration](../../docs/deployment-guide.md#deployment-configuration) for detailed setup options. + +### 1. Navigate to Existing Deployment Directory + +```bash +cd deploy/existing +``` + +### 2. Authenticate with Azure + +```bash +az login # add -t if needed +azd auth login # add --tenant if needed +``` + +### 3. Create and Configure Environment + +```bash +# Create a new azd environment +azd env new + +# Configure for existing workspace deployment +azd env set AZURE_RESOURCE_GROUP <your-resource-group> +azd env set AZUREML_WORKSPACE_NAME <your-workspace-name> +azd env set AZURE_LOCATION <workspace-location> +``` + +> [!IMPORTANT] +> **Azure ML Compute Instance**: If you're running from within an Azure ML compute instance, you can use our helper script to automatically detect your current workspace settings: +> +> ```bash +> # Auto-configure environment from current AML compute instance +> ./setup_azd_env_for_aml.sh +> +> # Then continue with step 4 below +> ``` +> +> This script automatically detects your current workspace name, resource group, subscription ID, and workspace location. + +### 4.
Optional: Configure GPT Model Deployment + +See [GPT Model Configuration](../../docs/deployment-guide.md#gpt-model-configuration) in the deployment guide for GPT model setup options. + +### 5. Optional: Select Specific Healthcare AI Models + +See [Model Selection](../../docs/deployment-guide.md#model-selection) in the deployment guide for filtering which models to deploy. + +### 6. Deploy Models + +```bash +azd up +``` + +This command will: +- Use your existing resource group and Azure ML workspace +- Deploy the selected healthcare AI model endpoints +- **If GPT model specified**: Deploy Azure AI Services and GPT model in the same resource group +- Configure your `.env` file with connection details + +## Environment Variables + +After successful deployment, your root level `.env` file should contain: + +```bash +# Healthcare AI model endpoints +MI2_MODEL_ENDPOINT=<mi2-endpoint-url> +MIP_MODEL_ENDPOINT=<mip-endpoint-url> +CXRREPORTGEN_MODEL_ENDPOINT=<cxr-endpoint-url> + +# GPT integration variables (if GPT model was deployed) +AZURE_OPENAI_ENDPOINT=<azure-openai-endpoint> +AZURE_OPENAI_MODEL_NAME=<gpt-deployment-name> +AZURE_OPENAI_API_KEY=<azure-openai-api-key> +``` + +## Next Steps + +After successful deployment, change back to the root directory: + +```bash +cd ../../ +``` + +Then return to the main README and continue with [Step 4: Setup your local environment](../../README.md#step-4-setup-your-local-environment) to install the Healthcare AI Toolkit and explore the examples. + +## Resource Cleanup + +### Quick Cleanup - Model Deployments Only (Recommended) + +To save costs by stopping expensive GPU compute resources while keeping your infrastructure: + +```bash +# Delete model deployments only (they charge per hour) +python ../shared/scripts/cleanup.py + +# Delete without confirmation +python ../shared/scripts/cleanup.py --yes +``` +This removes only the model endpoint deployments that charge per hour, while keeping the infrastructure (workspace, storage, etc.) for future use. See [Resource Cleanup](../../docs/deployment-guide.md#resource-cleanup) in the deployment guide for more details. + +### Complete Resource Cleanup + +> [!IMPORTANT] +> **azd down Limitation**: The `azd down` command does not work for existing deployments due to the subscription-scoped template design. + +#### Option 1: Delete Everything with Cleanup Script (Recommended) + +```bash +# Delete all azd-tagged resources +python ../shared/scripts/cleanup.py --all --purge + +# Delete everything without confirmation +python ../shared/scripts/cleanup.py --all --purge --yes +``` + +#### Option 2: Manual Cleanup + +If you prefer manual cleanup, all deployed resources are tagged with your environment name (`azd-env-name=<your-env-name>`). + +**Azure CLI Method:** +```bash +# List resources first to see what will be deleted (replace 'your-env-name' with your actual environment name) +az resource list --tag "azd-env-name=your-env-name" --query "[].{Name:name, Type:type, ResourceGroup:resourceGroup, Location:location}" -o table + +# Delete all resources with this tag +az resource delete --ids $(az resource list --tag "azd-env-name=your-env-name" --query "[].id" -o tsv) +``` + +**Azure Portal Method:** +- Navigate to your resource group in the Azure Portal +- Filter resources by the tag `azd-env-name` with your environment name +- Select and delete the resources you want to remove + +## Troubleshooting + +For comprehensive troubleshooting including GPT deployment issues, see [Troubleshooting](../../docs/deployment-guide.md#troubleshooting) in the deployment guide.
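As a quick first check when something fails, confirm that the deployment actually wrote connection details into the root `.env` file; a minimal sketch using the variable names listed above:

```bash
# From the repository root: show the connection variables the deployment wrote.
# Missing entries usually mean the postprovision hook did not complete.
grep -E '^(MI2|MIP|CXRREPORTGEN|AZURE_OPENAI)' .env
```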
\ No newline at end of file diff --git a/deploy/existing/azure.yaml b/deploy/existing/azure.yaml new file mode 100644 index 0000000..fce1a81 --- /dev/null +++ b/deploy/existing/azure.yaml @@ -0,0 +1,35 @@ +# azure.yaml for existing workspace deployment +name: healthcareai-examples-existing +metadata: + template: Healthcare AI Examples - Existing Workspace (azd) + +# Azure infrastructure to be created and configured +# See https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/azd-schema +infra: + provider: bicep + path: infra + module: main + +hooks: + preprovision: + posix: + shell: sh + run: | + python ../shared/scripts/preprovision.py --validate-existing --yes + interactive: true + windows: + shell: pwsh + run: | + python ../shared/scripts/preprovision.py --validate-existing --yes + interactive: true + postprovision: + posix: + shell: sh + run: | + python ../shared/scripts/postprovision.py --yes + interactive: true + windows: + shell: pwsh + run: | + python ../shared/scripts/postprovision.py --yes + interactive: true diff --git a/deploy/existing/infra/main.bicep b/deploy/existing/infra/main.bicep new file mode 100644 index 0000000..81d9de6 --- /dev/null +++ b/deploy/existing/infra/main.bicep @@ -0,0 +1,123 @@ +// main.bicep (existing workspace deployment) +// Subscription-scoped template +// Uses an existing resource group and AML workspace; no creation logic + +targetScope = 'subscription' + +@description('Name of the environment') +param environmentName string + +@description('Name of the resource group to use') +param resourceGroupName string + +@description('Name of the Azure ML workspace') +param workspaceName string + +@description('Tags to apply to all resources') +param tags object = {} + +@description('Comma-separated list of model names to include (filter)') +param modelFilterString string = '' + +@description('Azure region for deployment') +param location string + +@description('Unique suffix for resource naming (overrideable for consistency)') +param uniqueSuffix string = '' + +@description('Gen AI model name and version to deploy, leave empty to skip') +@allowed(['','gpt-4o;2024-08-06', 'gpt-4.1;2025-04-14']) +param gptModel string + +@description('Tokens per minute capacity for the model. Units of 1000 (capacity = 100 means 100K tokens per minute)') +param gptModelCapacity int = 50 + +@description('Azure region for GPT deployment (can be different from main location)') +param gptDeploymentLocation string = '' + +@description('Principal ID to grant access to the AI services. Leave empty to skip') +param myPrincipalId string = '' + +@description('Current principal type being used') +@allowed(['User', 'ServicePrincipal']) +param myPrincipalType string + +// AI Services configurations +@description('Name of the AI Services account. Automatically generated if left blank') +param aiServicesName string = '' + +// Variables for resource naming +var effectiveGptLocation = empty(gptDeploymentLocation) ? location : gptDeploymentLocation +var effectiveUniqueSuffix = empty(uniqueSuffix) ?
substring(uniqueString('${subscription().id}/resourceGroups/${resourceGroupName}'), 0, 6) : uniqueSuffix + +// Default tags to apply to all resources +var defaultTags = { + 'azd-env-name': environmentName + 'azd-deployment-type': 'existing' + 'azd-deployed-by': 'azd' + 'azd-id': effectiveUniqueSuffix + Environment: 'Non-Prod' +} + +// Merge user tags with default tags +var tagsUpdated = union(defaultTags, tags) + +// Azure resource abbreviations +var abbrs = loadJsonContent('../../shared/abbreviations.json') + +// Centralized resource names +var names = { + aiServices: !empty(aiServicesName) ? aiServicesName : '${abbrs.cognitiveServicesAccounts}${environmentName}-${effectiveUniqueSuffix}' +} + +// Reference existing AML workspace +resource existingWorkspace 'Microsoft.MachineLearningServices/workspaces@2024-10-01' existing = { + scope: resourceGroup(resourceGroupName) + name: workspaceName +} + +// Deploy GPT services (AI Services + GPT model) if specified +module gptServices '../../shared/aiServicesWithGpt.bicep' = if (!empty(gptModel)) { + name: 'gpt-services' + scope: resourceGroup(resourceGroupName) + params: { + location: effectiveGptLocation + aiServicesName: names.aiServices + gptModel: gptModel + gptModelCapacity: gptModelCapacity + tags: tagsUpdated + grantAccessTo: [ + { + id: myPrincipalId + type: myPrincipalType + } + ] + additionalIdentities: [] + } +} + +// Deploy model endpoints into the specified resource group +module modelDeploy '../../shared/deployModel.bicep' = { + name: 'deploy-models' + scope: resourceGroup(resourceGroupName) + params: { + workspaceName: workspaceName + location: location + tags: tagsUpdated + modelFilterString: modelFilterString + uniqueSuffix: effectiveUniqueSuffix + } +} + +// Outputs +output AZURE_SUBSCRIPTION_ID string = subscription().subscriptionId +output AZURE_RESOURCE_GROUP string = resourceGroupName +output AZUREML_WORKSPACE_ID string = existingWorkspace.id +output AZUREML_WORKSPACE_NAME string = workspaceName +output HLS_MODEL_ENDPOINTS array = modelDeploy.outputs.endpoints +output UNIQUE_SUFFIX string = effectiveUniqueSuffix + +// GPT deployment outputs (conditional) +output AZURE_OPENAI_ENDPOINT string = !empty(gptModel) ? gptServices.outputs.gptEndpoint : '' +output AZURE_OPENAI_MODEL_NAME string = !empty(gptModel) ? gptServices.outputs.gptModelName : '' +output AZURE_AI_SERVICES_NAME string = !empty(gptModel) ? 
gptServices.outputs.aiServicesName : '' diff --git a/deploy/existing/infra/main.parameters.json b/deploy/existing/infra/main.parameters.json new file mode 100644 index 0000000..ad822cc --- /dev/null +++ b/deploy/existing/infra/main.parameters.json @@ -0,0 +1,42 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "environmentName": { + "value": "${AZURE_ENV_NAME}" + }, + "location": { + "value": "${AZURE_LOCATION}" + }, + "workspaceName": { + "value": "${AZUREML_WORKSPACE_NAME}" + }, + "uniqueSuffix": { + "value": "${UNIQUE_SUFFIX}" + }, + "modelFilterString": { + "value": "${HLS_MODEL_FILTER}" + }, + "resourceGroupName": { + "value": "${AZURE_RESOURCE_GROUP}" + }, + "gptModel": { + "value": "${AZURE_GPT_MODEL}" + }, + "gptModelCapacity": { + "value": "${AZURE_GPT_CAPACITY}" + }, + "gptDeploymentLocation": { + "value": "${AZURE_GPT_LOCATION}" + }, + "myPrincipalType": { + "value": "${AZURE_PRINCIPAL_TYPE}" + }, + "aiServicesName": { + "value": "${AZURE_AISERVICES_NAME}" + }, + "myPrincipalId": { + "value": "${AZURE_PRINCIPAL_ID}" + } + } +} \ No newline at end of file diff --git a/deploy/existing/setup_azd_env_for_aml.sh b/deploy/existing/setup_azd_env_for_aml.sh new file mode 100644 index 0000000..451b38e --- /dev/null +++ b/deploy/existing/setup_azd_env_for_aml.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Exit on error, undefined variables, and pipe failures +set -euo pipefail + +# NOTE: azd down IS NOT FUNCTIONAL for existing deployments +# +# This script configures azd to deploy into your CURRENT AzureML workspace. +# The 'azd down' command does not work for existing deployments and will not +# delete any resources. Use '../shared/scripts/cleanup.py' to clean up +# deployed resources when finished. +# +# Only use this environment for deploying endpoints. For cleanup, use the +# cleanup.py script provided in the shared/scripts folder. + +# Setup color variables if supported +if command -v tput &>/dev/null; then + RED=$(tput setaf 1) + GREEN=$(tput setaf 2) + YELLOW=$(tput setaf 3) + BLUE=$(tput setaf 4) + BOLD=$(tput bold) + RESET=$(tput sgr0) +else + RED=""; GREEN=""; YELLOW=""; BLUE=""; BOLD=""; RESET="" +fi + +# Script to configure the current azd environment for deploying into the current AzureML workspace (when running from an AzureML compute instance) +# Usage: ./setup_azd_env_for_aml.sh + +# Get the current azd environment +CURRENT_ENV=$(azd env get-value AZURE_ENV_NAME 2>/dev/null | grep -v "^ERROR:" || echo "") + +if [ -z "$CURRENT_ENV" ]; then + echo "Error: No current azd environment found. Please run 'azd env select <env-name>' first or create a new environment with 'azd env new <env-name>'." + exit 1 +fi + +ENV_NAME="$CURRENT_ENV" +echo "Using current azd environment: $ENV_NAME" + +# Check if running on AzureML compute instance (use a default so 'set -u' does +# not abort before the friendly message prints when the variable is unset) +if [ "${APPSETTING_WEBSITE_SITE_NAME:-}" != "AMLComputeInstance" ]; then + echo "This script is intended to be run from within an AzureML compute instance." + exit 1 +fi + +echo "Detected AzureML compute instance environment."
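# CI_WORKSPACE and CI_RESOURCE_GROUP are environment variables that AzureML
# populates on compute instances; they identify the workspace and resource
# group hosting this instance, which is where we will deploy the endpoints.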
+
+# Default the expansions so 'set -u' cannot abort before the check below
+WORKSPACE_NAME="${CI_WORKSPACE:-}"
+RESOURCE_GROUP="${CI_RESOURCE_GROUP:-}"
+
+if [ -z "$WORKSPACE_NAME" ] || [ -z "$RESOURCE_GROUP" ]; then
+    echo "Error: Could not detect workspace info from AzureML compute instance."
+    exit 1
+fi
+
+echo "Auto-detected workspace: $WORKSPACE_NAME in resource group: $RESOURCE_GROUP"
+
+# We're using the current environment, no need to create or select
+
+# Function to set azd environment variable with echo
+azd_env_set() {
+    echo "Setting $1 = $2"
+    azd env set "$1" "$2"
+}
+
+# Set azd environment variables using helper
+azd_env_set AZUREML_WORKSPACE_NAME "$WORKSPACE_NAME"
+azd_env_set AZURE_RESOURCE_GROUP "$RESOURCE_GROUP"
+
+# Get subscription ID
+SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+if [ -z "$SUBSCRIPTION_ID" ]; then
+    echo "Error: Failed to retrieve subscription ID. Please ensure you are logged into Azure CLI."
+    exit 1
+fi
+if [[ ! "$SUBSCRIPTION_ID" =~ ^[a-zA-Z0-9-]+$ ]]; then
+    echo "Error: Invalid subscription ID format '$SUBSCRIPTION_ID'. Azure subscription IDs should be alphanumeric, may include dashes (e.g., 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx')."
+    exit 1
+fi
+azd_env_set AZURE_SUBSCRIPTION_ID "$SUBSCRIPTION_ID"
+
+# Set location from AzureML workspace
+LOCATION=$(az ml workspace show --name "$WORKSPACE_NAME" --resource-group "$RESOURCE_GROUP" --subscription "$SUBSCRIPTION_ID" --query location -o tsv)
+if [ -z "$LOCATION" ]; then
+    echo "Error: Failed to retrieve workspace location. Please verify the workspace exists and you have access."
+    exit 1
+fi
+# Validate location format (Azure locations are alphanumeric, no spaces)
+if [[ ! "$LOCATION" =~ ^[a-zA-Z0-9]+$ ]]; then
+    echo "Error: Invalid location format '$LOCATION'. Azure locations should be alphanumeric with no spaces (e.g., 'eastus', 'westeurope')."
+    exit 1
+fi
+azd_env_set AZURE_LOCATION "$LOCATION"
+
+# Success info
+echo ""
+echo "${GREEN}Environment '$ENV_NAME' successfully configured for AzureML workspace '$WORKSPACE_NAME'.${RESET}"
+echo "${GREEN}Run 'azd up' to deploy endpoints into this workspace.${RESET}"
+echo "${GREEN}Or run '../shared/scripts/select_models.py' to choose specific models before deployment.${RESET}"
diff --git a/deploy/fresh/README.md b/deploy/fresh/README.md
new file mode 100644
index 0000000..3af1bcb
--- /dev/null
+++ b/deploy/fresh/README.md
@@ -0,0 +1,136 @@
+# Deploy to Fresh Environment
+
+This deployment option creates a completely new Azure environment with a fresh resource group and Azure ML workspace. Use this option when you want isolated resources or are setting up a new development environment.
+
+## Prerequisites
+
+- Azure CLI and Azure Developer CLI (azd) installed
+- **Owner** role OR **User Access Administrator + Contributor** roles on the Azure subscription (required for creating resource groups and configuring role-based access controls)
+- Required quota for the models you plan to deploy (see main README Step 1)
+
+> [!NOTE]
+> **For Admins**: You can deploy on behalf of another user by setting `AZURE_PRINCIPAL_ID` to their Azure AD object ID. This grants the target user access to the deployed resources while you maintain deployment permissions.
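+
+For example, an admin deploying for a teammate might run the following before `azd up`; the object ID below is a placeholder, and `AZURE_PRINCIPAL_TYPE` accepts `User` or `ServicePrincipal` (per the Bicep parameters in this template):
+
+```bash
+# set the target user's object ID (placeholder value shown)
+azd env set AZURE_PRINCIPAL_ID "00000000-0000-0000-0000-000000000000"
+azd env set AZURE_PRINCIPAL_TYPE "User"
+```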
+
+## What Gets Created
+
+### Core Infrastructure
+- **Resource Group**: New resource group containing all resources
+- **Azure ML Workspace**: Fresh workspace with system-assigned managed identity
+- **Storage Account**: For workspace data and artifacts
+- **Key Vault**: For secrets and keys management
+- **Container Registry**: For model containers
+- **Application Insights**: For monitoring and logging
+- **Healthcare AI Model Endpoints**: Deployed models ready for inference
+
+### GPT Integration (when enabled)
+- **Azure AI Services**: Multi-service AI account for GPT models
+- **GPT Model Endpoints**: Ready for use alongside healthcare AI models
+
+> [!TIP]
+> **Resource Naming**: Resources are created with unique names using a hash of the resource group ID to avoid naming conflicts.
+
+## Deployment Steps
+
+> [!NOTE]
+> See [Deployment Configuration](../../docs/deployment-guide.md#deployment-configuration) for detailed setup options.
+
+### 1. Navigate to Fresh Deployment Directory
+
+```bash
+cd deploy/fresh
+```
+
+### 2. Authenticate with Azure
+
+```bash
+az login        # add -t <tenant-id> if needed
+azd auth login  # add --tenant <tenant-id> if needed
+```
+
+### 3. Create and Configure Environment
+
+```bash
+# Create a new azd environment
+azd env new <environment-name>
+
+# Set your Azure location (use a location where you have quota)
+azd env set AZURE_LOCATION <location>
+```
+
+### 4. Optional: Select Specific Healthcare AI Models
+
+See [Model Selection](../../docs/deployment-guide.md#model-selection) in the deployment guide for filtering which models to deploy.
+
+### 5. Optional: Configure GPT Model Deployment
+
+See [GPT Model Configuration](../../docs/deployment-guide.md#gpt-model-configuration) in the deployment guide for GPT model setup options.
+
+### 6. Deploy Resources
+
+```bash
+azd up
+```
+
+This command will:
+- Create a new resource group
+- Deploy a new Azure ML workspace with associated resources (storage, key vault, container registry, etc.)
+- Deploy the selected healthcare AI model endpoints
+- **If GPT model specified**: Deploy Azure AI Services and GPT model
+- Configure your `.env` file with connection details
+
+## Environment Variables
+
+After successful deployment, your root level `.env` file should contain:
+
+```bash
+# Healthcare AI model endpoints
+MI2_MODEL_ENDPOINT=<MedImageInsight endpoint URL>
+MIP_MODEL_ENDPOINT=<MedImageParse endpoint URL>
+CXRREPORTGEN_MODEL_ENDPOINT=<CXRReportGen endpoint URL>
+
+# GPT integration variables (if GPT model was deployed)
+AZURE_OPENAI_ENDPOINT=<Azure OpenAI endpoint URL>
+AZURE_OPENAI_MODEL_NAME=<deployed GPT model name>
+AZURE_OPENAI_API_KEY=<API key>
+```
+
+## Next Steps
+
+After successful deployment, change back to the root directory:
+
+```bash
+cd ../../
+```
+
+Then return to the main README and continue with [Step 4: Setup your local environment](../../README.md#step-4-setup-your-local-environment) to install the Healthcare AI Toolkit and explore the examples.
+
+## Resource Cleanup
+
+### Quick Cleanup - Model Deployments Only (Recommended)
+
+To save costs by stopping expensive GPU compute resources while keeping your infrastructure:
+
+```bash
+# Delete model deployments only (they charge per hour)
+python ../shared/scripts/cleanup.py
+
+# Delete without confirmation
+python ../shared/scripts/cleanup.py --yes
+```
+
+This removes only the model endpoint deployments that charge per hour, while keeping the infrastructure (workspace, storage, etc.) for future use. See [Resource Cleanup](../../docs/deployment-guide.md#resource-cleanup) in the deployment guide for more details.
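+
+Judging by the note the cleanup script itself prints, an `--all` flag extends deletion beyond the per-hour model deployments to the endpoints and tagged infrastructure; a sketch, assuming the flag behaves as the script's output suggests:
+
+```bash
+# also delete endpoints, AI services, and other resources tagged with this azd environment
+python ../shared/scripts/cleanup.py --all --yes
+```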
+ +### Complete Resource Cleanup + +When you're done with the deployment and want to delete all resources: + +```bash +azd down --purge +``` +This removes the entire resource group and all contained resources. + +> [!NOTE] +> **Known Issue**: `azd down --purge` will not actually purge the Azure ML workspace due to a known limitation. The workspace will be soft-deleted and can be recovered within the retention period. + +## Troubleshooting + +See [Troubleshooting](../../docs/deployment-guide.md#troubleshooting) \ No newline at end of file diff --git a/deploy/fresh/azure.yaml b/deploy/fresh/azure.yaml new file mode 100644 index 0000000..cae0324 --- /dev/null +++ b/deploy/fresh/azure.yaml @@ -0,0 +1,23 @@ +# azure.yaml for fresh workspace deployment +name: healthcareai-examples-fresh +metadata: + template: Healthcare AI Examples - Fresh Workspace (azd) + +# Azure infrastructure to be created and configured +# See https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/azd-schema +infra: + provider: bicep + path: infra + module: main + +hooks: + preprovision: + shell: sh + run: | + python ../shared/scripts/preprovision.py --yes + interactive: true + postprovision: + shell: sh + run: | + python ../shared/scripts/postprovision.py --yes + interactive: true diff --git a/deploy/fresh/infra/main.bicep b/deploy/fresh/infra/main.bicep new file mode 100644 index 0000000..f929d17 --- /dev/null +++ b/deploy/fresh/infra/main.bicep @@ -0,0 +1,178 @@ +// Resource-group-scoped template +// Creates a fresh AML workspace with all supporting infrastructure + +targetScope = 'resourceGroup' + +// ============================================================================ +// PARAMETERS - Basic Configuration +// ============================================================================ + +@description('Name of the environment') +param environmentName string + +@description('Name of the Azure ML workspace') +param workspaceName string = 'mlw-${environmentName}' + +@description('Tags to apply to all resources') +param tags object = {} + +@description('Comma-separated list of model names to include (filter)') +param modelFilterString string = '' + +@description('Azure region for deployment') +param location string = resourceGroup().location + +@description('Unique suffix for resource naming (overrideable for consistency)') +param uniqueSuffix string = '' + +// ============================================================================ +// PARAMETERS - GPT Configuration +// ============================================================================ + +@description('Gen AI model name and version to deploy, leave empty to skip') +@allowed(['','gpt-4o;2024-08-06', 'gpt-4.1;2025-04-14']) +param gptModel string + +@description('Tokens per minute capacity for the model. Units of 1000 (capacity = 100 means 100K tokens per minute)') +param gptModelCapacity int = 100 + +@description('Azure region for GPT deployment (can be different from main location)') +param gptDeploymentLocation string = '' + +// ============================================================================ +// PARAMETERS - Access Control +// ============================================================================ + +@description('Principal ID to grant access to the AI services. 
Leave empty to skip')
+param myPrincipalId string = ''
+
+@description('Current principal type being used')
+@allowed(['User', 'ServicePrincipal'])
+param myPrincipalType string
+
+// ============================================================================
+// PARAMETERS - Resource Names (Optional Overrides)
+// ============================================================================
+
+// AI Services configurations
+@description('Name of the AI Services account. Automatically generated if left blank')
+param aiServicesName string = ''
+
+@description('Name of the Storage Account. Automatically generated if left blank')
+param storageName string = ''
+
+@description('Name of the Key Vault. Automatically generated if left blank')
+param keyVaultName string = ''
+
+@description('Name of the Container Registry. Automatically generated if left blank')
+param containerRegistryName string = ''
+
+@description('Allow shared key access to storage account (AZURE_STORAGE_ALLOW_ACCESS_KEY environment variable, default: false)')
+param allowSharedKeyAccess bool = false
+
+// ============================================================================
+// VARIABLES - Configuration and Naming
+// ============================================================================
+var effectiveUniqueSuffix = empty(uniqueSuffix) ? substring(uniqueString(resourceGroup().id), 0, 6) : uniqueSuffix
+var effectiveGptLocation = empty(gptDeploymentLocation) ? location : gptDeploymentLocation
+
+// take() tolerates environment names shorter than 10 characters (substring would throw),
+// and toLower() keeps the derived storage account name valid
+var environmentNameTrunc = toLower(take(replace(replace(environmentName, '-', ''), '_', ''), 10))
+
+// Default tags to apply to all resources
+var defaultTags = {
+  'azd-env-name': environmentName
+  'azd-deployment-type': 'fresh'
+  'azd-deployed-by': 'azd'
+  'azd-id': effectiveUniqueSuffix
+  Environment: 'Non-Prod'
+}
+
+// Merge user tags with default tags
+var tagsUpdated = union(defaultTags, tags)
+
+// Azure resource abbreviations
+var abbrs = loadJsonContent('../../shared/abbreviations.json')
+
+// Centralized resource names
+var names = {
+  storage: !empty(storageName) ? storageName : '${abbrs.storageStorageAccounts}${environmentNameTrunc}${effectiveUniqueSuffix}'
+  keyVault: !empty(keyVaultName) ? keyVaultName : '${abbrs.keyVaultVaults}${environmentNameTrunc}${effectiveUniqueSuffix}'
+  containerRegistry: !empty(containerRegistryName) ? containerRegistryName : '${abbrs.containerRegistryRegistries}${environmentNameTrunc}${effectiveUniqueSuffix}'
+  aiServices: !empty(aiServicesName) ?
aiServicesName : '${abbrs.cognitiveServicesAccounts}${environmentName}-${effectiveUniqueSuffix}' +} + +// Create AML workspace with all its dependencies +module workspace '../../shared/amlWorkspace.bicep' = { + name: 'aml-workspace' + scope: resourceGroup() + params: { + location: location + workspaceName: workspaceName + tags: tagsUpdated + storageAccountName: names.storage + keyVaultName: names.keyVault + containerRegistryName: names.containerRegistry + allowSharedKeyAccess: allowSharedKeyAccess + grantAccessTo: [ + { + id: myPrincipalId + type: myPrincipalType + } + ] + additionalIdentities: [] + } +} + +// Deploy GPT services (AI Services + GPT model) if specified +module gptServices '../../shared/aiServicesWithGpt.bicep' = if (!empty(gptModel)) { + name: 'gpt-services' + scope: resourceGroup() + params: { + location: effectiveGptLocation + aiServicesName: names.aiServices + gptModel: gptModel + gptModelCapacity: gptModelCapacity + tags: tagsUpdated + grantAccessTo: [ + { + id: myPrincipalId + type: myPrincipalType + } + ] + additionalIdentities: [] + } +} + +// Deploy model endpoints into this resource group +module modelDeploy '../../shared/deployModel.bicep' = { + name: 'deploy-models' + scope: resourceGroup() + params: { + workspaceName: workspace.outputs.workspaceName + location: location + tags: tagsUpdated + modelFilterString: modelFilterString + uniqueSuffix: effectiveUniqueSuffix + } +} + +// Outputs +output AZURE_SUBSCRIPTION_ID string = subscription().subscriptionId +output AZURE_RESOURCE_GROUP_ID string = resourceGroup().id +output AZURE_RESOURCE_GROUP string = resourceGroup().name +output AZUREML_WORKSPACE_ID string = workspace.outputs.workspaceId +output AZUREML_WORKSPACE_NAME string = workspace.outputs.workspaceName + +output AZUREML_WORKSPACE_LOCATION string = workspace.outputs.workspaceLocation +output AZUREML_STORAGE_ACCOUNT_ENDPOINT string = workspace.outputs.storageAccountBlobEndpoint +output AZUREML_KEY_VAULT_ENDPOINT string = workspace.outputs.keyVaultEndpoint + +output UNIQUE_SUFFIX string = effectiveUniqueSuffix +output HLS_MODEL_ENDPOINTS array = modelDeploy.outputs.endpoints + +// GPT deployment outputs (conditional) +output AZURE_OPENAI_ENDPOINT string = !empty(gptModel) ? gptServices.outputs.gptEndpoint : '' +output AZURE_OPENAI_MODEL_NAME string = !empty(gptModel) ? gptServices.outputs.gptModelName : '' +output AZURE_AI_SERVICES_NAME string = !empty(gptModel) ? 
gptServices.outputs.aiServicesName : '' diff --git a/deploy/fresh/infra/main.parameters.json b/deploy/fresh/infra/main.parameters.json new file mode 100644 index 0000000..0d888ef --- /dev/null +++ b/deploy/fresh/infra/main.parameters.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "environmentName": { + "value": "${AZURE_ENV_NAME}" + }, + "location": { + "value": "${AZURE_LOCATION}" + }, + "workspaceName": { + "value": "${AZUREML_WORKSPACE_NAME}" + }, + "uniqueSuffix": { + "value": "${UNIQUE_SUFFIX}" + }, + "modelFilterString": { + "value": "${HLS_MODEL_FILTER}" + }, + "gptModel": { + "value": "${AZURE_GPT_MODEL}" + }, + "gptModelCapacity": { + "value": "${AZURE_GPT_CAPACITY}" + }, + "gptDeploymentLocation": { + "value": "${AZURE_GPT_LOCATION}" + }, + "myPrincipalType": { + "value": "${AZURE_PRINCIPAL_TYPE}" + }, + "aiServicesName": { + "value": "${AZURE_AISERVICES_NAME}" + }, + "aiHubName": { + "value": "${AZURE_AIHUB_NAME}" + }, + "aiProjectName": { + "value": "${AZURE_AIPROJECT_NAME}" + }, + "storageName": { + "value": "${AZURE_STORAGE_NAME}" + }, + "keyVaultName": { + "value": "${AZURE_KEYVAULT_NAME}" + }, + "containerRegistryName": { + "value": "${AZURE_CONTAINER_REGISTRY_NAME}" + }, + "myPrincipalId": { + "value": "${AZURE_PRINCIPAL_ID}" + }, + "allowSharedKeyAccess": { + "value": "${AZURE_STORAGE_ALLOW_ACCESS_KEY}" + } + } +} \ No newline at end of file diff --git a/deploy/shared/abbreviations.json b/deploy/shared/abbreviations.json new file mode 100644 index 0000000..1759be6 --- /dev/null +++ b/deploy/shared/abbreviations.json @@ -0,0 +1,140 @@ +{ + "analysisServicesServers": "as", + "apiManagementService": "apim-", + "appConfigurationConfigurationStores": "appcs-", + "appManagedEnvironments": "cae-", + "appContainerApps": "ca-", + "authorizationPolicyDefinitions": "policy-", + "automationAutomationAccounts": "aa-", + "blueprintBlueprints": "bp-", + "blueprintBlueprintsArtifacts": "bpa-", + "cacheRedis": "redis-", + "cdnProfiles": "cdnp-", + "cdnProfilesEndpoints": "cdne-", + "cognitiveServicesAccounts": "cog-", + "cognitiveServicesAzureAI": "cog-ai-", + "cognitiveServicesBing": "cog-bg-", + "cognitiveServicesOpenAI": "cog-oa-", + "cognitiveServicesSpeech": "cog-spc-", + "cognitiveServicesFormRecognizer": "cog-fr-", + "cognitiveServicesTextAnalytics": "cog-ta-", + "cognitiveServicesBot": "cog-bot-", + "computeAvailabilitySets": "avail-", + "computeCloudServices": "cld-", + "computeDiskEncryptionSets": "des", + "computeDisks": "disk", + "computeDisksOs": "osdisk", + "computeGalleries": "gal", + "computeSnapshots": "snap-", + "computeVirtualMachines": "vm", + "computeVirtualMachineScaleSets": "vmss-", + "containerInstanceContainerGroups": "ci", + "containerRegistryRegistries": "cr", + "containerServiceManagedClusters": "aks-", + "databricksWorkspaces": "dbw-", + "dataFactoryFactories": "adf-", + "dataLakeAnalyticsAccounts": "dla", + "dataLakeStoreAccounts": "dls", + "dataMigrationServices": "dms-", + "dBforMySQLServers": "mysql-", + "dBforPostgreSQLServers": "psql-", + "devicesIotHubs": "iot-", + "devicesProvisioningServices": "provs-", + "devicesProvisioningServicesCertificates": "pcert-", + "documentDBDatabaseAccounts": "cosmos-", + "eventGridDomains": "evgd-", + "eventGridDomainsTopics": "evgt-", + "eventGridEventSubscriptions": "evgs-", + "eventHubNamespaces": "evhns-", + "eventHubNamespacesEventHubs": "evh-", + "hdInsightClustersHadoop": "hadoop-", + 
"hdInsightClustersHbase": "hbase-", + "hdInsightClustersKafka": "kafka-", + "hdInsightClustersMl": "mls-", + "hdInsightClustersSpark": "spark-", + "hdInsightClustersStorm": "storm-", + "hybridComputeMachines": "arcs-", + "insightsActionGroups": "ag-", + "insightsComponents": "appi-", + "keyVaultVaults": "kv-", + "kubernetesConnectedClusters": "arck", + "kustoClusters": "dec", + "kustoClustersDatabases": "dedb", + "logicIntegrationAccounts": "ia-", + "logicWorkflows": "logic-", + "machineLearningServicesWorkspaces": "mlw-", + "managedIdentityUserAssignedIdentities": "id-", + "managementManagementGroups": "mg-", + "migrateAssessmentProjects": "migr-", + "networkApplicationGateways": "agw-", + "networkApplicationSecurityGroups": "asg-", + "networkAzureFirewalls": "afw-", + "networkBastionHosts": "bas-", + "networkConnections": "con-", + "networkDnsZones": "dnsz-", + "networkExpressRouteCircuits": "erc-", + "networkFirewallPolicies": "afwp-", + "networkFirewallPoliciesWebApplication": "waf", + "networkFirewallPoliciesRuleGroups": "wafrg", + "networkFrontDoors": "fd-", + "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", + "networkLoadBalancersExternal": "lbe-", + "networkLoadBalancersInternal": "lbi-", + "networkLoadBalancersInboundNatRules": "rule-", + "networkLocalNetworkGateways": "lgw-", + "networkNatGateways": "ng-", + "networkNetworkInterfaces": "nic-", + "networkNetworkSecurityGroups": "nsg-", + "networkNetworkSecurityGroupsSecurityRules": "nsgsr-", + "networkNetworkWatchers": "nw-", + "networkPrivateDnsZones": "pdnsz-", + "networkPrivateLinkServices": "pl-", + "networkPublicIPAddresses": "pip-", + "networkPublicIPPrefixes": "ippre-", + "networkRouteFilters": "rf-", + "networkRouteTables": "rt-", + "networkRouteTablesRoutes": "udr-", + "networkTrafficManagerProfiles": "traf-", + "networkVirtualNetworkGateways": "vgw-", + "networkVirtualNetworks": "vnet-", + "networkVirtualNetworksSubnets": "snet-", + "networkVirtualNetworksVirtualNetworkPeerings": "peer-", + "networkVirtualWans": "vwan-", + "networkVpnGateways": "vpng-", + "networkVpnGatewaysVpnConnections": "vcn-", + "networkVpnGatewaysVpnSites": "vst-", + "notificationHubsNamespaces": "ntfns-", + "notificationHubsNamespacesNotificationHubs": "ntf-", + "operationalInsightsWorkspaces": "log-", + "portalDashboards": "dash-", + "powerBIDedicatedCapacities": "pbi-", + "purviewAccounts": "pview-", + "recoveryServicesVaults": "rsv-", + "resourcesResourceGroups": "rg-", + "searchSearchServices": "srch-", + "serviceBusNamespaces": "sb-", + "serviceBusNamespacesQueues": "sbq-", + "serviceBusNamespacesTopics": "sbt-", + "serviceEndPointPolicies": "se-", + "serviceFabricClusters": "sf-", + "signalRServiceSignalR": "sigr", + "sqlManagedInstances": "sqlmi-", + "sqlServers": "sql-", + "sqlServersDataWarehouse": "sqldw-", + "sqlServersDatabases": "sqldb-", + "sqlServersDatabasesStretch": "sqlstrdb-", + "storageStorageAccounts": "st", + "storageStorageAccountsVm": "stvm", + "storSimpleManagers": "ssimp", + "streamAnalyticsCluster": "asa-", + "synapseWorkspaces": "syn", + "synapseWorkspacesAnalyticsWorkspaces": "synw", + "synapseWorkspacesSqlPoolsDedicated": "syndp", + "synapseWorkspacesSqlPoolsSpark": "synsp", + "timeSeriesInsightsEnvironments": "tsi-", + "webServerFarms": "plan-", + "webSitesAppService": "app-", + "webSitesAppServiceEnvironment": "ase-", + "webSitesFunctions": "func-", + "webStaticSites": "stapp-" +} diff --git a/deploy/shared/aiServices.bicep b/deploy/shared/aiServices.bicep new file mode 100644 index 0000000..bbcc51b --- 
/dev/null +++ b/deploy/shared/aiServices.bicep @@ -0,0 +1,86 @@ +// AI Services module for deploying Azure Cognitive Services with GPT models +// This module creates an AI Services account that can host GPT models + +targetScope = 'resourceGroup' + +@description('Azure region for deployment') +param location string + +@description('Name of the AI Services account') +param aiServicesName string + +@description('Tags to apply to all resources') +param tags object = {} + +@description('List of principals to grant Cognitive Services OpenAI User role') +param grantAccessTo array = [] + +@description('Additional managed identities to grant access to') +param additionalIdentities array = [] + +@description('SKU for the AI Services account') +@allowed(['S0', 'F0']) +param sku string = 'S0' + +@description('Whether to disable local authentication') +param disableLocalAuth bool = false + +@description('Public network access setting') +@allowed(['Enabled', 'Disabled']) +param publicNetworkAccess string = 'Enabled' + +// Create the AI Services account +resource aiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' = { + name: aiServicesName + location: location + tags: tags + kind: 'AIServices' + sku: { + name: sku + } + properties: { + apiProperties: {} + customSubDomainName: toLower(aiServicesName) + disableLocalAuth: disableLocalAuth + publicNetworkAccess: publicNetworkAccess + networkAcls: { + defaultAction: 'Allow' + } + } + identity: { + type: 'SystemAssigned' + } +} + +// Role definition for Cognitive Services OpenAI User +var cognitiveServicesOpenAIUserRole = 'a97b65f3-24c7-4388-baec-2e87135dc908' + +// Grant access to specified principals (only if local auth is disabled) +resource roleAssignments 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessTo: if (!empty(principal.id) && disableLocalAuth) { + name: guid(aiServices.id, principal.id, cognitiveServicesOpenAIUserRole) + scope: aiServices + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', cognitiveServicesOpenAIUserRole) + principalId: principal.id + principalType: principal.type + } + } +] + +// Grant access to additional managed identities (only if local auth is disabled) +resource additionalRoleAssignments 'Microsoft.Authorization/roleAssignments@2022-04-01' = [for (identity, index) in additionalIdentities: if (disableLocalAuth) { + name: guid(aiServices.id, identity, cognitiveServicesOpenAIUserRole, 'additional') + scope: aiServices + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', cognitiveServicesOpenAIUserRole) + principalId: identity + principalType: 'ServicePrincipal' + } +}] + +// Outputs +output aiServicesId string = aiServices.id +output aiServicesName string = aiServices.name +output aiServicesEndpoint string = aiServices.properties.endpoint +output aiServicesPrincipalId string = aiServices.identity.principalId diff --git a/deploy/shared/aiServicesWithGpt.bicep b/deploy/shared/aiServicesWithGpt.bicep new file mode 100644 index 0000000..bc39dde --- /dev/null +++ b/deploy/shared/aiServicesWithGpt.bicep @@ -0,0 +1,60 @@ +// aiServicesWithGpt.bicep +// Combined module for AI Services and GPT deployment +// This module creates AI Services and GPT model, using externally provided infrastructure + +@description('Azure region for deployment') +param location string + +@description('Name of the AI Services account') +param aiServicesName string + +@description('Tags to apply to all resources') 
+param tags object = {} + +@description('Identities to grant access to the AI Services account') +param grantAccessTo array = [] + +@description('Additional managed identities to assign to the AI Services account') +param additionalIdentities array = [] + +@description('GPT model name and version to deploy (format: "model;version")') +param gptModel string + +@description('Tokens per minute capacity for the model. Units of 1000 (capacity = 50 means 50K tokens per minute)') +param gptModelCapacity int = 50 + +// Note: This module only creates AI Services and GPT deployment +// Infrastructure connections are handled by the AI Hub module + +// Create AI Services account +module aiServices './aiServices.bicep' = { + name: 'ai-services' + params: { + location: location + aiServicesName: aiServicesName + tags: tags + grantAccessTo: grantAccessTo + additionalIdentities: additionalIdentities + } +} + +// Deploy GPT model to the AI Services account +module gptDeployment './gptDeployment.bicep' = { + name: 'gpt-deployment' + params: { + aiServicesName: aiServices.outputs.aiServicesName + gptModel: gptModel + gptModelCapacity: gptModelCapacity + tags: tags + } +} + +// Outputs +output aiServicesName string = aiServices.outputs.aiServicesName +output aiServicesEndpoint string = aiServices.outputs.aiServicesEndpoint +output aiServicesId string = aiServices.outputs.aiServicesId + +output gptEndpoint string = gptDeployment.outputs.endpoint +output gptDeploymentName string = gptDeployment.outputs.deploymentName +output gptModelName string = gptDeployment.outputs.modelName +output gptModelVersion string = gptDeployment.outputs.modelVersion diff --git a/deploy/shared/amlWorkspace.bicep b/deploy/shared/amlWorkspace.bicep new file mode 100644 index 0000000..e80777c --- /dev/null +++ b/deploy/shared/amlWorkspace.bicep @@ -0,0 +1,232 @@ +targetScope = 'resourceGroup' + +// ----------------------------------------------------------------------------- +// Required parameters +// ----------------------------------------------------------------------------- +@description('Azure region for the workspace') +param location string + +@description('Name of the workspace') +param workspaceName string + +@description('Tags to apply to resources') +param tags object = {} + +@description('Storage account name') +param storageAccountName string + +@description('Key vault name') +param keyVaultName string + +@description('Container registry name') +param containerRegistryName string + +@description('List of principals to grant access to the workspace') +param grantAccessTo array = [] + +@description('Additional managed identities to assign to the workspace') +param additionalIdentities array = [] + +@description('Whether to allow shared key access for the storage account and use identity-based access') +param allowSharedKeyAccess bool = true + +@sys.description('Optional. The authentication mode used by the workspace when connecting to the default storage account.') +@allowed([ + 'AccessKey' + 'Identity' +]) +param systemDatastoresAuthMode string = allowSharedKeyAccess ? 
'AccessKey' : 'Identity' + +// Combine grantAccessTo with additionalIdentities +var access = [for i in range(0, length(additionalIdentities)): { + id: additionalIdentities[i] + type: 'ServicePrincipal' +}] + +// Add workspace identity to the access list after workspace is created (using dependsOn in role assignments) +var grantAccessToUpdated = concat(grantAccessTo, access) + +// ----------------------------------------------------------------------------- +// Resources +// ----------------------------------------------------------------------------- + +// Create storage account +module storageAccount 'storageAccount.bicep' = { + name: 'storage-account' + params: { + storageAccountName: storageAccountName + location: location + allowSharedKeyAccess: allowSharedKeyAccess + tags: tags + grantAccessTo: grantAccessToUpdated + additionalIdentities: [] + } +} + +// Create key vault +module keyVault 'keyVault.bicep' = { + name: 'key-vault' + params: { + keyVaultName: keyVaultName + location: location + tags: tags + grantAccessTo: grantAccessToUpdated + additionalIdentities: [] + } +} + +// Create container registry +module containerRegistry 'containerRegistry.bicep' = { + name: 'container-registry' + params: { + containerRegistryName: containerRegistryName + location: location + tags: tags + grantAccessTo: grantAccessToUpdated + additionalIdentities: [] + } +} + +// Create an Application Insights instance +resource appInsights 'Microsoft.Insights/components@2020-02-02' = { + name: 'ai-${uniqueString(resourceGroup().id)}' + location: location + tags: tags + kind: 'web' + properties: { + Application_Type: 'web' + DisableIpMasking: false + DisableLocalAuth: false + Flow_Type: 'Redfield' + ForceCustomerStorageForProfiler: false + ImmediatePurgeDataOn30Days: false + publicNetworkAccessForIngestion: 'Enabled' + publicNetworkAccessForQuery: 'Enabled' + Request_Source: 'rest' + } +} + +// Create Azure Machine Learning workspace +resource workspace 'Microsoft.MachineLearningServices/workspaces@2024-10-01-preview' = { + name: workspaceName + location: location + tags: tags + identity: { + type: 'SystemAssigned' + } + properties: { + friendlyName: workspaceName + storageAccount: storageAccount.outputs.storageAccountID + keyVault: keyVault.outputs.keyVaultID + applicationInsights: appInsights.id + containerRegistry: containerRegistry.outputs.containerRegistryID + publicNetworkAccess: 'Enabled' + v1LegacyMode: false + systemDatastoresAuthMode: systemDatastoresAuthMode + } +} + +// ----------------------------------------------------------------------------- +// Role assignments - correct separation of concerns: +// 1. Infrastructure modules handle external principal access to infrastructure resources +// 2. Workspace handles its system-assigned identity access to infrastructure resources +// 3. 
Workspace handles external principal access to workspace itself +// ----------------------------------------------------------------------------- + +// Grant Key Vault Secrets User role to workspace system-assigned identity +resource keyVaultSecretsUser 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '4633458b-17de-408a-b874-0445c86b69e6' +} + +resource keyVaultRef 'Microsoft.KeyVault/vaults@2023-07-01' existing = { + name: keyVaultName +} + +resource workspaceKeyVaultRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(keyVaultName, workspace.id, keyVaultSecretsUser.id) + scope: keyVaultRef + properties: { + roleDefinitionId: keyVaultSecretsUser.id + principalId: workspace.identity.principalId + principalType: 'ServicePrincipal' + } +} + +// Grant AcrPull role to workspace system-assigned identity +resource acrPull 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '7f951dda-4ed3-4680-a7ca-43fe172d538d' +} + +resource containerRegistryRef 'Microsoft.ContainerRegistry/registries@2023-07-01' existing = { + name: containerRegistryName +} + +resource workspaceContainerRegistryRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(containerRegistryName, workspace.id, acrPull.id) + scope: containerRegistryRef + properties: { + roleDefinitionId: acrPull.id + principalId: workspace.identity.principalId + principalType: 'ServicePrincipal' + } +} + +// Grant Managed Identity Operator role to workspace system-assigned identity (scoped to resource group) +resource managedIdentityOperator 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'f1a07417-d97a-45cb-824c-7a7467783830' +} + +resource managedIdentityOperatorRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(resourceGroup().id, workspace.id, managedIdentityOperator.id) + scope: resourceGroup() + properties: { + roleDefinitionId: managedIdentityOperator.id + principalId: workspace.identity.principalId + principalType: 'ServicePrincipal' + } +} + +// Grant AzureML Data Scientist role to external principals for workspace access +resource azureMLDataScientist 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'f6c7c914-8db3-469d-8ca1-694a8f32e121' +} + +resource workspaceAccessRoleAssignments 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(workspace.id, principal.id, azureMLDataScientist.id) + scope: workspace + properties: { + roleDefinitionId: azureMLDataScientist.id + principalId: principal.id + principalType: principal.type + } + } +] + + + +// ----------------------------------------------------------------------------- +// Outputs +// ----------------------------------------------------------------------------- +output workspaceId string = workspace.id +output workspaceName string = workspace.name +output workspaceLocation string = workspace.location + +// Application Insights outputs +output appInsightsInstrumentationKey string = appInsights.properties.InstrumentationKey +output appInsightsId string = appInsights.id + +// Storage account outputs +output storageAccountId string = storageAccount.outputs.storageAccountID +output storageAccountName string = storageAccount.outputs.storageAccountName +output storageAccountBlobEndpoint string = storageAccount.outputs.storageAccountBlobEndpoint + +// Key vault outputs +output keyVaultId string = keyVault.outputs.keyVaultID 
+output keyVaultName string = keyVault.outputs.keyVaultName +output keyVaultEndpoint string = keyVault.outputs.keyVaultEndpoint + +// Container registry outputs +output containerRegistryId string = containerRegistry.outputs.containerRegistryID +output containerRegistryName string = containerRegistry.outputs.containerRegistryName +output containerRegistryLoginServer string = containerRegistry.outputs.containerRegistryLoginServer diff --git a/deploy/shared/containerRegistry.bicep b/deploy/shared/containerRegistry.bicep new file mode 100644 index 0000000..f37336f --- /dev/null +++ b/deploy/shared/containerRegistry.bicep @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +@description('Specifies the name of the Azure Container Registry.') +param containerRegistryName string + +@description('Specifies the location in which the Azure Container Registry should be deployed.') +param location string + +param tags object = {} +param grantAccessTo array = [] +param additionalIdentities array = [] + +var access = [for i in range(0, length(additionalIdentities)): { + id: additionalIdentities[i] + type: 'ServicePrincipal' +}] + +var grantAccessToUpdated = concat(grantAccessTo, access) + +resource acr 'Microsoft.ContainerRegistry/registries@2023-07-01' = { + name: containerRegistryName + location: location + tags: tags + sku: { + name: 'Standard' + } + properties: { + adminUserEnabled: false + publicNetworkAccess: 'Enabled' + networkRuleBypassOptions: 'AzureServices' + zoneRedundancy: 'Disabled' + } +} + +resource acrPull 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '7f951dda-4ed3-4680-a7ca-43fe172d538d' // AcrPull +} + +resource acrPullAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, acr.id, acrPull.id) + scope: acr + properties: { + roleDefinitionId: acrPull.id + principalId: principal.id + principalType: principal.type + } + } +] + +output containerRegistryID string = acr.id +output containerRegistryName string = acr.name +output containerRegistryLoginServer string = acr.properties.loginServer diff --git a/deploy/shared/deployModel.bicep b/deploy/shared/deployModel.bicep new file mode 100644 index 0000000..fb87375 --- /dev/null +++ b/deploy/shared/deployModel.bicep @@ -0,0 +1,77 @@ +targetScope = 'resourceGroup' + +// ----------------------------------------------------------------------------- +// Required parameters +// ----------------------------------------------------------------------------- +@description('Azure ML workspace name') +param workspaceName string + +@description('Azure region for deployment') +param location string + +// ----------------------------------------------------------------------------- +// Optional parameters with defaults +// ----------------------------------------------------------------------------- +@description('Tags to apply to resources') +param tags object = {} + +@description('Comma-separated list of model names to include (filter)') +param modelFilterString string = '' + +@description('Overrideable unique suffix to pass to submodule') +param uniqueSuffix string = '' + +// ----------------------------------------------------------------------------- +// Variables - Model loading, filtering and unique suffix calculation +// ----------------------------------------------------------------------------- + +// Load models from YAML +var models = loadYamlContent('models.yaml') + +// 
Calculate effective unique suffix +var effectiveUniqueSuffix = empty(uniqueSuffix) ? substring(uniqueString(resourceGroup().id), 0, 6) : uniqueSuffix + +// Filter models based on modelFilterString +var effectiveModelFilter = empty(modelFilterString) ? [] : split(modelFilterString, ',') +var filteredModels = empty(effectiveModelFilter) ? models : filter(models, + m => contains(array(map(effectiveModelFilter, f => toLower(f))), toLower(m.name))) + +// ----------------------------------------------------------------------------- +// Resources +// ----------------------------------------------------------------------------- + +// Loop through each filtered model and deploy+update traffic using the new module +module model_deploy 'modelDeployWithTraffic.bicep' = [for model in filteredModels: { + name: 'deploy-with-traffic-${model.name}' + params: { + location: location + workspaceName: workspaceName + modelName: model.name + endpointName: '' + deploymentName: '' + modelId: model.deployment.modelId + instanceType: model.deployment.instanceType + instanceCount: model.deployment.instanceCount + tags: tags + requestSettings: model.deployment.requestSettings + livenessProbe: model.deployment.livenessProbe + environmentVariables: {} + uniqueSuffix: effectiveUniqueSuffix + } +}] + +// ----------------------------------------------------------------------------- +// Outputs +// ----------------------------------------------------------------------------- +output endpoints array = [for (model, i) in filteredModels: { + name: model.name + env_name: model.env_name + endpointName: model_deploy[i].outputs.endpointName + id: model_deploy[i].outputs.endpointId + scoringUri: model_deploy[i].outputs.scoringUri + deploymentName: model_deploy[i].outputs.deploymentName +}] + +output endpointSuffix string = effectiveUniqueSuffix + + diff --git a/deploy/shared/gptDeployment.bicep b/deploy/shared/gptDeployment.bicep new file mode 100644 index 0000000..31d258e --- /dev/null +++ b/deploy/shared/gptDeployment.bicep @@ -0,0 +1,54 @@ +// GPT Model Deployment module +// This module deploys GPT models to an existing AI Services account + +targetScope = 'resourceGroup' + +@description('Name of the AI Services account') +param aiServicesName string + +@description('GPT model name and version (e.g., "gpt-4o;2024-08-06")') +param gptModel string + +@description('Tokens per minute capacity for the model (in thousands)') +param gptModelCapacity int = 100 + +@description('SKU name for the deployment') +@allowed(['GlobalStandard', 'Standard']) +param skuName string = 'GlobalStandard' + +@description('Tags to apply to all resources') +param tags object = {} + +// Extract model name and version from the gptModel parameter +var modelParts = split(gptModel, ';') +var modelName = modelParts[0] +var modelVersion = length(modelParts) > 1 ? modelParts[1] : '2024-08-06' + +// Reference the existing AI Services account +resource aiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' existing = { + name: aiServicesName +} + +// Deploy the GPT model if gptModel is not empty +resource gptDeployment 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = if (!empty(gptModel)) { + parent: aiServices + name: modelName + properties: { + model: { + format: 'OpenAI' + name: modelName + version: modelVersion + } + } + sku: { + name: skuName + capacity: gptModelCapacity + } + tags: tags +} + +// Outputs +output deploymentName string = !empty(gptModel) ? gptDeployment.name : '' +output modelName string = !empty(gptModel) ? 
modelName : '' +output modelVersion string = !empty(gptModel) ? modelVersion : '' +output endpoint string = aiServices.properties.endpoint diff --git a/deploy/shared/keyVault.bicep b/deploy/shared/keyVault.bicep new file mode 100644 index 0000000..799d30e --- /dev/null +++ b/deploy/shared/keyVault.bicep @@ -0,0 +1,69 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +@description('Azure region for deployment') +param location string + +@description('Name of the Key Vault') +param keyVaultName string + +@description('Tags to apply to all resources') +param tags object = {} + +@description('List of principals to grant access to') +param grantAccessTo array + +@description('Additional managed identities to assign access to') +param additionalIdentities array = [] + +var access = [for i in range(0, length(additionalIdentities)): { + id: additionalIdentities[i] + type: 'ServicePrincipal' +}] + +var grantAccessToUpdated = concat(grantAccessTo, access) + +resource keyVault 'Microsoft.KeyVault/vaults@2023-07-01' = { + name: keyVaultName + location: location + tags: tags + properties: { + createMode: 'default' + enabledForDeployment: false + enabledForDiskEncryption: false + enabledForTemplateDeployment: false + enableSoftDelete: true + enableRbacAuthorization: true + publicNetworkAccess: 'Enabled' + networkAcls: { + bypass: 'AzureServices' + defaultAction: 'Allow' + } + sku: { + family: 'A' + name: 'standard' + } + tenantId: subscription().tenantId + } +} + +resource secretsOfficer 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'b86a8fe4-44ce-4948-aee5-eccb2c155cd7' +} + +resource secretsOfficerAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, keyVault.id, secretsOfficer.id) + scope: keyVault + properties: { + roleDefinitionId: secretsOfficer.id + principalId: principal.id + principalType: principal.type + } + } +] + + +output keyVaultID string = keyVault.id +output keyVaultName string = keyVault.name +output keyVaultEndpoint string = keyVault.properties.vaultUri diff --git a/deploy/shared/mlModelEndpoint.bicep b/deploy/shared/mlModelEndpoint.bicep new file mode 100644 index 0000000..c5e28b6 --- /dev/null +++ b/deploy/shared/mlModelEndpoint.bicep @@ -0,0 +1,142 @@ +targetScope = 'resourceGroup' + +// ----------------------------------------------------------------------------- +// Required parameters +// ----------------------------------------------------------------------------- +@description('Azure ML workspace name') +param workspaceName string + +@description('Model name in Azure ML registry') +param modelName string + +@description('Unique endpoint name') +param endpointName string = '' + +@description('Deployment name') +param deploymentName string = '' + +@description('Azure region for deployment') +param location string + +@description('Full AzureML model ID URI (e.g. 
azureml://registries/azureml/models/ModelName/versions/Version)') +param modelId string + +// ----------------------------------------------------------------------------- +// Optional parameters with defaults +// ----------------------------------------------------------------------------- +@description('VM SKU for the deployment') +param instanceType string = 'Standard_NC4as_T4_v3' + +@description('Number of instances to deploy') +param instanceCount int = 1 + +@description('Maximum concurrent requests per instance') +param maxConcurrentRequestsPerInstance int = 1 + +@description('Tags to apply to resources') +param tags object = { + Repo: 'microsoft/healthcareai-examples-pr' + Environment: 'azd' + DeployedBy: 'azd' +} + +@description('Overrideable unique suffix (provided by root through effective computation)') +param uniqueSuffix string = '' + +// Set endpointName to the provided value, or fallback to toLower(modelName)-suffix if not provided +var effectiveEndpointName = !empty(endpointName) ? endpointName : toLower(format('{0}-{1}', modelName, uniqueSuffix)) + +var modelIdParts = split(modelId, '/') +var parsedModelName = modelIdParts[5] +var parsedModelVersion = modelIdParts[7] +var effectiveDeploymentName = !empty(deploymentName) ? deploymentName : toLower(format('{0}-v{1}', parsedModelName, parsedModelVersion)) + +@description('Request settings for the deployment (object with keys like requestTimeout, maxConcurrentRequestsPerInstance, etc.)') +param requestSettings object = {} +var defaultRequestSettings = { + requestTimeout: 'PT1M30S' + maxConcurrentRequestsPerInstance: maxConcurrentRequestsPerInstance +} +var effectiveRequestSettings = union(defaultRequestSettings, requestSettings) + +@description('Liveness probe settings for the deployment (object with keys like initialDelay, etc.)') +param livenessProbe object = {} +var defaultLivenessProbe = { + initialDelay: 'PT10M' +} +var effectiveLivenessProbe = union(defaultLivenessProbe, livenessProbe) + +// WORKER_COUNT: use environmentVariables.WORKER_COUNT if present, otherwise default to requestSettings_maxConcurrentRequestsPerInstance +@description('Environment variables for the deployment (object, e.g. { WORKER_COUNT: 3 })') +param environmentVariables object = {} +var defaultEnvironmentVariables = { + WORKER_COUNT: string(effectiveRequestSettings.maxConcurrentRequestsPerInstance) +} +var effectiveEnvironmentVariables = union(defaultEnvironmentVariables, environmentVariables) + +param setTraffic bool = false +var effectiveTraffic = setTraffic ? 
{'${effectiveDeploymentName}': 100} : null + +// ----------------------------------------------------------------------------- +// Resources +// ----------------------------------------------------------------------------- + +// Reference to existing Azure ML workspace +resource workspace 'Microsoft.MachineLearningServices/workspaces@2024-10-01' existing = { + name: workspaceName +} + +// Create the online endpoint +resource onlineEndpoint 'Microsoft.MachineLearningServices/workspaces/onlineEndpoints@2024-10-01' = { + parent: workspace + name: effectiveEndpointName + location: location + identity: { + type: 'SystemAssigned' + } + properties: { + authMode: 'key' + publicNetworkAccess: 'Enabled' + traffic: effectiveTraffic + } + tags: union(tags, { + Model: modelName + }) +} + +// Create the model deployment +resource modelDeployment 'Microsoft.MachineLearningServices/workspaces/onlineEndpoints/deployments@2024-10-01' = { + parent: onlineEndpoint + name: effectiveDeploymentName + location: location + sku: { + name: 'Default' + tier: 'Standard' + capacity: instanceCount + } + properties: { + endpointComputeType: 'Managed' + model: modelId + instanceType: instanceType + scaleSettings: { + scaleType: 'Default' + } + requestSettings: effectiveRequestSettings + environmentVariables: effectiveEnvironmentVariables + livenessProbe: effectiveLivenessProbe + appInsightsEnabled: true + + } + tags: union(tags, { + Model: modelName + }) +} + + +// ----------------------------------------------------------------------------- +// Outputs +// ----------------------------------------------------------------------------- +output endpointName string = onlineEndpoint.name +output deploymentName string = modelDeployment.name +output scoringUri string = onlineEndpoint.properties.scoringUri +output endpointId string = onlineEndpoint.id diff --git a/deploy/shared/modelDeployWithTraffic.bicep b/deploy/shared/modelDeployWithTraffic.bicep new file mode 100644 index 0000000..c40b428 --- /dev/null +++ b/deploy/shared/modelDeployWithTraffic.bicep @@ -0,0 +1,76 @@ +targetScope = 'resourceGroup' + +@description('Azure ML workspace name') +param workspaceName string +@description('Azure region for deployment') +param location string +@description('Model name in Azure ML registry') +param modelName string +@description('Unique endpoint name') +param endpointName string = '' +@description('Deployment name') +param deploymentName string = '' +@description('Full AzureML model ID URI (e.g. 
azureml://registries/azureml/models/ModelName/versions/Version)') +param modelId string +@description('VM SKU for the deployment') +param instanceType string = 'Standard_NC4as_T4_v3' +@description('Number of instances to deploy') +param instanceCount int = 1 +@description('Tags to apply to resources') +param tags object = {} +@description('Request settings for the deployment (object)') +param requestSettings object = {} +@description('Liveness probe settings for the deployment (object)') +param livenessProbe object = {} +@description('Environment variables for the deployment (object)') +param environmentVariables object = {} +@description('Overrideable unique suffix') +param uniqueSuffix string = '' + +// Deploy endpoint and deployment (no traffic update) +module deploy 'mlModelEndpoint.bicep' = { + name: 'deploy-${modelName}' + params: { + location: location + workspaceName: workspaceName + modelName: modelName + endpointName: endpointName + deploymentName: deploymentName + modelId: modelId + instanceType: instanceType + instanceCount: instanceCount + tags: tags + requestSettings: requestSettings + livenessProbe: livenessProbe + environmentVariables: environmentVariables + uniqueSuffix: uniqueSuffix + setTraffic: false + } +} + +// Update traffic to 100% for this deployment +module updateTraffic 'mlModelEndpoint.bicep' = { + name: 'update-traffic-${modelName}' + params: { + location: location + workspaceName: workspaceName + modelName: modelName + endpointName: endpointName + deploymentName: deploymentName + modelId: modelId + instanceType: instanceType + instanceCount: instanceCount + tags: tags + requestSettings: requestSettings + livenessProbe: livenessProbe + environmentVariables: environmentVariables + uniqueSuffix: uniqueSuffix + setTraffic: true + } + dependsOn: [deploy] +} + +output endpointName string = deploy.outputs.endpointName +output deploymentName string = deploy.outputs.deploymentName +output scoringUri string = deploy.outputs.scoringUri +output endpointId string = deploy.outputs.endpointId diff --git a/deploy/shared/models.yaml b/deploy/shared/models.yaml new file mode 100644 index 0000000..ff752cf --- /dev/null +++ b/deploy/shared/models.yaml @@ -0,0 +1,44 @@ +- name: MedImageInsight + env_name: MI2_MODEL_ENDPOINT + deployment: + modelId: "azureml://registries/azureml/models/MedImageInsight/versions/10" + instanceType: Standard_NC4as_T4_v3 + instanceCount: 2 + requestSettings: + maxConcurrentRequestsPerInstance: 3 + requestTimeout: PT1M30S + livenessProbe: + initialDelay: PT10M +- name: MedImageParse + env_name: MIP_MODEL_ENDPOINT + deployment: + modelId: "azureml://registries/azureml/models/MedImageParse/versions/10" + instanceType: Standard_NC40ads_H100_v5 + instanceCount: 1 + requestSettings: + maxConcurrentRequestsPerInstance: 8 + requestTimeout: PT1M30S + livenessProbe: + initialDelay: PT10M +- name: CXRReportGen + env_name: CXRREPORTGEN_MODEL_ENDPOINT + deployment: + modelId: "azureml://registries/azureml/models/CxrReportGen/versions/6" + instanceType: Standard_NC40ads_H100_v5 + instanceCount: 1 + requestSettings: + maxConcurrentRequestsPerInstance: 1 + requestTimeout: PT1M30S + livenessProbe: + initialDelay: PT20M +- name: Prov-GigaPath + env_name: GIGAPATH_MODEL_ENDPOINT + deployment: + modelId: "azureml://registries/azureml/models/Prov-GigaPath/versions/2" + instanceType: Standard_NC6s_v3 + instanceCount: 1 + requestSettings: + maxConcurrentRequestsPerInstance: 1 + requestTimeout: PT1M30S + livenessProbe: + initialDelay: PT10M \ No newline at end of file diff 
--git a/deploy/shared/scripts/cleanup.py b/deploy/shared/scripts/cleanup.py
new file mode 100644
index 0000000..9a7f54c
--- /dev/null
+++ b/deploy/shared/scripts/cleanup.py
@@ -0,0 +1,539 @@
+#!/usr/bin/env python3
+"""
+Simple cleanup script for Azure resources created by azd deployments.
+
+This handles the limitation that 'azd down' doesn't work for existing workspace deployments.
+"""
+
+import argparse
+import asyncio
+import sys
+from collections import defaultdict
+from functools import wraps
+from itertools import groupby
+from typing import Dict, List, Set, Tuple
+
+from azure.identity import DefaultAzureCredential
+from azure.mgmt.resource import ResourceManagementClient
+
+try:
+    from azure.ai.ml import MLClient
+    # imported inside the try block so a missing SDK triggers the install hint below
+    from azure.ai.ml.entities import OnlineEndpoint
+except (ImportError, ModuleNotFoundError) as e:
+    print(
+        "Error: azure.ai.ml (AzureML SDK v2) is not installed.\n"
+        "If you are on an AzureML VM, the 'azureml_py310_sdkv2' environment is recommended.\n"
+        "If not, you can install the SDK with: pip install azure-ai-ml"
+    )
+    raise e
+
+try:
+    from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient
+except (ImportError, ModuleNotFoundError) as e:
+    print(
+        "Warning: Azure Cognitive Services management SDK not installed.\n"
+        "Some purge operations may not be available.\n"
+        "Install with: pip install azure-mgmt-cognitiveservices"
+    )
+
+from utils import (
+    load_azd_env_vars,
+    detect_deployment_type,
+    GREEN,
+    YELLOW,
+    RED,
+    BLUE,
+    CYAN,
+    BOLD,
+    END,
+)
+
+
+def async_wrap(func):
+    """Decorator to automatically wrap blocking functions with asyncio.to_thread."""
+
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        return await asyncio.to_thread(func, *args, **kwargs)
+
+    return wrapper
+
+
+def get_deletion_order(resource: Dict) -> int:
+    """Return the deletion order for a resource. Lower numbers are deleted first."""
+    resource_type = resource.type.lower()
+
+    # Order 0: ML deployments (must be deleted before endpoints)
+    if "onlineendpoints/deployments" in resource_type:
+        return 0
+
+    # Order 1: ML endpoints (must be deleted before workspaces)
+    if "onlineendpoints" in resource_type:
+        return 1
+
+    # Order 2: Everything else (workspaces, storage, etc.)
+ return 2 + + +async def delete_resources_in_order( + resource_client: ResourceManagementClient, + resources: List[Dict], + ml_client: MLClient = None, + purge: bool = False, + credential=None, + subscription_id: str = None, + resource_group: str = None, +): + """Delete resources in dependency order.""" + if not resources: + return + + # Sort resources by deletion order, then group by order + sorted_resources = sorted(resources, key=get_deletion_order) + + print(f"\nDeleting {len(resources)} resources in dependency order...") + + total_successful = 0 + total_failed = 0 + + # Group resources by deletion order using groupby + for order, group in groupby(sorted_resources, key=get_deletion_order): + group_resources = list(group) + print(f"\nGroup {order}:") + + # Run all resources in this order group concurrently + deletion_tasks = [ + delete_resource_async( + resource_client, + resource, + ml_client, + purge, + credential, + subscription_id, + resource_group, + ) + for resource in group_resources + ] + + # Wait for all deletions in this group to complete + results = await asyncio.gather(*deletion_tasks, return_exceptions=True) + + # Count successes and failures + group_successful = sum(1 for result in results if result is True) + group_failed = len(results) - group_successful + + total_successful += group_successful + total_failed += group_failed + + print( + f" Group {order} completed: {len(group_resources)} resources ({group_successful} successful, {group_failed} failed)" + ) + + print( + f"\nOverall deletion summary: {total_successful} successful, {total_failed} failed" + ) + + +def find_azd_resources( + resource_client: ResourceManagementClient, resource_group: str, env_name: str +) -> List[Dict]: + """Find all resources tagged with azd-env-name in the specific resource group.""" + try: + resources = list( + resource_client.resources.list_by_resource_group( + resource_group, + filter=f"tagName eq 'azd-env-name' and tagValue eq '{env_name}'", + ) + ) + return resources + except Exception as e: + print(f"Error finding resources in resource group '{resource_group}': {e}") + return [] + + +def group_resources_by_relationships(azd_resources: List[Dict]) -> Dict: + """Group resources by their logical relationships and types using resource ID tree structure.""" + # Use defaultdict to automatically create lists for each resource type + resources = defaultdict(list) + + # Create mapping from endpoint ID to its deployments + endpoint_id_to_deployments = {} + + for resource in azd_resources: + resource_type = resource.type.lower() + + if "onlineendpoints/deployments" in resource_type: + resources["ml_deployments"].append(resource) + # Extract parent endpoint ID from deployment resource ID + # Deployment ID: /subscriptions/.../workspaces/ws/onlineEndpoints/ep/deployments/dep + # Parent endpoint ID: /subscriptions/.../workspaces/ws/onlineEndpoints/ep + endpoint_id = resource.id.rsplit("/deployments/", 1)[0] + + if endpoint_id not in endpoint_id_to_deployments: + endpoint_id_to_deployments[endpoint_id] = [] + endpoint_id_to_deployments[endpoint_id].append(resource) + + elif "onlineendpoints" in resource_type: + resources["ml_endpoints"].append(resource) + elif ( + "microsoft.machinelearningservices/workspaces" in resource_type + or "microsoft.cognitiveservices" in resource_type + ): + resources["ai_services"].append(resource) + + else: + resources["infrastructure"].append(resource) + + return resources, endpoint_id_to_deployments + + +def print_resources(azd_resources: List[Dict], delete_all: bool = 
False) -> List[Dict]:
+    """Print summary of resources organized by relationships, showing what will be deleted.
+
+    Returns the list of resources that will actually be deleted.
+    """
+    print(f"\n{BOLD}{'=' * 60}{END}")
+    print(f"{BOLD}{BLUE}Discovered Resources{END}")
+    print(f"{BOLD}{'=' * 60}{END}")
+
+    if not azd_resources:
+        print(f"{YELLOW}No azd-tagged resources found.{END}")
+        return []
+
+    total_infrastructure = len(azd_resources)
+    grouped, deps = group_resources_by_relationships(azd_resources)
+
+    # ML Endpoints & Deployments section
+    if grouped["ml_endpoints"] or grouped["ml_deployments"]:
+        print(f"\n{CYAN}ML Endpoints & Deployments:{END}")
+
+        # Show endpoints with their deployments using ID-based mapping
+        for endpoint in grouped["ml_endpoints"]:
+            endpoint_id = endpoint.id
+            deployments = deps.get(endpoint_id, [])
+
+            endpoint_delete_marker = f" {RED}βœ“ WILL DELETE{END}" if delete_all else ""
+            print(
+                f"  └── {BLUE}{endpoint.name}{END} ({endpoint.type}){endpoint_delete_marker}"
+            )
+            for deployment in deployments:
+                print(
+                    f"      β”œβ”€β”€ {deployment.name} ({deployment.type}) {RED}βœ“ WILL DELETE{END}"
+                )
+
+        # Show any orphaned deployments (endpoints not found)
+        all_endpoint_ids = {ep.id for ep in grouped["ml_endpoints"]}
+        for endpoint_id, deployments in deps.items():
+            if endpoint_id not in all_endpoint_ids:
+                # Extract endpoint name from ID for display
+                endpoint_name = endpoint_id.split("/")[-1]
+                print(f"  └── {YELLOW}{endpoint_name} (endpoint not found){END}")
+                for deployment in deployments:
+                    print(
+                        f"      β”œβ”€β”€ {deployment.name} ({deployment.type}) {RED}βœ“ WILL DELETE{END}"
+                    )
+
+    # AI Services section
+    if grouped["ai_services"]:
+        print(f"\n{CYAN}AI Services:{END}")
+        for workspace in grouped["ai_services"]:
+            delete_marker = f" {RED}βœ“ WILL DELETE{END}" if delete_all else ""
+            print(
+                f"  β”œβ”€β”€ {BLUE}{workspace.name}{END} ({workspace.type}){delete_marker}"
+            )
+
+    # Infrastructure section
+    if grouped["infrastructure"]:
+        print(f"\n{CYAN}Infrastructure:{END}")
+        for resource in grouped["infrastructure"]:
+            delete_marker = f" {RED}βœ“ WILL DELETE{END}" if delete_all else ""
+            print(f"  β”œβ”€β”€ {BLUE}{resource.name}{END} ({resource.type}){delete_marker}")
+
+    # Build list of resources that will actually be deleted
+    resources_to_delete = []
+
+    # Always delete deployments
+    resources_to_delete.extend(grouped["ml_deployments"])
+
+    # Delete other resources only if --all flag is used
+    if delete_all:
+        resources_to_delete.extend(grouped["ml_endpoints"])
+        resources_to_delete.extend(grouped["ai_services"])
+        resources_to_delete.extend(grouped["infrastructure"])
+
+    # Deletion summary
+    print(f"\n{BOLD}{'=' * 60}{END}")
+    print(f"{BOLD}Deletion Summary:{END}")
+    print(f"{BOLD}{'=' * 60}{END}")
+    if resources_to_delete:
+        print(f"{RED}βœ“ Resources to DELETE ({len(resources_to_delete)}):{END}")
+        for resource in resources_to_delete:
+            print(f"  - {resource.name}")
+    else:
+        print(f"{GREEN}βœ“ No resources will be deleted{END}")
+
+    if not delete_all and (
+        grouped["ml_endpoints"] or grouped["ai_services"] or grouped["infrastructure"]
+    ):
+        print(
+            f"\n{YELLOW}Note: Use --all flag to delete infrastructure resources ({total_infrastructure} resources){END}"
+        )
+
+    print(f"{BOLD}{'=' * 60}{END}")
+
+    return resources_to_delete
+
+
+async def delete_resource_async(
+    resource_client: ResourceManagementClient,
+    resource: Dict,
+    ml_client: MLClient = None,
+    purge: bool = False,
+    credential=None,
+    subscription_id: str = None,
resource_group: str = None, +): + """Delete a single resource asynchronously.""" + try: + print(f" Deleting {resource.name} ({resource.type})...") + + if "onlineendpoints/deployments" in resource.type.lower() and ml_client: + await delete_ml_deployment(ml_client, resource.id) + elif "onlineendpoints" in resource.type.lower() and ml_client: + await delete_ml_endpoint(ml_client, resource.id) + elif "microsoft.cognitiveservices/accounts" in resource.type.lower(): + await delete_cognitive_services( + resource_client, + resource, + credential, + subscription_id, + resource_group, + purge, + ) + else: + print(f" Warning: Unsupported resource type {resource.type} - skipping") + return False + + print(f" βœ“ Deleted {resource.name}") + return True + except Exception as e: + print(f" βœ— Failed to delete {resource.name}: {e}") + return False + + +def remove_deployment_traffic( + ml_client: MLClient, endpoint_name: str, deployment_name: str +): + """Set deployment traffic to 0 before deletion.""" + try: + # Get endpoint, set traffic to 0, update endpoint + endpoint = ml_client.online_endpoints.get(endpoint_name) + endpoint.traffic[deployment_name] = 0 + endpoint.identity.principal_id = None + endpoint.identity.tenant_id = None + ml_client.online_endpoints.begin_create_or_update(endpoint).wait() + print(f" βœ“ Traffic set to 0 for {deployment_name}") + + except Exception as e: + print(f" Warning: Could not remove traffic from {deployment_name}: {e}") + print(f" Attempting deletion anyway...") + + +@async_wrap +def delete_ml_deployment(ml_client: MLClient, resource_id: str): + """Delete an Azure ML endpoint deployment.""" + # Parse endpoint and deployment names from resource path + path_parts = resource_id.split("/") + endpoint_name = path_parts[path_parts.index("onlineEndpoints") + 1] + deployment_name = path_parts[path_parts.index("deployments") + 1] + + # Remove traffic first, then delete + remove_deployment_traffic(ml_client, endpoint_name, deployment_name) + ml_client.online_deployments.begin_delete( + name=deployment_name, endpoint_name=endpoint_name + ).wait() + + +@async_wrap +def delete_ml_endpoint(ml_client: MLClient, resource_id: str): + """Delete an Azure ML online endpoint.""" + # Parse endpoint name from resource path + path_parts = resource_id.split("/") + endpoint_name = path_parts[path_parts.index("onlineEndpoints") + 1] + ml_client.online_endpoints.begin_delete(name=endpoint_name).wait() + + +def delete_resource_by_id( + resource_client: ResourceManagementClient, resource_id: str, api_version: str +): + """Delete resource by ID - wrapped to be async.""" + return resource_client.resources.begin_delete_by_id( + resource_id, api_version=api_version + ).wait() + + +@async_wrap +def delete_cognitive_services( + resource_client: ResourceManagementClient, + resource: Dict, + credential, + subscription_id: str, + resource_group: str, + purge: bool = False, +): + """Delete a Cognitive Services account and optionally purge it.""" + api_version = "2024-10-01" # Latest stable API version for Cognitive Services + delete_resource_by_id(resource_client, resource.id, api_version) + + if purge: + try: + from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient + + cs_client = CognitiveServicesManagementClient(credential, subscription_id) + # Extract account name and location from resource + account_name = resource.name + location = resource.location + print(f" Purging Cognitive Services account: {account_name}") + cs_client.deleted_accounts.begin_purge( + location, resource_group, 
account_name + ).wait() + print(f" βœ“ Purged Cognitive Services account: {account_name}") + except Exception as e: + print( + f" Warning: Could not purge Cognitive Services account {resource.name}: {e}" + ) + + +def main(): + parser = argparse.ArgumentParser(description="Clean up azd deployment resources") + parser.add_argument("-y", "--yes", action="store_true", help="Skip confirmation") + parser.add_argument( + "--all", + action="store_true", + help="Delete all azd-tagged resources (default: only model deployments)", + ) + parser.add_argument( + "--purge", + action="store_true", + help="Purge Cognitive Services immediately after deletion", + ) + args = parser.parse_args() + + # Prevent deleting all resources for 'fresh' deployments + if args.all: + deployment_type = detect_deployment_type() + if deployment_type == "fresh": + print( + "Error: For fresh deployments, use the 'azd down' command instead of this cleanup script." + ) + sys.exit(1) + + return asyncio.run(async_main(args)) + + +async def async_main(args): + try: + # Detect deployment type + deployment_type = detect_deployment_type() + + # Load azd environment + print("Loading azd environment...") + config = load_azd_env_vars() + + env_name = config.get("AZURE_ENV_NAME") + subscription_id = config.get("AZURE_SUBSCRIPTION_ID") + resource_group = config.get("AZURE_RESOURCE_GROUP") + workspace_name = config.get("AZUREML_WORKSPACE_NAME") + location = config.get("AZURE_LOCATION", "eastus") # Default fallback + + if not all([env_name, subscription_id, resource_group]): + print("Error: Missing required azd environment variables") + print( + "Required: AZURE_ENV_NAME, AZURE_SUBSCRIPTION_ID, AZURE_RESOURCE_GROUP" + ) + return 1 + + print(f"Environment: {env_name}") + print(f"Subscription: {subscription_id}") + print(f"Resource Group: {resource_group}") + print(f"Location: {location}") + print(f"Deployment Type: {deployment_type}") + + if not args.all: + print("\nMode: Delete model deployments only (they charge per hour)") + print("Use --all flag to also delete infrastructure resources") + else: + print("\nMode: Delete ALL azd-tagged resources") + + if args.purge: + print("Purge mode: Will also purge soft-deleted resources") + + # Initialize clients + credential = DefaultAzureCredential() + resource_client = ResourceManagementClient(credential, subscription_id) + + # Initialize ML client if workspace exists + ml_client = None + if workspace_name: + try: + ml_client = MLClient( + credential=credential, + subscription_id=subscription_id, + resource_group_name=resource_group, + workspace_name=workspace_name, + ) + print(f"Initialized ML client for workspace: {workspace_name}") + except Exception as e: + print(f"Warning: Could not initialize ML client: {e}") + print("Will fall back to Resource Management API for all resources") + + # Find all azd-tagged resources in the specific resource group + print( + f"\nScanning for resources tagged with azd-env-name='{env_name}' in '{resource_group}'..." + ) + azd_resources = find_azd_resources(resource_client, resource_group, env_name) + + # Show what we found and what will be deleted + resources_to_delete = print_resources(azd_resources, args.all) + + if not resources_to_delete: + print("\nNo resources found to delete.") + return 0 + + # Confirm deletion + if not args.yes: + purge_text = " (with purging of Cognitive Services)" if args.purge else "" + response = input( + f"\nDelete {len(resources_to_delete)} resources{purge_text}? 
[y/N]: " + ) + if response.lower() != "y": + print("Cancelled.") + return 0 + + # Delete resources + print(f"\nStarting deletion of {len(resources_to_delete)} resources...") + + await delete_resources_in_order( + resource_client, + resources_to_delete, + ml_client, + args.purge, + credential, + subscription_id, + resource_group, + ) + + print("\nβœ“ Cleanup completed!") + + return 0 + + except KeyboardInterrupt: + print("\nCancelled by user.") + return 1 + except Exception as e: + print(f"Error: {e}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/deploy/shared/scripts/postprovision.py b/deploy/shared/scripts/postprovision.py new file mode 100644 index 0000000..a07e215 --- /dev/null +++ b/deploy/shared/scripts/postprovision.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +import os +import sys +import json +import re +import subprocess +from pathlib import Path +import traceback +from utils import ( + load_azd_env_vars, + parse_endpoints, + get_openai_api_key, + REPO_ENV_FILE, + REPO_EXAMPLE_ENV_FILE, +) + +YELLOW = "\033[33m" +RESET = "\033[0m" + + +def load_and_backup_env(): + """Backup existing .env (if any) and load base lines""" + root_env = REPO_ENV_FILE + if root_env.exists(): + backup_env = root_env.with_suffix(".bak") + backup_env.write_bytes(root_env.read_bytes()) + print(f"Backed up existing .env to {backup_env}") + base_file = root_env + else: + print(f"No existing .env found; using example at {REPO_EXAMPLE_ENV_FILE}") + base_file = REPO_EXAMPLE_ENV_FILE + lines = base_file.read_text().splitlines(True) + return root_env, lines + + +def gather_env_values(env_vars): + """Construct a dict of values to update in .env""" + + endpoints_str = env_vars.get("HLS_MODEL_ENDPOINTS") + if not endpoints_str: + raise RuntimeError("No endpoints found in AZD env; skipping .env update.") + endpoints = parse_endpoints(endpoints_str) + print(f"Parsed {len(endpoints)} endpoint(s) for update.") + + new_values = {} + for ep in endpoints: + name = ep.get("env_name") + val = ep.get("id") + if name and val: + new_values[name] = val + + # Add standard AZD variables + for key in ( + "AZURE_SUBSCRIPTION_ID", + "AZURE_RESOURCE_GROUP", + "AZUREML_WORKSPACE_NAME", + ): + v = env_vars.get(key) + if v: + new_values[key] = v + + # Add OpenAI variables if GPT deployment exists + openai_endpoint = env_vars.get("AZURE_OPENAI_ENDPOINT") + if openai_endpoint: + new_values["AZURE_OPENAI_ENDPOINT"] = openai_endpoint + new_values["AZURE_OPENAI_MODEL_NAME"] = env_vars.get("AZURE_OPENAI_MODEL_NAME") + print(f"Found OpenAI endpoint: {openai_endpoint}") + + # Get AI Services name directly from deployment outputs + ai_services_name = env_vars.get("AZURE_AI_SERVICES_NAME") + rg_name = env_vars.get("AZURE_RESOURCE_GROUP", "") + + if ai_services_name and rg_name: + print(f"Retrieving API key for AI Services: {ai_services_name}") + + try: + api_key = get_openai_api_key(ai_services_name, rg_name) + if api_key: + new_values["AZURE_OPENAI_API_KEY"] = api_key + print("Successfully retrieved OpenAI API key") + else: + print("Warning: API key retrieval returned empty result") + + except Exception as e: + print(f"Warning: Failed to retrieve OpenAI API key: {e}") + print( + "You may need to retrieve this manually using: az cognitiveservices account keys list" + ) + else: + print( + "Warning: AI Services name or resource group not found in deployment outputs" + ) + + return new_values + + +def merge_env_lines(lines, new_values): + """Merge existing env lines with new values""" + out = [] + seen = set() + updates 
= []
+    for line in lines:
+        if "=" in line and not line.strip().startswith("#"):
+            k, old_value = line.split("=", 1)
+            key = k.strip()
+            if key in new_values:
+                seen.add(key)
+                new_value = f'"{new_values[key]}"'
+                if new_value == old_value.strip():
+                    # Value is already up to date; keep the existing line as-is
+                    out.append(line)
+                    continue
+                new_line = f"{key}={new_value}\n"
+                out.append(new_line)
+
+                updates.append(f"- {line.strip()}")
+                updates.append(f"+ {new_line}")
+                continue
+        out.append(line)
+
+    # Append any values that were not present in the base file
+    unseen = [key for key in new_values if key not in seen]
+    if unseen:
+        out.append("\n\n")
+    for key in unseen:
+        new_line = f'{key}="{new_values[key]}"\n'
+        out.append(new_line)
+        updates.append(f"+ {new_line}")
+    return out, updates
+
+
+def write_env(root_env, lines):
+    """Write out the updated env file"""
+    Path(root_env).write_text("".join(lines))
+    print(f"Wrote updated .env to {root_env}\n")
+
+
+def main():
+    # Load current AZD environment
+    env_vars = load_azd_env_vars()
+
+    # Prepare and write .env update
+    print("=== Updating repository .env file ===")
+    root_env, base_lines = load_and_backup_env()
+    new_values = gather_env_values(env_vars)
+
+    print("Variables to update/add:")
+    for k, v in new_values.items():
+        print(f"  {k}={v}")
+    merged, updates = merge_env_lines(base_lines, new_values)
+
+    print("\nEnv file changes:")
+    for line in updates:
+        print(f"  {line}")
+    print()
+    write_env(root_env, merged)
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        code = main()
+    except Exception as e:
+        traceback.print_exc()
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    sys.exit(code)
diff --git a/deploy/shared/scripts/preprovision.py b/deploy/shared/scripts/preprovision.py
new file mode 100644
index 0000000..1639368
--- /dev/null
+++ b/deploy/shared/scripts/preprovision.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+import sys
+import argparse
+import traceback
+from utils import (
+    get_model_filter,
+    ensure_azd_env,
+    load_models,
+    load_azd_env_vars,
+    get_ml_workspace,
+)
+
+# ANSI colors
+YELLOW = "\033[93m"
+RED = "\033[91m"
+END = "\033[0m"
+
+
+def main(yes: bool = True, validate_existing: bool = False):
+    ensure_azd_env()
+
+    # Gather environment info first
+    env = load_azd_env_vars()
+    env_name = env["AZURE_ENV_NAME"]
+    subscription = env["AZURE_SUBSCRIPTION_ID"]
+
+    rg_name = env.get("AZURE_RESOURCE_GROUP", f"rg-{env_name} (assumed)")
+    ws_name = env.get("AZUREML_WORKSPACE_NAME", f"mlw-{env_name} (assumed)")
+
+    # Validate existing workspace if requested
+    if validate_existing:
+        print(
+            f"Validating existing workspace '{ws_name}' in resource group '{rg_name}'..."
+        )
+        try:
+            ws_obj = get_ml_workspace(ws_name, rg_name, subscription)
+            print(f"βœ“ Workspace found: {ws_obj['name']} in {ws_obj['location']}")
+
+            # Validate location matches
+            ws_location = ws_obj["location"]
+            current_loc = env.get("AZURE_LOCATION")
+            if current_loc and current_loc.lower() != ws_location.lower():
+                print(
+                    f"{RED}ERROR: AZURE_LOCATION ({current_loc}) does not match workspace location ({ws_location}){END}"
+                )
+                return 1
+
+        except RuntimeError as e:
+            print(f"{RED}ERROR: {e}{END}")
+            print(
+                f"{RED}Please ensure the workspace exists or check your configuration.{END}"
+            )
+            return 1
+
+    models = load_models()
+    model_filter = get_model_filter()
+    models_to_deploy = []
+    for model in models:
+        name = model.get("name", "")
+        if not model_filter or name in model_filter:
+            deployment = model.get("deployment", {})
+            instance_type = deployment.get("instanceType", "")
+            instance_count = deployment.get("instanceCount", "")
+            models_to_deploy.append((name, instance_type, instance_count))
+
+    # Check for GPT deployment configuration
+    gpt_model = env.get("gptModel", "").strip()
+    gpt_capacity = env.get("gptModelCapacity", "50")
+    gpt_location = env.get("gptDeploymentLocation", env.get("AZURE_LOCATION", ""))
+
+    print(f"AZD Environment: {env_name}")
+    print(f"\nThe following models will be deployed to Azure ML workspace: {ws_name}")
+    print(f"Resource group: {rg_name}")
+    print(f"Subscription: {subscription}")
+    print("These models will incur Azure charges.\n")
+
+    # Display healthcare AI models
+    for name, instance_type, instance_count in models_to_deploy:
+        print(f"- {name}: {instance_type} x {instance_count}")
+
+    # Display GPT deployment if configured
+    if gpt_model:
+        model_name, model_version = (
+            gpt_model.split(";") if ";" in gpt_model else (gpt_model, "latest")
+        )
+        print("\nGPT model deployment:")
+        print(f"- {model_name} (version: {model_version})")
+        print(f"  Capacity: {gpt_capacity}K tokens per minute")
+        print(f"  Location: {gpt_location}")
+
+    # Continue to confirmation prompt
+    if not yes:
+        print("\nContinue with deployment? [y/N]: ", end="")
+        choice = input().strip().lower()
+        if choice not in ("y", "yes"):
+            print(f"{RED}Aborting deployment.{END}")
+            return 1
+    else:
+        print(f"{YELLOW}Skipping confirmation (--yes).{END}")
+    print(f"{YELLOW}Proceeding with deployment...{END}")
+    return 0
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-y",
+        "--yes",
+        action="store_true",
+        help="Assume yes for all confirmation prompts",
+    )
+    parser.add_argument(
+        "--validate-existing",
+        action="store_true",
+        help="Validate that the workspace exists in the specified resource group",
+    )
+    args = parser.parse_args()
+    try:
+        code = main(args.yes, args.validate_existing)
+    except Exception as e:
+        traceback.print_exc()
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    sys.exit(code)
diff --git a/deploy/shared/scripts/select_models.py b/deploy/shared/scripts/select_models.py
new file mode 100644
index 0000000..7cca107
--- /dev/null
+++ b/deploy/shared/scripts/select_models.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+import sys
+import json
+import traceback
+from utils import ensure_azd_env, set_azd_env_value, load_models
+
+
+def parse_and_validate_selection(selection, available_models_len):
+    """Parse the comma-separated selection and validate each index against the menu."""
+    try:
+        indices = [int(x.strip()) for x in selection.split(",") if x.strip()]
+    except Exception:
+        raise ValueError(
+            "Invalid input. Please enter numbers separated by commas, or '*'."
+        )
+    if not indices or any(i < 1 or i > available_models_len for i in indices):
+        raise ValueError(
+            f"Invalid selection. Indices must be between 1 and {available_models_len}"
+        )
+    return indices
+
+
+def main():
+    # Ensure azd environment is active
+    ensure_azd_env()
+    # Load model definitions
+    models = load_models()
+    if not models:
+        raise ValueError("No models found in models.yaml.")
+
+    # Build and print available models in one loop
+    available_models = []
+    print("Available models:")
+    for model in models:
+        name = model.get("name", "")
+        if not name:
+            continue
+        deployment = model.get("deployment", {})
+        instance_type = deployment.get("instanceType", "")
+        instance_count = deployment.get("instanceCount", "")
+        available_models.append((name, instance_type, instance_count))
+        print(f"  {len(available_models)}: {name}: {instance_type} x {instance_count}")
+    if not available_models:
+        raise ValueError("No valid models found in models.yaml.")
+    print()
+    print(
+        "Enter a comma-separated list of model numbers to deploy (e.g. 1,3,4), or '*' to deploy all:"
+    )
+    selection = input("Models to deploy: ").strip()
+    if selection == "*":
+        print("Deploying all models.")
+        set_azd_env_value("modelFilter", "[]")
+        return 0
+
+    indices = parse_and_validate_selection(selection, len(available_models))
+    selected_names = [available_models[i - 1][0] for i in indices]
+    print(f"Selected models: {selected_names}")
+
+    filter_json = json.dumps(selected_names)
+    set_azd_env_value("modelFilter", filter_json)
+    print("Set modelFilter in azd environment.")
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        code = main()
+    except Exception as e:
+        traceback.print_exc()
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+    sys.exit(code)
diff --git a/deploy/shared/scripts/utils.py b/deploy/shared/scripts/utils.py
new file mode 100644
index 0000000..d714580
--- /dev/null
+++ b/deploy/shared/scripts/utils.py
@@ -0,0 +1,195 @@
+import subprocess
+import json
+import re
+from pathlib import Path
+
+import yaml
+from azureml.core import Workspace
+
+MODELS_YAML = Path(__file__).parent.parent / "models.yaml"
+
+REPO_ROOT = Path(__file__).parents[3]
+REPO_ENV_FILE = REPO_ROOT / ".env"
+REPO_EXAMPLE_ENV_FILE = REPO_ROOT / "env.example"
+
+MODEL_FILTER_ENV_VAR = "HLS_MODEL_FILTER"
+
+# ANSI colors for better readability
+GREEN = "\033[92m"
+YELLOW = "\033[93m"
+RED = "\033[91m"
+BLUE = "\033[94m"
+CYAN = "\033[96m"
+BOLD = "\033[1m"
+END = "\033[0m"
+
+
+def get_model_filter():
+    val = get_azd_env_value(MODEL_FILTER_ENV_VAR)
+    if not val:
+        return []
+    return [item.strip() for item in val.split(",") if item.strip()]
+
+
+def get_azd_env_value(key, default=None):
+    result = subprocess.run(
+        ["azd", "env", "get-value", key], capture_output=True, text=True
+    )
+    if result.returncode != 0 or not result.stdout.strip():
+        return default
+    return result.stdout.strip().strip('"')
+
+
+def set_azd_env_value(key, value):
+    result = subprocess.run(["azd", "env", "set", key, value])
+    return result.returncode == 0
+
+
+def load_azd_env_vars():
+    """
+    Load all AZD environment variables by invoking `azd env get-values`.
+    """
+    # `azd env get-values` outputs JSON of all key/value pairs
+    result = subprocess.run(
+        ["azd", "env", "get-values", "--output", "json"],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    return json.loads(result.stdout)
+
+
+def parse_endpoints(endpoints_str):
+    """Parse a JSON string of endpoints, raising ValueError on parse errors."""
+    if not endpoints_str:
+        return []
+    try:
+        cleaned = re.sub(r'^"|"$', "", endpoints_str).replace('\\"', '"')
+        return json.loads(cleaned)
+    except Exception as e:
+        raise ValueError(f"Failed to parse endpoints JSON: {e}")
+
+
+def ensure_azd_env():
+    """
+    Ensure an azd environment is active. Returns the environment name or raises RuntimeError if none.
+    """
+    env_name = get_azd_env_value("AZURE_ENV_NAME")
+    if not env_name:
+        raise RuntimeError(
+            "No active azd environment detected. "
+            "Please create (azd env new <env-name>) or select (azd env select <env-name>) an environment."
+ ) + return env_name + + +def load_models(): + """Load models from YAML, returning a list of model dicts.""" + path = Path(MODELS_YAML) + if not path.exists(): + raise FileNotFoundError(f"models.yaml not found at {path}") + data = yaml.safe_load(path.read_text()) + if isinstance(data, dict): + for v in data.values(): + if isinstance(v, list): + return v + raise ValueError("No model list found in YAML file.") + if isinstance(data, list): + return data + raise ValueError("models.yaml is not a list or dict of lists.") + + +def get_ml_workspace(name: str, resource_group: str, subscription: str) -> dict: + """ + Returns the Azure ML workspace object using the Python SDK, or raises RuntimeError if not found. + """ + try: + ws = Workspace.get( + name=name, resource_group=resource_group, subscription_id=subscription + ) + except Exception as e: + raise RuntimeError( + f"Failed to retrieve workspace '{name}' in RG '{resource_group}': {e}" + ) + # Construct the ARM resource ID since Workspace object doesn't expose .id + arm_id = f"/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.MachineLearningServices/workspaces/{name}" + return { + "location": ws.location, + "resourceGroup": ws.resource_group, + "id": arm_id, + "name": ws.name, + } + + +def get_openai_api_key(ai_services_name: str, resource_group: str) -> str: + """ + Retrieve the OpenAI API key for an AI Services resource using Azure CLI. + + Args: + ai_services_name: Name of the Azure AI Services resource + resource_group: Name of the resource group containing the AI Services + + Returns: + The primary API key for the AI Services resource + + Raises: + RuntimeError: If the API key retrieval fails + """ + try: + cmd = [ + "az", + "cognitiveservices", + "account", + "keys", + "list", + "--name", + ai_services_name, + "--resource-group", + resource_group, + "--query", + "key1", + "--output", + "tsv", + ] + + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + api_key = result.stdout.strip() + + if not api_key: + raise RuntimeError("API key retrieval returned empty result") + + return api_key + + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Azure CLI command failed: {e}") + except Exception as e: + raise RuntimeError(f"Failed to retrieve API key: {e}") + + +def detect_deployment_type(): + """Detect if we're in a 'fresh' or 'existing' deployment context.""" + # Start from current directory and walk up looking for azure.yaml + current_dir = Path.cwd() + + azure_yaml = current_dir / "azure.yaml" + if not azure_yaml.exists(): + raise RuntimeError( + "This script should not be run directly in the root directory. " + "Please run it from within the deploy/fresh or deploy/existing directories." + ) + + # Check if we're in or under deploy/fresh or deploy/existing + for parent in [current_dir] + list(current_dir.parents): + if parent.name in ["fresh", "existing"] and (parent.parent / "shared").exists(): + return parent.name + + # Also check if azure.yaml exists and we can infer from path + azure_yaml = parent / "azure.yaml" + if azure_yaml.exists(): + if "fresh" in str(parent): + return "fresh" + elif "existing" in str(parent): + return "existing" + + return "unknown" diff --git a/deploy/shared/storageAccount.bicep b/deploy/shared/storageAccount.bicep new file mode 100644 index 0000000..c02aac2 --- /dev/null +++ b/deploy/shared/storageAccount.bicep @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
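+//
+// Provisions a Standard_LRS StorageV2 account (TLS 1.2 minimum, HTTPS-only) and
+// assigns the blob, table, queue and file data-plane roles plus Reader and Data
+// Access to every principal passed in via grantAccessTo and additionalIdentities.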
+ +@description('Specifies the name of the Azure Storage account.') +param storageAccountName string + +@description('Specifies the location in which the Azure Storage resources should be deployed.') +param location string + +@description('Allow shared key access to the storage account (default: true for compatibility)') +param allowSharedKeyAccess bool = true + +@description('Tags to apply to all resources') +param tags object = {} + +@description('List of principals to grant access to') +param grantAccessTo array = [] + +@description('Additional managed identities to assign access to') +param additionalIdentities array = [] + +var access = [for i in range(0, length(additionalIdentities)): { + id: additionalIdentities[i] + type: 'ServicePrincipal' +}] + +var grantAccessToUpdated = concat(grantAccessTo, access) + +resource sa 'Microsoft.Storage/storageAccounts@2023-01-01' = { + name: storageAccountName + location: location + tags: tags + sku: { + name: 'Standard_LRS' + } + kind: 'StorageV2' + properties: { + allowSharedKeyAccess: allowSharedKeyAccess + accessTier: 'Hot' + minimumTlsVersion: 'TLS1_2' + supportsHttpsTrafficOnly: true + } +} + +resource blobServices 'Microsoft.Storage/storageAccounts/blobServices@2023-01-01' = { + parent: sa + name: 'default' +} + +// Storage Blob Data Owner +resource storageBlobDataOwner 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' +} + +resource blobDataAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, sa.id, storageBlobDataOwner.id) + scope: sa + properties: { + roleDefinitionId: storageBlobDataOwner.id + principalId: principal.id + principalType: principal.type + } + } +] + +// Storage Table Data Contributor +resource storageTableDataContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '0a9a7e1f-b9d0-4cc4-a60d-0319b160aaa3' +} + +resource tableDataAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, sa.id, storageTableDataContributor.id) + scope: sa + properties: { + roleDefinitionId: storageTableDataContributor.id + principalId: principal.id + principalType: principal.type + } + } +] + +// Storage Queue Data Contributor +resource storageQueueDataContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '974c5e8b-45b9-4653-ba55-5f855dd0fb88' +} + +resource queueDataAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, sa.id, storageQueueDataContributor.id) + scope: sa + properties: { + roleDefinitionId: storageQueueDataContributor.id + principalId: principal.id + principalType: principal.type + } + } +] + +// Storage File Data SMB Share Elevated Contributor +resource storageFileDataSMBShareElevatedContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'a7264617-510b-434b-a828-9731dc254ea7' +} + +resource fileShareAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, sa.id, storageFileDataSMBShareElevatedContributor.id) + scope: sa + properties: { + roleDefinitionId: storageFileDataSMBShareElevatedContributor.id + principalId: principal.id + principalType: principal.type + } + } 
+] + +// Storage File Data Privileged Contributor +resource storageFileDataPrivilegedContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '69566ab7-960f-475b-8e7c-b3118f30c6bd' +} + +resource fileDataPrivilegedAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, sa.id, storageFileDataPrivilegedContributor.id) + scope: sa + properties: { + roleDefinitionId: storageFileDataPrivilegedContributor.id + principalId: principal.id + principalType: principal.type + } + } +] + +// Reader and Data Access +resource readerAndDataAccess 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'c12c1c16-33a1-487b-954d-41c89c60f349' +} + +resource keyAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for principal in grantAccessToUpdated: if (!empty(principal.id)) { + name: guid(principal.id, sa.id, readerAndDataAccess.id) + scope: sa + properties: { + roleDefinitionId: readerAndDataAccess.id + principalId: principal.id + principalType: principal.type + } + } +] + +output storageAccountID string = sa.id +output storageAccountName string = sa.name +output storageAccountBlobEndpoint string = sa.properties.primaryEndpoints.blob diff --git a/docs/deployment-guide.md b/docs/deployment-guide.md new file mode 100644 index 0000000..a94d254 --- /dev/null +++ b/docs/deployment-guide.md @@ -0,0 +1,160 @@ +# Healthcare AI Deployment Guide + +This comprehensive guide covers all deployment methods for Healthcare AI models, including optional GPT model deployment. + +## Quick Start + +The fastest way to get started is with the Azure Developer CLI (azd), which automatically provisions all required infrastructure and deploys the models. + +### Prerequisites + +- [Azure Developer CLI](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/install-azd) installed +- Azure subscription with sufficient quota +- Azure CLI logged in (`az login`) + +### Basic Deployment (Healthcare AI Only) + +```bash +# Clone and navigate to deployment folder +cd deploy/fresh + +# Create new environment +azd env new healthcareai-examples-env + +# Set your preferred Azure region +azd env set AZURE_LOCATION "eastus2" + +# Deploy everything +azd up +``` + +This deploys all three healthcare AI models (MedImageInsight, MedImageParse, CXRReportGen) with a new Azure ML workspace. + +### Output Environment Variables + +After successful deployment, your root level `.env` file should contain: + +```bash +# Healthcare AI model endpoints +MI2_MODEL_ENDPOINT= +MIP_MODEL_ENDPOINT= +CXRREPORTGEN_MODEL_ENDPOINT= + +# GPT integration variables (if GPT model was deployed) +AZURE_OPENAI_ENDPOINT= +AZURE_OPENAI_API_KEY= +``` + +## Deployment Configuration + +Choose the deployment method that best fits your environment and requirements: + +- **[Fresh Deployment](../deploy/fresh/README.md)** - Creates new resource group and Azure ML workspace +- **[Existing Workspace Deployment](../deploy/existing/README.md)** - Uses your existing Azure ML workspace + +> [!NOTE] +> **Manual Deployment**: For users who prefer manual deployment, see the [Manual Deployment Guide](manual-deployment.md) which covers Azure Portal and Python SDK deployment methods. + +### Model Selection + +By default, all three healthcare AI models (MedImageInsight, MedImageParse, CXRReportGen) are deployed. 
You can optionally select specific models: + +```bash +# Interactive model selection +python ../shared/scripts/select_models.py + +# Or set via environment variable +azd env set HLS_MODEL_FILTER "medimageinsight,cxrreportgen" +``` + +### GPT Model Configuration + +#### GPT Model Options + +| Model | Model String | Recommended Capacity | Description | +|-------|-------------|---------------------|-------------| +| GPT-4o | `"gpt-4o;2024-08-06"` | 50-100K TPM | Latest multimodal model | +| GPT-4.1 | `"gpt-4.1;2025-04-14"` | 50-100K TPM | Advanced reasoning model | + +#### Environment Variables + +| Variable | Default | Description | Example Values | +|----------|---------|-------------|----------------| +| `AZURE_GPT_MODEL` | `""` (skip) | GPT model and version | `"gpt-4o;2024-08-06"`, `"gpt-4.1;2025-04-14"` | +| `AZURE_GPT_CAPACITY` | `"100"` | Tokens per minute (thousands) | `"100"`, `"200"` | +| `AZURE_GPT_LOCATION` | `""` (main location) | Deployment region | `"southcentralus"`, `"westus3"` | + +#### Example Configurations + +**Deploy GPT-4o with default capacity:** +```bash +azd env set AZURE_GPT_MODEL "gpt-4o;2024-08-06" +azd up +``` + +**Deploy GPT-4.1 with custom capacity:** +```bash +azd env set AZURE_GPT_MODEL "gpt-4.1;2025-04-14" +azd env set AZURE_GPT_CAPACITY "100" +azd up +``` + +**Deploy GPT in different region:** +```bash +azd env set AZURE_GPT_MODEL "gpt-4o;2024-08-06" +azd env set AZURE_GPT_LOCATION "southcentralus" +azd up +``` + +#### Tips for GPT Deployment + +- **Quota**: Ensure you have Azure OpenAI quota in your target region before deployment +- **Capacity Planning**: Start with 50K TPM and adjust based on usage patterns +- **Region Selection**: Some GPT models may have better availability in specific regions +- **Integration Ready**: GPT endpoints work seamlessly with healthcare AI models for multimodal workflows + +## Next Steps + +Once deployed, return to the main README and continue with [Step 4: Setup your local environment](../README.md#step-4-setup-your-local-environment) + +## Resource Cleanup + +### Quick Cleanup - Model Deployments Only (Recommended) + +To save costs by stopping expensive GPU compute resources while keeping your infrastructure: + +```bash +# Delete only model endpoint deployments (they charge per hour) +python cleanup.py + +# Delete without confirmation +python cleanup.py --yes +``` + +This removes only the model endpoint deployments that charge per hour while running, keeping the infrastructure (workspace, storage, etc.) for future use. 
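+
+For existing-workspace deployments, the same script can also remove everything azd tagged, optionally purging the soft-deleted Azure AI Services account (flags as defined in `deploy/shared/scripts/cleanup.py`):
+
+```bash
+# Delete all azd-tagged resources and purge Cognitive Services
+python cleanup.py --all --purge
+```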
+ +### Complete Resource Cleanup + +For complete cleanup instructions specific to your deployment method: + +- **Fresh deployments**: See [Fresh Deployment Cleanup](../deploy/fresh/README.md#resource-cleanup) +- **Existing deployments**: See [Existing Deployment Cleanup](../deploy/existing/README.md#resource-cleanup) + +## Troubleshooting + +### Azure Developer CLI Issues +- **Permission Issues**: Ensure your account has Contributor role on the subscription or resource group +- **Quota Issues**: Verify you have sufficient quota in your selected region + +### GPT Deployment Issues +- **Quota**: Ensure you have Azure OpenAI quota in the target region +- **Region**: Try different regions if quota is unavailable +- **Model availability**: Verify the model version is available in your region + +### Common Error Messages +- `"Insufficient quota"`: Request more quota in Azure portal for the specific VM family or OpenAI TPM +- `"Model not found"`: Check model name/version spelling and regional availability +- `"Region not supported"`: Try a different Azure region +- `"Permission denied"`: Verify you have Contributor access to the resource group or subscription + +For manual deployment troubleshooting, see the [Manual Deployment Guide](manual-deployment.md#troubleshooting). \ No newline at end of file diff --git a/docs/manual-deployment.md b/docs/manual-deployment.md new file mode 100644 index 0000000..6f86aac --- /dev/null +++ b/docs/manual-deployment.md @@ -0,0 +1,105 @@ +# Manual Deployment Guide + +For users who prefer manual deployment, you can deploy healthcare AI models using either the Azure portal or Python SDK. + +## Prerequisites + +- Azure subscription with sufficient quota for the models you want to deploy + +## Step 1: Create Azure ML Workspace (if needed) + +If you don't have an existing Azure ML workspace, create one first: [Azure ML workspace creation guide](https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources) + + +## Step 2: Deploy Healthcare AI Models + +Choose one of the following deployment methods: + +### Option 1: Azure Portal Deployment + +Follow the official Microsoft documentation to deploy the healthcare AI models you need: + +- **[Overview of Foundation models for healthcare AI](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/healthcare-ai/healthcare-ai-models)** - General overview and concepts +- **[MedImageInsight](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/healthcare-ai/deploy-medimageinsight)** - Medical image analysis deployment guide +- **[CXRReportGen](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/healthcare-ai/deploy-cxrreportgen)** - Chest X-ray report generation deployment guide +- **[MedImageParse](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/healthcare-ai/deploy-medimageparse?tabs=medimageparse)** - Medical image segmentation deployment guide + +These guides provide step-by-step instructions for deploying models through Azure AI Foundry Studio, including SKU recommendations and configuration options. 
+ +### Option 2: Python SDK Deployment + +For programmatic deployment in existing Azure ML workspaces, use our deployment notebooks: + +* [MedImageInsight deployment](https://aka.ms/healthcare-ai-examples-mi2-deploy) +* [MedImageParse deployment](https://aka.ms/healthcare-ai-examples-mip-deploy) +* [CXRReportGen deployment](https://aka.ms/healthcare-ai-examples-cxr-deploy) + +## Step 3: Get Endpoint Resource ID + +After deployment completes, you'll need the Azure resource ID for each endpoint. There are three ways to obtain this: + +### Option 1: Azure CLI (Recommended) + +Use the Azure CLI to list and get endpoint details: + +```bash +# List all endpoints in your workspace +az ml online-endpoint list --resource-group {your-resource-group} --workspace-name {your-workspace} + +# Get specific endpoint details (including resource ID) +az ml online-endpoint show --name {your-endpoint-name} --resource-group {your-resource-group} --workspace-name {your-workspace} +``` + +The resource ID will be in the `id` field of the output. + +### Option 2: Azure Portal + +1. Go to the [Azure Portal](https://portal.azure.com) +2. Navigate to your deployed endpoint resource +3. Copy the resource ID from the browser URL + +The URL will look like: +``` +https://portal.azure.com/#@yourtenant.onmicrosoft.com/resource/subscriptions/12345678-1234-1234-1234-123456789abc/resourceGroups/your-resource-group/providers/Microsoft.MachineLearningServices/workspaces/your-workspace/onlineEndpoints/your-endpoint-name/overview +``` + +The resource ID is the part after `/resource/`: +``` +/subscriptions/12345678-1234-1234-1234-123456789abc/resourceGroups/your-resource-group/providers/Microsoft.MachineLearningServices/workspaces/your-workspace/onlineEndpoints/your-endpoint-name +``` + +### Option 3: Python SDK + +If you deployed using the Python SDK, you can get the resource ID programmatically: + +```python +# After creating your endpoint object +print(f"Endpoint resource ID: {endpoint.id}") +``` + +## Step 4: Update Environment Variables + +Create a `.env` file for environment variables: + +```sh +cp env.example .env +``` + +Add the endpoint resource IDs to your `.env` file: + +```bash +# Replace with your actual endpoint resource IDs (with leading slash) +MI2_MODEL_ENDPOINT=/subscriptions/{your-sub-id}/resourceGroups/{your-rg}/providers/Microsoft.MachineLearningServices/workspaces/{your-workspace}/onlineEndpoints/{your-medimageinsight-endpoint} +MIP_MODEL_ENDPOINT=/subscriptions/{your-sub-id}/resourceGroups/{your-rg}/providers/Microsoft.MachineLearningServices/workspaces/{your-workspace}/onlineEndpoints/{your-medimageparse-endpoint} +CXRREPORTGEN_MODEL_ENDPOINT=/subscriptions/{your-sub-id}/resourceGroups/{your-rg}/providers/Microsoft.MachineLearningServices/workspaces/{your-workspace}/onlineEndpoints/{your-cxrreportgen-endpoint} +``` + +**Note**: Use the full resource ID path (with the leading slash) as shown above. Replace the placeholder values in curly braces with your actual resource names and IDs. See `env.example` for more examples and detailed formatting instructions. + +## Next Steps + +Once deployed, return to the main README and continue with [Step 4: Setup your local environment](../README.md#step-4-setup-your-local-environment) + +## Troubleshooting + +For additional troubleshooting, see the main [Deployment Guide](deployment-guide.md#troubleshooting). 
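+
+## Verifying Endpoints
+
+If you installed the `healthcareai_toolkit` package from this repository, you can sanity-check the endpoints configured in `.env` with the bundled endpoint tester (see `package/healthcareai_toolkit/cli/test_endpoints.py`; this assumes the `healthcareai-test` entry point is available after installation):
+
+```bash
+# Test only the MedImageInsight endpoint, with minimal output
+healthcareai-test --models mi2 --quiet
+```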
diff --git a/package/healthcareai_toolkit/cli/__init__.py b/package/healthcareai_toolkit/cli/__init__.py new file mode 100644 index 0000000..4d1d6fb --- /dev/null +++ b/package/healthcareai_toolkit/cli/__init__.py @@ -0,0 +1 @@ +"""CLI commands for healthcareai_toolkit.""" diff --git a/package/healthcareai_toolkit/cli/test_endpoints.py b/package/healthcareai_toolkit/cli/test_endpoints.py new file mode 100644 index 0000000..a4c80cd --- /dev/null +++ b/package/healthcareai_toolkit/cli/test_endpoints.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python3 +""" +Test healthcare AI model endpoints connectivity and functionality. + +This command tests the deployed model endpoints to ensure they are accessible +and responding correctly. +""" + +import sys +import os +import glob +from pathlib import Path +import argparse +import traceback +from typing import Optional +import numpy as np + +# Import healthcare AI toolkit components +from healthcareai_toolkit import settings +from healthcareai_toolkit.clients.openai import create_openai_client + +# Color codes for terminal output +class Colors: + RED = "\033[91m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + BLUE = "\033[94m" + PURPLE = "\033[95m" + CYAN = "\033[96m" + WHITE = "\033[97m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + END = "\033[0m" # End formatting + + +from healthcareai_toolkit.clients import ( + MedImageInsightClient, + MedImageParseClient, + CxrReportGenClient, + GigaPathClient, +) + + +def pretty_print_array(array, display_name="Vector"): + """Pretty print arrays with truncation for readability.""" + array = np.array(array) + + if array.ndim < 2: + array = array.reshape(1, -1) + + def format_row(row): + return np.array2string( + row, + separator=", ", + precision=3, + threshold=10, + edgeitems=3, + formatter={"float_kind": lambda x: f"{x:.3f}"}, + ) + + def format_array(arr): + if arr.ndim == 1: + return [format_row(arr)] + top = format_array(arr[0]) + bottom = format_array(arr[-1]) + top[0] = "[" + top[0] + bottom[-1] = bottom[-1] + "]" + return top + ["..."] + bottom + + for i, arr in enumerate(array): + print(f" {display_name} {i} (shape: {arr.shape}, dtype: {arr.dtype})") + print("\n".join(f" {l}" for l in format_array(arr))) + if i > 3: + break + + +def pretty_print_response(response): + """Pretty print API responses.""" + if isinstance(response, dict): + for key, value in response.items(): + pretty_print_array(value, display_name=key) + elif isinstance(response, list): + for i, item in enumerate(response): + print(f" == Response {i} ==") + pretty_print_response(item) + else: + print(f" {response}") + + +def test_medimageinsight_endpoint(quiet: bool = False) -> Optional[bool]: + """Test MedImageInsight endpoint connectivity.""" + print(f"\n{Colors.BLUE}Testing MedImageInsight endpoint...{Colors.END}") + + # Check if endpoint is configured + if not settings.MI2_MODEL_ENDPOINT: + print( + f"{Colors.YELLOW}⚠ MI2_MODEL_ENDPOINT not configured - skipping test{Colors.END}" + ) + return None + + try: + # Find test data + data_root = settings.DATA_ROOT + input_folder = os.path.join( + data_root, "medimageinsight-classification", "images" + ) + + if not os.path.exists(input_folder): + print(f"{Colors.YELLOW}⚠ Test data not found at {input_folder}{Colors.END}") + print( + f"{Colors.GREEN}βœ“ Skipping functional test (no test data){Colors.END}" + ) + return True + + image_files = list(glob.glob(input_folder + "/*.dcm")) + if not image_files: + print( + f"{Colors.YELLOW}⚠ No DICOM files found in {input_folder}{Colors.END}" + ) + print( + 
f"{Colors.GREEN}βœ“ Skipping functional test (no test data){Colors.END}" + ) + return True + + test_image = image_files[0] + print( + f"{Colors.GREEN}βœ“ Found test image: {os.path.basename(test_image)}{Colors.END}" + ) + + # Test the endpoint + client = MedImageInsightClient() + response = client.submit( + image_list=[test_image], + text_list=["x-ray chest anteroposterior No Finding"], + ) + + if not all( + key in response[0] + for key in ["image_features", "text_features", "scaling_factor"] + ): + print(f"{Colors.RED}βœ— Response does not contain expected keys{Colors.END}") + return False + + print(f"{Colors.GREEN}βœ“ Endpoint responded with expected format{Colors.END}") + + if not quiet: + pretty_print_response(response) + + return True + + except Exception as e: + print(f"{Colors.RED}βœ— Endpoint test failed: {str(e)}{Colors.END}") + traceback.print_exc() + return False + + +def test_medimageparse_endpoint(quiet: bool = False) -> bool: + """Test MedImageParse endpoint connectivity.""" + print(f"\n{Colors.BLUE}Testing MedImageParse endpoint...{Colors.END}") + + # Check if endpoint is configured + if not settings.MIP_MODEL_ENDPOINT: + print( + f"{Colors.YELLOW}⚠ MIP_MODEL_ENDPOINT not configured - skipping test{Colors.END}" + ) + return None + + try: + # Find test data + data_root = settings.DATA_ROOT + input_folder = os.path.join(data_root, "segmentation-examples") + test_image = os.path.join(input_folder, "covid_1585.png") + + if not os.path.exists(test_image): + print(f"{Colors.YELLOW}⚠ Test data not found at {test_image}{Colors.END}") + print( + f"{Colors.GREEN}βœ“ Skipping functional test (no test data){Colors.END}" + ) + return True + + print( + f"{Colors.GREEN}βœ“ Found test image: {os.path.basename(test_image)}{Colors.END}" + ) + + # Test the endpoint + text_prompt = "left lung & right lung & COVID-19 infection" + num_masks = len(text_prompt.split("&")) + + client = MedImageParseClient() + response = client.submit(image_list=[test_image], prompts=[text_prompt]) + + if not all(key in response[0] for key in ["image_features", "text_features"]): + print(f"{Colors.RED}βœ— Response does not contain expected keys{Colors.END}") + return False + + if response[0]["image_features"].shape[0] != num_masks: + print( + f"{Colors.RED}βœ— Expected {num_masks} masks, but got {response[0]['image_features'].shape[0]} masks{Colors.END}" + ) + return False + + print(f"{Colors.GREEN}βœ“ Endpoint responded with expected format{Colors.END}") + + if not quiet: + pretty_print_response(response) + + return True + + except Exception as e: + print(f"{Colors.RED}βœ— Endpoint test failed: {str(e)}{Colors.END}") + traceback.print_exc() + return False + + +def test_cxrreportgen_endpoint(quiet: bool = False) -> Optional[bool]: + """Test CXRReportGen endpoint connectivity.""" + print(f"\n{Colors.BLUE}Testing CXRReportGen endpoint...{Colors.END}") + + # Check if endpoint is configured + if not settings.CXRREPORTGEN_MODEL_ENDPOINT: + print( + f"{Colors.YELLOW}⚠ CXRREPORTGEN_MODEL_ENDPOINT not configured - skipping test{Colors.END}" + ) + return None + + try: + # Find test data + data_root = settings.DATA_ROOT + input_folder = os.path.join(data_root, "cxrreportgen-images") + frontal = os.path.join(input_folder, "cxr_frontal.jpg") + lateral = os.path.join(input_folder, "cxr_lateral.jpg") + + if not (os.path.exists(frontal) and os.path.exists(lateral)): + print(f"{Colors.YELLOW}⚠ Test data not found at {input_folder}{Colors.END}") + print( + f"{Colors.GREEN}βœ“ Skipping functional test (no test data){Colors.END}" + ) + 
return True + + print( + f"{Colors.GREEN}βœ“ Found test images: {os.path.basename(frontal)}, {os.path.basename(lateral)}{Colors.END}" + ) + + # Test the endpoint + indication = "" + technique = "" + comparison = "None" + + client = CxrReportGenClient() + response = client.submit( + frontal_image=frontal, + lateral_image=lateral, + indication=indication, + technique=technique, + comparison=comparison, + ) + + if not all(key in response[0] for key in ["output"]): + print(f"{Colors.RED}βœ— Response does not contain expected keys{Colors.END}") + return False + + print(f"{Colors.GREEN}βœ“ Endpoint responded with expected format{Colors.END}") + + if not quiet: + for i, r in enumerate(response): + print(f" == Response {i} ==") + output = r["output"] + print(f" output:") + if output is not None: + for row in output: + print(f" {row}") + + return True + + except Exception as e: + print(f"{Colors.RED}βœ— Endpoint test failed: {str(e)}{Colors.END}") + traceback.print_exc() + return False + + +def test_gigapath_endpoint(quiet: bool = False) -> Optional[bool]: + """Test GigaPath endpoint connectivity.""" + print(f"\n{Colors.BLUE}Testing GigaPath endpoint...{Colors.END}") + + # Check if endpoint is configured + if not settings.GIGAPATH_MODEL_ENDPOINT: + print( + f"{Colors.YELLOW}⚠ GIGAPATH_MODEL_ENDPOINT not configured - skipping test{Colors.END}" + ) + return None + + try: + # Find test data + data_root = settings.DATA_ROOT + input_folder = os.path.join( + data_root, "advanced-radpath-demo", "sample_images", "pathology" + ) + test_image = os.path.join(input_folder, "TCGA-19-2631.png") + + if not os.path.exists(test_image): + print(f"{Colors.YELLOW}⚠ Test data not found at {test_image}{Colors.END}") + print( + f"{Colors.GREEN}βœ“ Skipping functional test (no test data){Colors.END}" + ) + return True + + print( + f"{Colors.GREEN}βœ“ Found test image: {os.path.basename(test_image)}{Colors.END}" + ) + + # Test the endpoint + client = GigaPathClient() + response = client.submit(image_list=[test_image]) + + print(f"{Colors.GREEN}βœ“ Endpoint responded successfully{Colors.END}") + + if not quiet: + pretty_print_response(response) + + return True + + except Exception as e: + print(f"{Colors.RED}βœ— Endpoint test failed: {str(e)}{Colors.END}") + traceback.print_exc() + return False + + +def test_gpt_endpoint(quiet: bool = False) -> Optional[bool]: + """Test GPT endpoint connectivity (optional).""" + + # Check if endpoint is configured + if not settings.AZURE_OPENAI_ENDPOINT: + print( + f"{Colors.YELLOW}⚠ AZURE_OPENAI_ENDPOINT not configured - skipping test{Colors.END}" + ) + return None + + try: + print(f"\n{Colors.BLUE}Testing GPT endpoint...{Colors.END}") + # Check if API key is also available + if not settings.AZURE_OPENAI_API_KEY: + print(f"{Colors.RED}⚠ AZURE_OPENAI_API_KEY not configured!{Colors.END}") + return False + + if not settings.AZURE_OPENAI_MODEL_NAME: + print(f"{Colors.RED}⚠ AZURE_OPENAI_MODEL_NAME not configured!{Colors.END}") + return False + + print(f"{Colors.GREEN}βœ“ Creating OpenAI client...{Colors.END}") + client = create_openai_client() + + # Simple test - get available models + print(f"{Colors.GREEN}βœ“ Testing basic connectivity...{Colors.END}") + + # Try a simple completion request + response = client.chat.completions.create( + model=settings.AZURE_OPENAI_MODEL_NAME, # This should be the deployed model name + messages=[ + { + "role": "user", + "content": "Hello! 
Can you respond with just 'Hello from GPT'?", + } + ], + max_tokens=100, + temperature=0.0, + ) + + if response and response.choices: + response_text = response.choices[0].message.content + print(f"{Colors.GREEN}βœ“ GPT responded: {response_text}{Colors.END}") + + if not quiet: + print(f" Model: {response.model}") + print(f" Usage: {response.usage}") + + return True + else: + print(f"{Colors.RED}βœ— GPT response was empty or invalid{Colors.END}") + return False + + except Exception as e: + print(f"{Colors.RED}βœ— GPT endpoint test failed: {str(e)}{Colors.END}") + return False + + +def print_configuration(): + """Print current configuration values from settings.""" + print(f"{Colors.CYAN}Configuration{Colors.END}") + print("=" * 40) + + # Print endpoint configurations + print(f"\n{Colors.BLUE}Model Endpoints:{Colors.END}") + print( + f" MI2_MODEL_ENDPOINT:\n {settings.MI2_MODEL_ENDPOINT or f'{Colors.YELLOW}(not set){Colors.END}'}" + ) + print( + f" MIP_MODEL_ENDPOINT:\n {settings.MIP_MODEL_ENDPOINT or f'{Colors.YELLOW}(not set){Colors.END}'}" + ) + print( + f" GIGAPATH_MODEL_ENDPOINT:\n {settings.GIGAPATH_MODEL_ENDPOINT or f'{Colors.YELLOW}(not set){Colors.END}'}" + ) + print( + f" CXRREPORTGEN_MODEL_ENDPOINT:\n {settings.CXRREPORTGEN_MODEL_ENDPOINT or f'{Colors.YELLOW}(not set){Colors.END}'}" + ) + + # Print Azure OpenAI configuration + print(f"\n{Colors.PURPLE}Azure OpenAI Configuration:{Colors.END}") + print( + f" AZURE_OPENAI_ENDPOINT:\n {settings.AZURE_OPENAI_ENDPOINT or f'{Colors.YELLOW}(not set){Colors.END}'}" + ) + + if settings.AZURE_OPENAI_ENDPOINT: + print( + f" AZURE_OPENAI_MODEL_NAME:\n {settings.AZURE_OPENAI_MODEL_NAME or f'{Colors.YELLOW}(not set){Colors.END}'}" + ) + if settings.AZURE_OPENAI_API_KEY: + # Mask the API key for security + masked_key = ( + settings.AZURE_OPENAI_API_KEY[:8] + + "*" * (len(settings.AZURE_OPENAI_API_KEY) - 16) + + settings.AZURE_OPENAI_API_KEY[-8:] + if len(settings.AZURE_OPENAI_API_KEY) > 16 + else "***HIDDEN***" + ) + print(f" AZURE_OPENAI_API_KEY:\n {masked_key}") + + # Print data configuration + print(f"\n{Colors.GREEN}Data Configuration:{Colors.END}") + print(f" DATA_ROOT: {settings.DATA_ROOT}") + + print() # Empty line for readability + + +def main(): + """Main entry point for the test command.""" + parser = argparse.ArgumentParser( + description="Test healthcare AI model endpoints", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + healthcareai-test # Test all endpoints with detailed output + healthcareai-test --models mi2 # Test only MedImageInsight + healthcareai-test --models mi2,mip # Test MedImageInsight and MedImageParse + healthcareai-test --quiet # Test with minimal output + """, + ) + + parser.add_argument( + "--models", + type=str, + help="Comma-delimited list of model endpoints to test. Options: mi2,mip,cxr,gpt,pgp. 
If not specified, tests all models.",
+    )
+
+    parser.add_argument(
+        "--quiet",
+        "-q",
+        action="store_true",
+        help="Suppress detailed response output and use minimal logging",
+    )
+
+    args = parser.parse_args()
+
+    # Parse the comma-delimited models list
+    if args.models:
+        # Split by comma and strip whitespace
+        selected_models = [model.strip().lower() for model in args.models.split(",")]
+        # Validate model names
+        valid_models = {"mi2", "mip", "cxr", "gpt", "pgp"}
+        invalid_models = set(selected_models) - valid_models
+        if invalid_models:
+            print(
+                f"{Colors.RED}Error: Invalid model(s): {', '.join(invalid_models)}{Colors.END}"
+            )
+            print(f"Valid models are: {', '.join(sorted(valid_models))}")
+            sys.exit(1)
+    else:
+        # Default to all models if none specified
+        selected_models = ["mi2", "mip", "cxr", "gpt", "pgp"]
+
+    print(f"{Colors.BOLD}Healthcare AI Endpoint Tester{Colors.END}")
+    print("=" * 40)
+
+    # Show the current configuration before running any tests
+    print_configuration()
+
+    # Run tests based on selected models
+    test_results = {}
+
+    if "mi2" in selected_models:
+        test_results["mi2"] = test_medimageinsight_endpoint(args.quiet)
+
+    if "mip" in selected_models:
+        test_results["mip"] = test_medimageparse_endpoint(args.quiet)
+
+    if "cxr" in selected_models:
+        test_results["cxr"] = test_cxrreportgen_endpoint(args.quiet)
+
+    if "pgp" in selected_models:
+        test_results["pgp"] = test_gigapath_endpoint(args.quiet)
+
+    if "gpt" in selected_models:
+        test_results["gpt"] = test_gpt_endpoint(args.quiet)
+
+    # Summary
+    if test_results:  # Only show summary if tests were run
+        print("\n" + "=" * 40)
+        print(f"{Colors.BOLD}Test Summary:{Colors.END}")
+
+        passed = sum(1 for result in test_results.values() if result is True)
+        failed = sum(1 for result in test_results.values() if result is False)
+        skipped = sum(1 for result in test_results.values() if result is None)
+        total = len(test_results)
+
+        for model, result in test_results.items():
+            if result is True:
+                status = f"{Colors.GREEN}✓ PASS{Colors.END}"
+            elif result is False:
+                status = f"{Colors.RED}✗ FAIL{Colors.END}"
+            else:  # result is None
+                status = f"{Colors.YELLOW}- SKIP{Colors.END}"
+            print(f"  {model.upper()}: {status}")
+
+        print(
+            f"\nOverall: {passed} passed, {failed} failed, {skipped} skipped ({total} total)"
+        )
+
+        if failed == 0:
+            print(f"{Colors.GREEN}All configured endpoint tests passed!{Colors.END}")
+            sys.exit(0)
+        else:
+            print(
+                f"{Colors.YELLOW}Some endpoint tests failed. Check your configuration.{Colors.END}"
+            )
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/package/healthcareai_toolkit/clients/medimage/cxrreportgen.py b/package/healthcareai_toolkit/clients/medimage/cxrreportgen.py
index 04350b0..776c8ea 100644
--- a/package/healthcareai_toolkit/clients/medimage/cxrreportgen.py
+++ b/package/healthcareai_toolkit/clients/medimage/cxrreportgen.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License. 
from .medimagebase import MedImageBaseClient
+import json
 
 
 class CxrReportGenClient(MedImageBaseClient):
@@ -46,3 +47,8 @@ def create_payload(
             "params": {},
         }
         return payload
+
+    def decode_response(self, response):
+        """Decode the base response, parsing the JSON-encoded 'output' field."""
+        response = super().decode_response(response)
+        return [{**r, "output": json.loads(r["output"])} for r in response]
diff --git a/package/healthcareai_toolkit/clients/medimage/gigapath.py b/package/healthcareai_toolkit/clients/medimage/gigapath.py
index 24e904f..fbc8092 100644
--- a/package/healthcareai_toolkit/clients/medimage/gigapath.py
+++ b/package/healthcareai_toolkit/clients/medimage/gigapath.py
@@ -18,7 +18,7 @@ def _decode_image_features(feature):
     """
     feature_bytes = base64.b64decode(feature)
     buffer = io.BytesIO(feature_bytes)
-    tmp = torch.load(buffer)
+    tmp = torch.load(buffer, weights_only=True, map_location="cpu")
     feature_output = tmp.cpu().data.numpy()
     return feature_output
diff --git a/package/healthcareai_toolkit/clients/openai.py b/package/healthcareai_toolkit/clients/openai.py
index bf7b1d0..fe43c8f 100644
--- a/package/healthcareai_toolkit/clients/openai.py
+++ b/package/healthcareai_toolkit/clients/openai.py
@@ -1,34 +1,14 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-import os
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
 
-from azureml.core import Workspace
 from openai import AzureOpenAI
+from healthcareai_toolkit import settings
 
 
 def create_openai_client():
     """Plumbing to create the OpenAI client"""
-
-    # Try to load endpoint URL and API key from the JSON file
-    # (and load as environment variables)
-    load_environment_variables("environment.json")
-
-    # Try to get the key from environment
-    endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "")
-    api_key = os.environ.get("AZURE_OPENAI_API_KEY", "")
-
-    if api_key == "":
-        # Try to get the key from AML workspace
-
-        # Load the workspace
-        ws = Workspace.from_config()
-
-        # Access the linked key vault
-        keyvault = ws.get_default_keyvault()
-
-        # Get the secret
-        api_key = keyvault.get_secret("azure-openai-api-key-westus")
+    endpoint = settings.AZURE_OPENAI_ENDPOINT
+    api_key = settings.AZURE_OPENAI_API_KEY
 
     client = AzureOpenAI(
         azure_endpoint=endpoint,
@@ -36,18 +16,3 @@ def create_openai_client():
         api_version="2024-02-01",
     )
     return client
-
-
-def create_oai_assistant(client):
-    """Creates assistant to keep track of prior responses"""
-    # Assistant API example: https://github.com/openai/openai-python/blob/main/examples/assistant.py
-    # Available in limited regions
-    deployment = "gpt-4o"
-    assistant = client.beta.assistants.create(
-        name="Math Tutor",
-        instructions="You are a categorizer. For each question answered, extract entities related to people's names and "
-        " jobs and categorize them. You always return result in JSON. 
You reuse categories from past responses when possible", - model=deployment, - tools=[{"type": "code_interpreter"}], - ) - return assistant.id diff --git a/package/healthcareai_toolkit/settings.py b/package/healthcareai_toolkit/settings.py index b192780..a405905 100644 --- a/package/healthcareai_toolkit/settings.py +++ b/package/healthcareai_toolkit/settings.py @@ -16,6 +16,7 @@ AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT", None) AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", None) +AZURE_OPENAI_MODEL_NAME = os.environ.get("AZURE_OPENAI_MODEL_NAME", None) _constants = { diff --git a/package/healthcareai_toolkit/util/azureml_managers.py b/package/healthcareai_toolkit/util/azureml_managers.py index 5f5977c..a250789 100644 --- a/package/healthcareai_toolkit/util/azureml_managers.py +++ b/package/healthcareai_toolkit/util/azureml_managers.py @@ -89,12 +89,12 @@ def initialize_client(self, resource_id): ml_client.online_endpoints._online_operation._client._base_url = f"{os.environ.get('AZUREML_SERVICE_ENDPOINT')}/rp/workspaces" # noqa: E501 ml_client.online_endpoints._online_deployment_operation._client._base_url = f"{os.environ.get('AZUREML_SERVICE_ENDPOINT')}/rp/workspaces" # noqa: E501 - print( - f"Using ml_client base_url 1: {ml_client.online_endpoints._online_operation._client._base_url}" - ) - print( - f"Using ml_client base_url 2: {ml_client.online_endpoints._online_deployment_operation._client._base_url}" - ) + print( + f"Using ml_client base_url 1: {ml_client.online_endpoints._online_operation._client._base_url}" + ) # noqa: E501 + print( + f"Using ml_client base_url 2: {ml_client.online_endpoints._online_deployment_operation._client._base_url}" + ) # noqa: E501 self.ml_client = ml_client self.endpoint = ml_client.online_endpoints.get(name=endpoint_name) diff --git a/package/pyproject.toml b/package/pyproject.toml index 65c6994..99ddf23 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -40,7 +40,7 @@ jupyter = "~1.1.1" pillow = "~10.4.0" matplotlib = "~3.7.5" numpy = "~1.24.4" -openai = "~1.51.2" +openai = "~1.89.0" umap-learn = "~0.5.6" scipy = "~1.10.1" azureml-core = "~1.57.0.post3" @@ -50,6 +50,9 @@ scikit-image = "~0.24.0" python-dotenv = "~1.0.1" nibabel = "~5.3.1" +[tool.poetry.scripts] +healthcareai-test = "healthcareai_toolkit.cli.test_endpoints:main" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api"
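
With the new [tool.poetry.scripts] entry, the endpoint tester is installed as a healthcareai-test console command. A minimal usage sketch, assuming the package/ directory has been installed into the active Python environment (the model selection shown is illustrative):

    pip install -e package/                  # install the toolkit in editable mode
    healthcareai-test --models cxr,gpt -q    # test the CXRReportGen and GPT endpoints

Similarly, because CxrReportGenClient.decode_response now parses the JSON-encoded "output" field, callers receive Python lists rather than raw JSON strings. A minimal sketch, assuming CXRREPORTGEN_MODEL_ENDPOINT is configured; the import path follows the file layout above, and the image file names are placeholders:

    from healthcareai_toolkit.clients.medimage.cxrreportgen import CxrReportGenClient

    client = CxrReportGenClient()
    response = client.submit(
        frontal_image="frontal.png",
        lateral_image="lateral.png",
        indication="",
        technique="",
        comparison="None",
    )
    # "output" arrives already parsed; iterate the findings directly
    for row in response[0]["output"]:
        print(row)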