diff --git a/src/images/blogs/olive-shared-cache-user-flow.png b/src/images/blogs/olive-shared-cache-user-flow.png
new file mode 100644
index 0000000000000..96b399fe05d33
Binary files /dev/null and b/src/images/blogs/olive-shared-cache-user-flow.png differ
diff --git a/src/routes/blogs/+page.svelte b/src/routes/blogs/+page.svelte
index b7eac777573c7..793ee5d562a7b 100644
--- a/src/routes/blogs/+page.svelte
+++ b/src/routes/blogs/+page.svelte
@@ -18,6 +18,7 @@
 	import Phi3OnDeviceImage from '../../images/blogs/phi-3-on-device_blog_thumbnail.png';
 	import Phi3SmallMediumImage from '../../images/blogs/accelerating-phi-3-medium-thumbnail.png';
 	import LightGlueImage from '../../images/blogs/lightglue-community-blog.png';
+	import OliveSharedCache from '../../images/blogs/olive-shared-cache-user-flow.png';
 	onMount(() => {
 		anime({
 			targets: '.border-primary',
@@ -45,6 +46,16 @@
 		dispatch('switchTab', tab);
 	}
 	let featuredblog = [
+		{
+			title: 'Enhancing team collaboration during AI model optimization with the Olive Shared Cache',
+			date: 'October 30th, 2024',
+			blurb:
+				"Learn how to use Olive's shared cache to enhance team collaboration when optimizing AI models",
+			link: 'blogs/olive-shared-cache',
+			image: OliveSharedCache,
+			imgalt:
+				'Team Flow for Olive shared cache'
+		},
 		{
 			title: 'Accelerating LightGlue Inference with ONNX Runtime and TensorRT',
 			date: 'July 17th, 2024',
@@ -65,6 +76,10 @@
 			imgalt:
 				'Image of the different steps of an ML pipeline on a mobile device, running using NimbleEdge and ONNX Runtime.'
 		},
+
+
+	];
+	let blogs = [
 		{
 			title: 'Background Removal in the Browser Using ONNX Runtime with WebGPU',
 			date: 'June 12th, 2024',
@@ -75,9 +90,6 @@
 			imgalt:
 				'Image of a skateboarder with a sky background, with half of the background being alternating grey and white squares indicating it has been removed.'
 		},
-
-	];
-	let blogs = [
 		{
 			title: 'Phi-3 Small and Medium Models are now Optimized with ONNX Runtime and DirectML',
 			date: 'May 21th, 2024',
diff --git a/src/routes/blogs/olive-shared-cache/+page.svx b/src/routes/blogs/olive-shared-cache/+page.svx
new file mode 100644
index 0000000000000..281f17b5a9152
--- /dev/null
+++ b/src/routes/blogs/olive-shared-cache/+page.svx
@@ -0,0 +1,162 @@
+---
+title: 'Enhance team collaboration during AI model optimization with the Olive Shared Cache feature'
+date: '30th October, 2024'
+description: 'Learn how to use the shared cache feature in Olive to enhance team collaboration when optimizing AI models'
+keywords: 'GenAI, LLM, ONNXRuntime, ORT, Phi, DirectML, Windows, phi3, phi-3, llama-3.2, ONNX, SLM, edge, gpu'
+authors:
+  [
+    'Xiaoyu Zhang',
+    'Devang Patel',
+    'Sam Kemp'
+  ]
+authorsLink:
+  [
+    'https://www.linkedin.com/in/xiaoyu-zhang/',
+    'https://www.linkedin.com/in/devangpatel/',
+    'https://www.linkedin.com/in/samuel-kemp-a9253724/'
+  ]
+image: 'https://iili.io/2nxtC57.png'
+imageSquare: 'https://iili.io/2nxtC57.png'
+url: 'https://onnxruntime.ai/blogs/olive-shared-cache'
+---
+
+## 👋 Introduction
+
+In the ever-evolving realm of machine learning, optimization stands as a crucial pillar for enhancing model performance, reducing latency, and cutting down costs. Enter Olive, a powerful tool designed to streamline the optimization process through its innovative shared cache feature.
+
+Efficiency in machine learning relies not only on the effectiveness of algorithms but also on the efficiency of the processes involved. Olive's shared cache feature, backed by Azure Storage, embodies this principle by seamlessly allowing intermediate models to be stored and reused within a team, avoiding redundant computations.
+
+This blog post delves into how Olive's shared cache feature can help you save time and costs, illustrated with practical examples.
+
+### Prerequisites
+
+- An Azure Storage Account. For details on how to create an Azure Storage Account, read [Create an Azure Storage Account](https://learn.microsoft.com/azure/storage/common/storage-account-create?tabs=azure-portal).
+- Once you have created your Azure Storage Account, you'll need to create a storage container (a container organizes a set of blobs, similar to a directory in a file system). For more details on how to create a storage container, read [Create a container](https://learn.microsoft.com/azure/storage/blobs/blob-containers-portal#create-a-container).
+
+## 🤝 Team collaboration during the optimization process
+
+User A begins the optimization process by using Olive's quantize command to optimize the [Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) model with the AWQ algorithm. This step is carried out with the following command:
+
+```bash
+olive quantize \
+    --model_name_or_path microsoft/Phi-3-mini-4k-instruct \
+    --algorithm awq \
+    --account_name {AZURE_STORAGE_ACCOUNT} \
+    --container_name {STORAGE_CONTAINER_NAME} \
+    --log_level 1
+```
+
+> **Note:**
+> - The `--account_name` should be set to your Azure Storage Account name.
+> - The `--container_name` should be set to the container name in the Azure Storage Account.
+
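+If you have not yet created the storage account and container from the prerequisites, one way to do so is with the Azure CLI. This is a minimal sketch; the resource group, region, and SKU are placeholders, so substitute values that match your environment:
+
+```bash
+# Create the storage account that will back the Olive shared cache
+az storage account create \
+    --name {AZURE_STORAGE_ACCOUNT} \
+    --resource-group {RESOURCE_GROUP} \
+    --location {REGION} \
+    --sku Standard_LRS
+
+# Create the container that Olive will read cached models from and write them to
+az storage container create \
+    --account-name {AZURE_STORAGE_ACCOUNT} \
+    --name {STORAGE_CONTAINER_NAME} \
+    --auth-mode login
+```
+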
+The optimization process generates a log confirming that the quantized model has been saved to the shared cache location in Azure:
+
+![Log output showing the quantized model being uploaded to the Olive shared cache](./upload-quant-model.png)
+
+Later, User B needs the same AWQ-quantized model and runs the identical quantize command on their own machine:
+
+```bash
+olive quantize \
+    --model_name_or_path microsoft/Phi-3-mini-4k-instruct \
+    --algorithm awq \
+    --account_name {AZURE_STORAGE_ACCOUNT} \
+    --container_name {STORAGE_CONTAINER_NAME} \
+    --log_level 1
+```
+
+A critical part of this step is the log output, which highlights that the quantized model is retrieved from the shared cache instead of re-computing the AWQ quantization:
+
+![Log output showing the quantized model being retrieved from the Olive shared cache](./retrieve-quant-model.png)
+
+The shared cache also works with Olive's automatic optimizer (`auto-opt`). For example, User A can optimize the `meta-llama/Llama-3.2-1B-Instruct` model for CPU:
+
+```bash
+olive auto-opt \
+    --model_name_or_path meta-llama/Llama-3.2-1B-Instruct \
+    --trust_remote_code \
+    --output_path optimized-model \
+    --device cpu \
+    --provider CPUExecutionProvider \
+    --precision int4 \
+    --account_name {AZURE_STORAGE_ACCOUNT} \
+    --container_name {STORAGE_CONTAINER_NAME} \
+    --log_level 1
+```
+
+For each task executed by the automatic optimizer - for example, model download, ONNX conversion, ONNX graph optimization, and quantization - the intermediate model is stored in the shared cache for reuse on different hardware targets. If User B later wants to optimize the same model for a different target, say the GPU of a Windows device, they would execute the following command:
+
+```bash
+olive auto-opt \
+    --model_name_or_path meta-llama/Llama-3.2-1B-Instruct \
+    --trust_remote_code \
+    --output_path optimized-model \
+    --device gpu \
+    --provider DmlExecutionProvider \
+    --precision int4 \
+    --account_name {AZURE_STORAGE_ACCOUNT} \
+    --container_name {STORAGE_CONTAINER_NAME} \
+    --log_level 1
+```
+
+The intermediate steps that are common with User A's CPU optimization, such as ONNX conversion and ONNX graph optimization, will be reused, saving User B both time and cost.
+
+This underscores Olive's versatility, not only in optimizing different models but also in applying a variety of algorithms and exporters. The shared cache again plays a critical role by storing these optimized intermediate models for subsequent use.
+
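+If you want to see what your team has accumulated in the shared cache, you can list the blobs in the container directly. This is a small sketch that assumes you have the Azure CLI installed and permission to read the storage container; Olive itself does not require this step:
+
+```bash
+# List the cached artifacts that Olive has uploaded to the shared cache container
+az storage blob list \
+    --account-name {AZURE_STORAGE_ACCOUNT} \
+    --container-name {STORAGE_CONTAINER_NAME} \
+    --auth-mode login \
+    --output table
+```
+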
+## ➕ Benefits of the Olive shared cache feature
+
+The examples above showcase Olive's shared cache as a game-changer in model optimization. Here are the key benefits:
+
+- **Time Efficiency:** By storing optimized models, the shared cache eliminates the need for repetitive optimizations, drastically reducing time consumption.
+- **Cost Reduction:** Computational resources are expensive. By minimizing redundant processes, the shared cache cuts down on the associated costs, making machine learning more affordable.
+- **Resource Optimization:** Efficient use of computational power leads to better resource management, ensuring that resources are available for other critical tasks.
+- **Collaboration:** The shared cache fosters a collaborative environment where different users can benefit from each other's optimization efforts, promoting knowledge sharing and teamwork.
+
+## Conclusion
+
+By saving and reusing optimized models, Olive's shared cache feature paves the way for a more efficient, cost-effective, and collaborative environment. As AI continues to grow and evolve, tools like Olive will be instrumental in driving innovation and efficiency.
+
+Whether you are a seasoned data scientist or a newcomer to the field, embracing Olive can significantly enhance your workflow. By reducing the time and costs associated with model optimization, you can focus on what truly matters: developing groundbreaking AI models that push the boundaries of what is possible.
+
+Embark on your optimization journey with Olive today and experience the future of machine learning efficiency.
+
+## ⏭️ Try Olive
+
+To try the quantize and automatic optimizer commands with the shared cache feature, install Olive with the following `pip install` command:
+
+```bash
+pip install olive-ai[auto-opt,shared-cache] autoawq
+```
+
+Quantizing a model with the AWQ algorithm requires a CUDA GPU. If you only have access to a CPU device and do not have an Azure subscription, you can run the automatic optimizer on CPU and use the local disk as the cache:
+
+```bash
+olive auto-opt \
+    --model_name_or_path meta-llama/Llama-3.2-1B-Instruct \
+    --trust_remote_code \
+    --output_path optimized-model \
+    --device cpu \
+    --provider CPUExecutionProvider \
+    --precision int4 \
+    --log_level 1
+```
diff --git a/src/routes/blogs/olive-shared-cache/retrieve-quant-model.png b/src/routes/blogs/olive-shared-cache/retrieve-quant-model.png
new file mode 100644
index 0000000000000..5c708d0e77505
Binary files /dev/null and b/src/routes/blogs/olive-shared-cache/retrieve-quant-model.png differ
diff --git a/src/routes/blogs/olive-shared-cache/upload-quant-model.png b/src/routes/blogs/olive-shared-cache/upload-quant-model.png
new file mode 100644
index 0000000000000..716be5f36b7ed
Binary files /dev/null and b/src/routes/blogs/olive-shared-cache/upload-quant-model.png differ