diff --git a/src/images/blogs/Quantize-finetune.jpg b/src/images/blogs/Quantize-finetune.jpg
new file mode 100644
index 0000000000000..d0d6d65badafd
Binary files /dev/null and b/src/images/blogs/Quantize-finetune.jpg differ
diff --git a/src/routes/blogs/+page.svelte b/src/routes/blogs/+page.svelte
index ea9d43e374263..fc3a9f47a016a 100644
--- a/src/routes/blogs/+page.svelte
+++ b/src/routes/blogs/+page.svelte
@@ -21,6 +21,7 @@
 	import OliveSharedCache from '../../images/blogs/olive-shared-cache-user-flow.png';
 	import GoodnotesThumbnail from '../../images/blogs/goodnotes-scribble-to-erase/Thumbnail.png';
 	import OliveCli from '../../images/blogs/olive-flow.png';
+	import QuantizeFinetune from '../../images/blogs/Quantize-finetune.jpg';
 	onMount(() => {
 		anime({
 			targets: '.border-primary',
@@ -49,37 +50,46 @@ }
 	let featuredblog = [
 		{
-			title: 'Scribble to Erase on Goodnotes for Windows, Web, and Android, Powered by ONNX Runtime',
+			title:
+				'Is it better to quantize before or after finetuning?',
+			date: '19th November, 2024',
+			blurb:
+				'Learn how to quickly and easily experiment in your model optimization workflow using Olive.',
+			link: 'blogs/olive-quant-ft',
+			image: QuantizeFinetune,
+			imgalt: 'Quantize or finetune first for better model performance?'
+		},
+		{
+			title:
+				'Scribble to Erase on Goodnotes for Windows, Web, and Android, Powered by ONNX Runtime',
 			date: '18th November, 2024',
 			blurb:
-				"Discover how Goodnotes brings the popular scribble-to-erase feature from iPad to Windows, Web, and Android with the help of ONNX Runtime, enabling seamless, high-performance AI integration across platforms.",
+				'Discover how Goodnotes brings the popular scribble-to-erase feature from iPad to Windows, Web, and Android with the help of ONNX Runtime, enabling seamless, high-performance AI integration across platforms.',
 			link: 'blogs/goodnotes-scribble-to-erase',
 			image: GoodnotesThumbnail,
-			imgalt:
-				'Scribble to Erase feature on Goodnotes for Windows, Web, and Android'
-			},
-			{
+			imgalt: 'Scribble to Erase feature on Goodnotes for Windows, Web, and Android'
+		},
+		{
 			title: 'Democratizing AI Model optimization with the new Olive CLI',
 			date: 'November 11th, 2024',
 			blurb:
-				"Learn how to use the new Olive CLI to easily optimize AI Models for on-device inference",
+				'Learn how to use the new Olive CLI to easily optimize AI Models for on-device inference',
 			link: 'blogs/olive-cli',
 			image: OliveCli,
-			imgalt:
-				'Olive Flow'
-		},
+			imgalt: 'Olive Flow'
+		}
+	];
+	let blogs = [
 		{
-			title: 'Enhancing team collaboration during AI model optimization with the Olive Shared Cache',
+			title:
+				'Enhancing team collaboration during AI model optimization with the Olive Shared Cache',
 			date: 'October 30th, 2024',
 			blurb:
 				"Learn how to use Olive's shared cache to enhance team collaboration when optimizing AI models",
 			link: 'blogs/olive-shared-cache',
 			image: OliveSharedCache,
-			imgalt:
-				'Team Flow for Olive shared cache'
+			imgalt: 'Team Flow for Olive shared cache'
 		},
-	];
-	let blogs = [
 		{
 			title: 'Accelerating LightGlue Inference with ONNX Runtime and TensorRT',
 			date: 'July 17th, 2024',
 			blurb:
@@ -90,7 +100,7 @@
 			imgalt:
 				'Speedup for ONNX Runtime with TensorRT and CUDA vs. torch.compile for difference batch sizes and sequence lengths.'
 		},
-			{
+		{
 			title: 'High performance on-device real-time ML with NimbleEdge, using ONNX Runtime',
 			date: 'June 17th, 2024',
 			blurb:
@@ -104,7 +114,7 @@
 			title: 'Background Removal in the Browser Using ONNX Runtime with WebGPU',
 			date: 'June 12th, 2024',
 			blurb:
-				"Using ONNX Runtime with WebGPU and WebAssembly leads to 20x speedup over multi-threaded and 550x speedup over single-threaded CPU performance. Thus achieving interactive speeds for state-of-the-art background removal directly in the browser.",
+				'Using ONNX Runtime with WebGPU and WebAssembly leads to 20x speedup over multi-threaded and 550x speedup over single-threaded CPU performance. Thus achieving interactive speeds for state-of-the-art background removal directly in the browser.',
 			link: 'https://img.ly/blog/browser-background-removal-using-onnx-runtime-webgpu/',
 			image: 'https://imgly-blog-prod.storage.googleapis.com/2024/06/onnx-runtime-imgly.jpg',
 			imgalt:
@@ -113,8 +123,7 @@
 		{
 			title: 'Phi-3 Small and Medium Models are now Optimized with ONNX Runtime and DirectML',
 			date: 'May 21th, 2024',
-			blurb:
-				"You can now run the Phi-3 medium, small models on device of your choice.",
+			blurb: 'You can now run the Phi-3 medium, small models on device of your choice.',
 			link: 'blogs/accelerating-phi-3-small-medium',
 			image: Phi3SmallMediumImage,
 			imgalt:
@@ -123,13 +132,13 @@
 		{
 			title: 'Enjoy the Power of Phi-3 with ONNX Runtime on your device',
 			date: 'May 20th, 2024',
-			blurb:
-				"Harness ONNX Runtime to run Phi-3-mini on mobile phones and in the browser.",
+			blurb: 'Harness ONNX Runtime to run Phi-3-mini on mobile phones and in the browser.',
 			link: 'https://huggingface.co/blog/Emma-N/enjoy-the-power-of-phi-3-with-onnx-runtime',
 			image: Phi3OnDeviceImage,
 			imgalt:
 				'Chart comparing model size (in GB) of ONNX Phi-3-mini for web and mobile with original Phi-3-mini'
-		},{
+		},
+		{
 			title: 'ONNX Runtime supports Phi-3 mini models across platforms and devices',
 			date: 'April 22nd, 2024',
 			blurb:
@@ -148,7 +157,7 @@
 			image: WebGPUImage,
 			imgalt:
 				'Comparison of ONNX Runtime Web with WebGPU EP on GPU vs. WASM EP on CPU for segment anything example'
-			},
+		},
 		{
 			title: 'ONNX Runtime 1.17: CUDA 12 support, Phi-2 optimizations, WebGPU, and more!',
 			date: 'February 28th, 2024',
@@ -399,20 +408,21 @@
 	];
 	let blogsCommunity = [
 		{
-			title:'Sentence Transformers 3.2.0: 2x-3x Faster Inference with ONNX Runtime',
+			title: 'Sentence Transformers 3.2.0: 2x-3x Faster Inference with ONNX Runtime',
 			date: 'October 10, 2024',
 			link: 'https://github.com/UKPLab/sentence-transformers/releases/tag/v3.2.0',
-			blurb: 'This update brings 2x-3x speedups with a new ONNX backends, plus static embeddings offering 50x-500x faster performance with a slight accuracy trade-off. Install with pip install sentence-transformers==3.2.0.'
+			blurb:
+				'This update brings 2x-3x speedups with a new ONNX backend, plus static embeddings offering 50x-500x faster performance with a slight accuracy trade-off. Install with pip install sentence-transformers==3.2.0.'
 		},
 		{
-			title:'Running Phi-3 Mistral 7B LLMs on Raspberry Pi 5: A Step-by-Step Guide',
+			title: 'Running Phi-3 Mistral 7B LLMs on Raspberry Pi 5: A Step-by-Step Guide',
 			date: 'September 5, 2024',
 			link: 'https://medium.com/@vadikus/running-phi-3-mistral-7b-llms-on-raspberry-pi-5-a-step-by-step-guide-185e8102e35b',
-			blurb: 'Learn how to run Phi-3 Mistral 7B on Raspberry Pi 5 using the ONNX Runtime Gen AI library.'
+			blurb:
+				'Learn how to run Phi-3 Mistral 7B on Raspberry Pi 5 using the ONNX Runtime Gen AI library.'
 		},
 		{
-			title:
-				'Deploying a Production-Ready RAG Server: A Comprehensive Guide with LlamaIndex',
+			title: 'Deploying a Production-Ready RAG Server: A Comprehensive Guide with LlamaIndex',
 			date: 'March 27, 2024',
 			link: 'https://python.plainenglish.io/deploying-a-production-ready-rag-server-a-comprehensive-guide-with-llamaindex-dbe57cc960df',
 			blurb:
@@ -448,12 +458,15 @@
 			link: 'https://www.linkedin.com/pulse/hcm-sentence-similarity-language-model-using-java-jonathon-palmieri-tdlpc%3FtrackingId=CN2PPVO4Toqh8r6JsAYMIw%253D%253D/?trackingId=ByNomo0pQFKM%2F%2BWEknVs7Q%3D%3D'
 		}
 	];
-	let description = 'ONNX Runtime Blogs - your source for the latest ONNX Runtime updates and information.'
-	let image = 'https://i.ibb.co/0YBy62j/ORT-icon-for-light-bg.png'
-	let imageSquare = 'https://i.ibb.co/0YBy62j/ORT-icon-for-light-bg.png'
-	let authors = ['']
-	let keywords = 'onnxruntime, onnx runtime blogs, onnx runtime community blogs, onnx runtime community posts, onnx runtime community announcements'
+	let description =
+		'ONNX Runtime Blogs - your source for the latest ONNX Runtime updates and information.';
+	let image = 'https://i.ibb.co/0YBy62j/ORT-icon-for-light-bg.png';
+	let imageSquare = 'https://i.ibb.co/0YBy62j/ORT-icon-for-light-bg.png';
+	let authors = [''];
+	let keywords =
+		'onnxruntime, onnx runtime blogs, onnx runtime community blogs, onnx runtime community posts, onnx runtime community announcements';
+
@@ -461,9 +474,9 @@
-
+
-
+
diff --git a/src/routes/blogs/olive-quant-ft/+page.svx b/src/routes/blogs/olive-quant-ft/+page.svx
index 1f922bc7c0aa6..7f4487685c6e4 100644
--- a/src/routes/blogs/olive-quant-ft/+page.svx
+++ b/src/routes/blogs/olive-quant-ft/+page.svx
@@ -1,8 +1,8 @@
 ---
 title: 'Is it better to quantize before or after finetuning?'
-date: '18th November, 2024'
-description: 'Learn how to use the shared cache feature in Olive to enhance team collaboration when optimizing AI models'
-keywords: 'GenAI , LLM, ONNXRuntime, ORT, Phi, DirectML, Windows, phi3, phi-3, llama-3.2, ONNX, SLM, edge, gpu'
+date: '19th November, 2024'
+description: 'Learn how to quickly and easily experiment in your model optimization workflow using Olive.'
+keywords: 'quantization, fine-tuning, Olive toolkit, model optimization, ONNX runtime, AI model efficiency, AWQ, GPTQ, model deployment, low-precision, LoRA, language models, quantize before fine-tune, quantization sequence, Phi-3.5, Llama, memory reduction'
 authors:
 	[
 		'Jambay Kinley',
@@ -13,8 +13,8 @@ authorsLink:
 		'https://www.linkedin.com/in/jambayk/',
 		'https://www.linkedin.com/in/samuel-kemp-a9253724/'
 	]
-image: ''
-imageSquare: ''
+image: 'https://iili.io/251Z3ts.jpg'
+imageSquare: 'https://iili.io/251Z3ts.jpg'
 url: 'https://onnxruntime.ai/blogs/olive-quant-ft'
 ---