From a28c20cd29d73d37008041c8f4eee6c915458a3a Mon Sep 17 00:00:00 2001
From: Cody Yu
Date: Thu, 13 Jun 2024 11:22:30 -0700
Subject: [PATCH] [MISC] Remove FP8 warning (#5472)

Co-authored-by: Philipp Moritz
---
 vllm/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/config.py b/vllm/config.py
index 2513d43ce8e6..76c10d464aa2 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -212,7 +212,7 @@ def _verify_quantization(self) -> None:
                 f"{self.quantization} quantization is currently not "
                 f"supported in ROCm.")
         if (self.quantization
-                not in ["marlin", "gptq_marlin_24", "gptq_marlin"]):
+                not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin")):
             logger.warning(
                 "%s quantization is not fully "
                 "optimized yet. The speed can be slower than "