Adjust the dtypes used for the HunyuanVideo VAE and diffusion model.

Comfy-Org · Dec 17, 2024 · 39b1fc4
1 parent 0b25f47
commit 39b1fc4
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 1 deletion.
diff --git a/comfy/sd.py b/comfy/sd.py
@@ -323,6 +323,7 @@ def __init__(self, sd=None, device=None, config=None, dtype=None):
                 self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
                 self.memory_used_decode = lambda shape, dtype: (1500 * shape[2] * shape[3] * shape[4] * (4 * 8 * 8)) * model_management.dtype_size(dtype)
                 self.memory_used_encode = lambda shape, dtype: (900 * max(shape[2], 2) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
+                self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
 
             elif "decoder.layers.1.layers.0.beta" in sd:
                 self.first_stage_model = AudioOobleckVAE()

diff --git a/comfy/supported_models.py b/comfy/supported_models.py
@@ -753,7 +753,7 @@ class HunyuanVideo(supported_models_base.BASE):
 
     memory_usage_factor = 2.0 #TODO
 
-    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
+    supported_inference_dtypes = [torch.bfloat16, torch.float32]
 
     vae_key_prefix = ["vae."]
     text_encoder_key_prefix = ["text_encoders."]