Commit 9f4c0f3

Merge branch 'master' into asset-management

2 parents ca39552 + 196954a

File tree: 13 files changed (+407, -359 lines)

.github/workflows/test-unit.yml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ jobs:
   test:
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-2022, macos-latest]
     runs-on: ${{ matrix.os }}
     continue-on-error: true
     steps:

CODEOWNERS

Lines changed: 1 addition & 23 deletions
@@ -1,25 +1,3 @@
 # Admins
 * @comfyanonymous
-
-# Note: Github teams syntax cannot be used here as the repo is not owned by Comfy-Org.
-# Inlined the team members for now.
-
-# Maintainers
-*.md @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/tests/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/tests-unit/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/notebooks/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/script_examples/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/.github/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/requirements.txt @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/pyproject.toml @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-
-# Python web server
-/api_server/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
-/app/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
-/utils/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
-
-# Node developers
-/comfy_extras/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
-/comfy/comfy_types/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
-/comfy_api_nodes/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
+* @kosinkadink

comfy/samplers.py

Lines changed: 2 additions & 2 deletions
@@ -360,7 +360,7 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
 def cfg_function(model, cond_pred, uncond_pred, cond_scale, x, timestep, model_options={}, cond=None, uncond=None):
     if "sampler_cfg_function" in model_options:
         args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep,
-                "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options}
+                "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options, "input_cond": cond, "input_uncond": uncond}
         cfg_result = x - model_options["sampler_cfg_function"](args)
     else:
         cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale
@@ -390,7 +390,7 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
     for fn in model_options.get("sampler_pre_cfg_function", []):
         args = {"conds":conds, "conds_out": out, "cond_scale": cond_scale, "timestep": timestep,
                 "input": x, "sigma": timestep, "model": model, "model_options": model_options}
-        out  = fn(args)
+        out = fn(args)

     return cfg_function(model, out[0], out[1], cond_scale, x, timestep, model_options=model_options, cond=cond, uncond=uncond_)

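The two new keys expose the raw conditioning lists to custom CFG callbacks. A minimal sketch of a callback that could now read them; the callback name is hypothetical, and this assumes the existing ModelPatcher.set_model_sampler_cfg_function hook:

    def cfg_with_cond_access(args):
        # Plain CFG math over the denoised predictions.
        cond_d = args["cond_denoised"]
        uncond_d = args["uncond_denoised"]
        denoised = uncond_d + (cond_d - uncond_d) * args["cond_scale"]

        # New in this commit: the raw conditioning lists are forwarded too.
        # Either may be None; a real callback could branch on their metadata.
        raw_cond = args["input_cond"]
        raw_uncond = args["input_uncond"]
        _ = (raw_cond, raw_uncond)

        # cfg_function computes cfg_result = x - fn(args), so return x - denoised.
        return args["input"] - denoised

    m = model.clone()  # assumes `model` is a loaded ModelPatcher
    m.set_model_sampler_cfg_function(cfg_with_cond_access)
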
comfy/text_encoders/hunyuan_image.py

Lines changed: 7 additions & 1 deletion
@@ -63,7 +63,13 @@ def __init__(self, byt5=True, device="cpu", dtype=None, model_options={}):
         self.byt5_small = None

     def encode_token_weights(self, token_weight_pairs):
-        cond, p, extra = super().encode_token_weights(token_weight_pairs)
+        tok_pairs = token_weight_pairs["qwen25_7b"][0]
+        template_end = -1
+        if tok_pairs[0][0] == 27:
+            if len(tok_pairs) > 36: # refiner prompt uses a fixed 36 template_end
+                template_end = 36
+
+        cond, p, extra = super().encode_token_weights(token_weight_pairs, template_end=template_end)
         if self.byt5_small is not None and "byt5" in token_weight_pairs:
             out = self.byt5_small.encode_token_weights(token_weight_pairs["byt5"])
             extra["conditioning_byt5small"] = out[0]


comfy/text_encoders/qwen_image.py

Lines changed: 29 additions & 19 deletions
@@ -18,13 +18,22 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.llama_template_images = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"

     def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], **kwargs):
-        if llama_template is None:
-            if len(images) > 0:
-                llama_text = self.llama_template_images.format(text)
-            else:
-                llama_text = self.llama_template.format(text)
+        skip_template = False
+        if text.startswith('<|im_start|>'):
+            skip_template = True
+        if text.startswith('<|start_header_id|>'):
+            skip_template = True
+
+        if skip_template:
+            llama_text = text
         else:
-            llama_text = llama_template.format(text)
+            if llama_template is None:
+                if len(images) > 0:
+                    llama_text = self.llama_template_images.format(text)
+                else:
+                    llama_text = self.llama_template.format(text)
+            else:
+                llama_text = llama_template.format(text)
         tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
         key_name = next(iter(tokens))
         embed_count = 0
@@ -47,22 +56,23 @@ class QwenImageTEModel(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, model_options={}):
         super().__init__(device=device, dtype=dtype, name="qwen25_7b", clip_model=Qwen25_7BVLIModel, model_options=model_options)

-    def encode_token_weights(self, token_weight_pairs):
+    def encode_token_weights(self, token_weight_pairs, template_end=-1):
         out, pooled, extra = super().encode_token_weights(token_weight_pairs)
         tok_pairs = token_weight_pairs["qwen25_7b"][0]
         count_im_start = 0
-        for i, v in enumerate(tok_pairs):
-            elem = v[0]
-            if not torch.is_tensor(elem):
-                if isinstance(elem, numbers.Integral):
-                    if elem == 151644 and count_im_start < 2:
-                        template_end = i
-                        count_im_start += 1
-
-        if out.shape[1] > (template_end + 3):
-            if tok_pairs[template_end + 1][0] == 872:
-                if tok_pairs[template_end + 2][0] == 198:
-                    template_end += 3
+        if template_end == -1:
+            for i, v in enumerate(tok_pairs):
+                elem = v[0]
+                if not torch.is_tensor(elem):
+                    if isinstance(elem, numbers.Integral):
+                        if elem == 151644 and count_im_start < 2:
+                            template_end = i
+                            count_im_start += 1
+
+            if out.shape[1] > (template_end + 3):
+                if tok_pairs[template_end + 1][0] == 872:
+                    if tok_pairs[template_end + 2][0] == 198:
+                        template_end += 3

         out = out[:, template_end:]

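The practical effect of the new skip_template branch: a prompt that already starts with a chat header is passed through verbatim instead of being wrapped in the template a second time. A hedged usage sketch, assuming `clip` is the CLIP object of a loaded Qwen-Image checkpoint:

    custom = ("<|im_start|>system\nYou are a terse captioner.<|im_end|>\n"
              "<|im_start|>user\nA red fox in fresh snow<|im_end|>\n"
              "<|im_start|>assistant\n")

    # Starts with "<|im_start|>", so tokenize_with_weights sets skip_template
    # and does not apply llama_template again.
    tokens = clip.tokenize(custom)
    cond = clip.encode_from_tokens_scheduled(tokens)
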
comfy_extras/nodes_clip_sdxl.py

Lines changed: 57 additions & 40 deletions
@@ -1,43 +1,52 @@
-from nodes import MAX_RESOLUTION
+from typing_extensions import override

-class CLIPTextEncodeSDXLRefiner:
+import nodes
+from comfy_api.latest import ComfyExtension, io
+
+
+class CLIPTextEncodeSDXLRefiner(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
-            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
-            }}
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
-
-    CATEGORY = "advanced/conditioning"
-
-    def encode(self, clip, ascore, width, height, text):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="CLIPTextEncodeSDXLRefiner",
+            category="advanced/conditioning",
+            inputs=[
+                io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01),
+                io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.String.Input("text", multiline=True, dynamic_prompts=True),
+                io.Clip.Input("clip"),
+            ],
+            outputs=[io.Conditioning.Output()],
+        )
+
+    @classmethod
+    def execute(cls, clip, ascore, width, height, text) -> io.NodeOutput:
         tokens = clip.tokenize(text)
-        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height}), )
+        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height}))

-class CLIPTextEncodeSDXL:
+class CLIPTextEncodeSDXL(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "clip": ("CLIP", ),
-            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
-            "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
-            "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "text_g": ("STRING", {"multiline": True, "dynamicPrompts": True}),
-            "text_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
-            }}
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
-
-    CATEGORY = "advanced/conditioning"
-
-    def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="CLIPTextEncodeSDXL",
+            category="advanced/conditioning",
+            inputs=[
+                io.Clip.Input("clip"),
+                io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("crop_w", default=0, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("crop_h", default=0, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("target_width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("target_height", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.String.Input("text_g", multiline=True, dynamic_prompts=True),
+                io.String.Input("text_l", multiline=True, dynamic_prompts=True),
+            ],
+            outputs=[io.Conditioning.Output()],
+        )
+
+    @classmethod
+    def execute(cls, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l) -> io.NodeOutput:
         tokens = clip.tokenize(text_g)
         tokens["l"] = clip.tokenize(text_l)["l"]
         if len(tokens["l"]) != len(tokens["g"]):
@@ -46,9 +55,17 @@ def encode(self, clip, width, height, crop_w, crop_h, target_width, target_heigh
             tokens["l"] += empty["l"]
         while len(tokens["l"]) > len(tokens["g"]):
             tokens["g"] += empty["g"]
-        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}), )
+        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}))
+
+
+class ClipSdxlExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            CLIPTextEncodeSDXLRefiner,
+            CLIPTextEncodeSDXL,
+        ]
+

-NODE_CLASS_MAPPINGS = {
-    "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner,
-    "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL,
-}
+async def comfy_entrypoint() -> ClipSdxlExtension:
+    return ClipSdxlExtension()

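The port swaps the dict-based INPUT_TYPES/NODE_CLASS_MAPPINGS registration for the schema API from comfy_api.latest. A stripped-down sketch of the same pattern with a toy node (ExampleEcho and io.String.Output are illustrative assumptions; only the classes in the diff above are confirmed by this commit):

    from typing_extensions import override
    from comfy_api.latest import ComfyExtension, io


    class ExampleEcho(io.ComfyNode):  # hypothetical toy node, not in this commit
        @classmethod
        def define_schema(cls):
            return io.Schema(
                node_id="ExampleEcho",
                category="_for_testing",
                inputs=[io.String.Input("text", multiline=True)],
                outputs=[io.String.Output()],  # assumes io.String has an Output, mirroring io.Conditioning
            )

        @classmethod
        def execute(cls, text) -> io.NodeOutput:
            return io.NodeOutput(text)


    class ExampleExtension(ComfyExtension):
        @override
        async def get_node_list(self) -> list[type[io.ComfyNode]]:
            return [ExampleEcho]


    async def comfy_entrypoint() -> ExampleExtension:
        return ExampleExtension()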

comfy_extras/nodes_fresca.py

Lines changed: 36 additions & 25 deletions
@@ -1,6 +1,8 @@
 # Code based on https://github.com/WikiChao/FreSca (MIT License)
 import torch
 import torch.fft as fft
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io


 def Fourier_filter(x, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
@@ -51,25 +53,31 @@ def Fourier_filter(x, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
     return x_filtered


-class FreSca:
+class FreSca(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL",),
-                "scale_low": ("FLOAT", {"default": 1.0, "min": 0, "max": 10, "step": 0.01,
-                                        "tooltip": "Scaling factor for low-frequency components"}),
-                "scale_high": ("FLOAT", {"default": 1.25, "min": 0, "max": 10, "step": 0.01,
-                                         "tooltip": "Scaling factor for high-frequency components"}),
-                "freq_cutoff": ("INT", {"default": 20, "min": 1, "max": 10000, "step": 1,
-                                        "tooltip": "Number of frequency indices around center to consider as low-frequency"}),
-            }
-        }
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-    CATEGORY = "_for_testing"
-    DESCRIPTION = "Applies frequency-dependent scaling to the guidance"
-    def patch(self, model, scale_low, scale_high, freq_cutoff):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="FreSca",
+            display_name="FreSca",
+            category="_for_testing",
+            description="Applies frequency-dependent scaling to the guidance",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("scale_low", default=1.0, min=0, max=10, step=0.01,
+                               tooltip="Scaling factor for low-frequency components"),
+                io.Float.Input("scale_high", default=1.25, min=0, max=10, step=0.01,
+                               tooltip="Scaling factor for high-frequency components"),
+                io.Int.Input("freq_cutoff", default=20, min=1, max=10000, step=1,
+                             tooltip="Number of frequency indices around center to consider as low-frequency"),
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+            is_experimental=True,
+        )
+
+    @classmethod
+    def execute(cls, model, scale_low, scale_high, freq_cutoff):
         def custom_cfg_function(args):
             conds_out = args["conds_out"]
             if len(conds_out) <= 1 or None in args["conds"][:2]:
@@ -91,13 +99,16 @@ def custom_cfg_function(args):
         m = model.clone()
         m.set_model_sampler_pre_cfg_function(custom_cfg_function)

-        return (m,)
+        return io.NodeOutput(m)
+

+class FreScaExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            FreSca,
+        ]

-NODE_CLASS_MAPPINGS = {
-    "FreSca": FreSca,
-}

-NODE_DISPLAY_NAME_MAPPINGS = {
-    "FreSca": "FreSca",
-}
+async def comfy_entrypoint() -> FreScaExtension:
+    return FreScaExtension()

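Fourier_filter itself is untouched by the port. For intuition, a standalone sketch of the frequency split it performs (toy tensor; written to match the signature in the hunk header, not copied from the repo):

    import torch
    import torch.fft as fft

    x = torch.randn(1, 4, 64, 64)                        # toy latent [B, C, H, W]
    x_freq = fft.fftshift(fft.fftn(x.float(), dim=(-2, -1)), dim=(-2, -1))

    scale = torch.full_like(x_freq.real, 1.25)           # scale_high everywhere
    c_h, c_w, cut = x.shape[-2] // 2, x.shape[-1] // 2, 20
    scale[..., c_h - cut:c_h + cut, c_w - cut:c_w + cut] = 1.0  # scale_low box

    x_filtered = fft.ifftn(fft.ifftshift(x_freq * scale, dim=(-2, -1)), dim=(-2, -1)).real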

comfy_extras/nodes_mask.py

Lines changed: 12 additions & 9 deletions
@@ -12,35 +12,38 @@
 def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
     source = source.to(destination.device)
     if resize_source:
-        source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")
+        source = torch.nn.functional.interpolate(source, size=(destination.shape[-2], destination.shape[-1]), mode="bilinear")

     source = comfy.utils.repeat_to_batch_size(source, destination.shape[0])

-    x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier))
-    y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier))
+    x = max(-source.shape[-1] * multiplier, min(x, destination.shape[-1] * multiplier))
+    y = max(-source.shape[-2] * multiplier, min(y, destination.shape[-2] * multiplier))

     left, top = (x // multiplier, y // multiplier)
-    right, bottom = (left + source.shape[3], top + source.shape[2],)
+    right, bottom = (left + source.shape[-1], top + source.shape[-2],)

     if mask is None:
         mask = torch.ones_like(source)
     else:
         mask = mask.to(destination.device, copy=True)
-        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear")
+        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[-2], source.shape[-1]), mode="bilinear")
         mask = comfy.utils.repeat_to_batch_size(mask, source.shape[0])

     # calculate the bounds of the source that will be overlapping the destination
     # this prevents the source trying to overwrite latent pixels that are out of bounds
     # of the destination
-    visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),)
+    visible_width, visible_height = (destination.shape[-1] - left + min(0, x), destination.shape[-2] - top + min(0, y),)

     mask = mask[:, :, :visible_height, :visible_width]
+    if mask.ndim < source.ndim:
+        mask = mask.unsqueeze(1)
+
     inverse_mask = torch.ones_like(mask) - mask

-    source_portion = mask * source[:, :, :visible_height, :visible_width]
-    destination_portion = inverse_mask * destination[:, :, top:bottom, left:right]
+    source_portion = mask * source[..., :visible_height, :visible_width]
+    destination_portion = inverse_mask * destination[..., top:bottom, left:right]

-    destination[:, :, top:bottom, left:right] = source_portion + destination_portion
+    destination[..., top:bottom, left:right] = source_portion + destination_portion
     return destination

 class LatentCompositeMasked:

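Moving from explicit `[:, :, h, w]` indexing to negative axes and `...` is what lets composite handle latents with extra leading dimensions (for example 5-D video latents) as well as 4-D image latents. A quick sketch of the idiom:

    import torch

    img = torch.randn(1, 4, 64, 64)        # image latent [B, C, H, W]
    vid = torch.randn(1, 16, 8, 64, 64)    # video latent [B, C, T, H, W]

    for t in (img, vid):
        h, w = t.shape[-2], t.shape[-1]    # last two axes are always spatial
        patch = t[..., :h // 2, :w // 2]   # same slice works for any rank
        print(patch.shape)
    # torch.Size([1, 4, 32, 32])
    # torch.Size([1, 16, 8, 32, 32])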
