Commit 9f4c0f3

Merge branch 'master' into asset-management

2 parents ca39552 + 196954a

File tree: 13 files changed (+407, -359 lines)

.github/workflows/test-unit.yml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ jobs:
   test:
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-2022, macos-latest]
     runs-on: ${{ matrix.os }}
     continue-on-error: true
     steps:

CODEOWNERS

Lines changed: 1 addition & 23 deletions
@@ -1,25 +1,3 @@
 # Admins
 * @comfyanonymous
-
-# Note: Github teams syntax cannot be used here as the repo is not owned by Comfy-Org.
-# Inlined the team members for now.
-
-# Maintainers
-*.md @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/tests/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/tests-unit/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/notebooks/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/script_examples/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/.github/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/requirements.txt @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-/pyproject.toml @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @Kosinkadink @christian-byrne @guill
-
-# Python web server
-/api_server/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
-/app/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
-/utils/ @yoland68 @robinjhuang @webfiltered @pythongosssss @ltdrdata @christian-byrne @guill
-
-# Node developers
-/comfy_extras/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
-/comfy/comfy_types/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
-/comfy_api_nodes/ @yoland68 @robinjhuang @pythongosssss @ltdrdata @Kosinkadink @webfiltered @christian-byrne @guill
+* @kosinkadink

comfy/samplers.py

Lines changed: 2 additions & 2 deletions
@@ -360,7 +360,7 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
 def cfg_function(model, cond_pred, uncond_pred, cond_scale, x, timestep, model_options={}, cond=None, uncond=None):
     if "sampler_cfg_function" in model_options:
         args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep,
-                "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options}
+                "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options, "input_cond": cond, "input_uncond": uncond}
         cfg_result = x - model_options["sampler_cfg_function"](args)
     else:
         cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale
@@ -390,7 +390,7 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
     for fn in model_options.get("sampler_pre_cfg_function", []):
         args = {"conds":conds, "conds_out": out, "cond_scale": cond_scale, "timestep": timestep,
                 "input": x, "sigma": timestep, "model": model, "model_options": model_options}
-        out  = fn(args)
+        out = fn(args)

     return cfg_function(model, out[0], out[1], cond_scale, x, timestep, model_options=model_options, cond=cond, uncond=uncond_)

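The two new keys expose the raw conditioning lists to custom CFG callbacks. A minimal sketch of a callback that could now read them; the callback name is hypothetical, and this assumes the existing ModelPatcher.set_model_sampler_cfg_function hook:

    def cfg_with_cond_access(args):
        # Plain CFG math over the denoised predictions.
        cond_d = args["cond_denoised"]
        uncond_d = args["uncond_denoised"]
        denoised = uncond_d + (cond_d - uncond_d) * args["cond_scale"]

        # New in this commit: the raw conditioning lists are forwarded too.
        # Either may be None; a real callback could branch on their metadata.
        raw_cond = args["input_cond"]
        raw_uncond = args["input_uncond"]
        _ = (raw_cond, raw_uncond)

        # cfg_function computes cfg_result = x - fn(args), so return x - denoised.
        return args["input"] - denoised

    m = model.clone()  # assumes `model` is a loaded ModelPatcher
    m.set_model_sampler_cfg_function(cfg_with_cond_access)
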
comfy/text_encoders/hunyuan_image.py

Lines changed: 7 additions & 1 deletion
@@ -63,7 +63,13 @@ def __init__(self, byt5=True, device="cpu", dtype=None, model_options={}):
         self.byt5_small = None

     def encode_token_weights(self, token_weight_pairs):
-        cond, p, extra = super().encode_token_weights(token_weight_pairs)
+        tok_pairs = token_weight_pairs["qwen25_7b"][0]
+        template_end = -1
+        if tok_pairs[0][0] == 27:
+            if len(tok_pairs) > 36: # refiner prompt uses a fixed 36 template_end
+                template_end = 36
+
+        cond, p, extra = super().encode_token_weights(token_weight_pairs, template_end=template_end)
         if self.byt5_small is not None and "byt5" in token_weight_pairs:
             out = self.byt5_small.encode_token_weights(token_weight_pairs["byt5"])
             extra["conditioning_byt5small"] = out[0]


comfy/text_encoders/qwen_image.py

Lines changed: 29 additions & 19 deletions
@@ -18,13 +18,22 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.llama_template_images = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"

     def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], **kwargs):
-        if llama_template is None:
-            if len(images) > 0:
-                llama_text = self.llama_template_images.format(text)
-            else:
-                llama_text = self.llama_template.format(text)
+        skip_template = False
+        if text.startswith('<|im_start|>'):
+            skip_template = True
+        if text.startswith('<|start_header_id|>'):
+            skip_template = True
+
+        if skip_template:
+            llama_text = text
         else:
-            llama_text = llama_template.format(text)
+            if llama_template is None:
+                if len(images) > 0:
+                    llama_text = self.llama_template_images.format(text)
+                else:
+                    llama_text = self.llama_template.format(text)
+            else:
+                llama_text = llama_template.format(text)
         tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
         key_name = next(iter(tokens))
         embed_count = 0
@@ -47,22 +56,23 @@ class QwenImageTEModel(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, model_options={}):
         super().__init__(device=device, dtype=dtype, name="qwen25_7b", clip_model=Qwen25_7BVLIModel, model_options=model_options)

-    def encode_token_weights(self, token_weight_pairs):
+    def encode_token_weights(self, token_weight_pairs, template_end=-1):
         out, pooled, extra = super().encode_token_weights(token_weight_pairs)
         tok_pairs = token_weight_pairs["qwen25_7b"][0]
         count_im_start = 0
-        for i, v in enumerate(tok_pairs):
-            elem = v[0]
-            if not torch.is_tensor(elem):
-                if isinstance(elem, numbers.Integral):
-                    if elem == 151644 and count_im_start < 2:
-                        template_end = i
-                        count_im_start += 1
-
-        if out.shape[1] > (template_end + 3):
-            if tok_pairs[template_end + 1][0] == 872:
-                if tok_pairs[template_end + 2][0] == 198:
-                    template_end += 3
+        if template_end == -1:
+            for i, v in enumerate(tok_pairs):
+                elem = v[0]
+                if not torch.is_tensor(elem):
+                    if isinstance(elem, numbers.Integral):
+                        if elem == 151644 and count_im_start < 2:
+                            template_end = i
+                            count_im_start += 1
+
+            if out.shape[1] > (template_end + 3):
+                if tok_pairs[template_end + 1][0] == 872:
+                    if tok_pairs[template_end + 2][0] == 198:
+                        template_end += 3

         out = out[:, template_end:]

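The practical effect of the new skip_template branch: a prompt that already starts with a chat header is passed through verbatim instead of being wrapped in the template a second time. A hedged usage sketch, assuming `clip` is the CLIP object of a loaded Qwen-Image checkpoint:

    custom = ("<|im_start|>system\nYou are a terse captioner.<|im_end|>\n"
              "<|im_start|>user\nA red fox in fresh snow<|im_end|>\n"
              "<|im_start|>assistant\n")

    # Starts with "<|im_start|>", so tokenize_with_weights sets skip_template
    # and does not apply llama_template again.
    tokens = clip.tokenize(custom)
    cond = clip.encode_from_tokens_scheduled(tokens)
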
comfy_extras/nodes_clip_sdxl.py

Lines changed: 57 additions & 40 deletions
@@ -1,43 +1,52 @@
-from nodes import MAX_RESOLUTION
+from typing_extensions import override

-class CLIPTextEncodeSDXLRefiner:
+import nodes
+from comfy_api.latest import ComfyExtension, io
+
+
+class CLIPTextEncodeSDXLRefiner(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
-            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
-            }}
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
-
-    CATEGORY = "advanced/conditioning"
-
-    def encode(self, clip, ascore, width, height, text):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="CLIPTextEncodeSDXLRefiner",
+            category="advanced/conditioning",
+            inputs=[
+                io.Float.Input("ascore", default=6.0, min=0.0, max=1000.0, step=0.01),
+                io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.String.Input("text", multiline=True, dynamic_prompts=True),
+                io.Clip.Input("clip"),
+            ],
+            outputs=[io.Conditioning.Output()],
+        )
+
+    @classmethod
+    def execute(cls, clip, ascore, width, height, text) -> io.NodeOutput:
         tokens = clip.tokenize(text)
-        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height}), )
+        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"aesthetic_score": ascore, "width": width, "height": height}))

-class CLIPTextEncodeSDXL:
+class CLIPTextEncodeSDXL(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "clip": ("CLIP", ),
-            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
-            "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
-            "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "text_g": ("STRING", {"multiline": True, "dynamicPrompts": True}),
-            "text_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
-            }}
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
-
-    CATEGORY = "advanced/conditioning"
-
-    def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="CLIPTextEncodeSDXL",
+            category="advanced/conditioning",
+            inputs=[
+                io.Clip.Input("clip"),
+                io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("crop_w", default=0, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("crop_h", default=0, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("target_width", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.Int.Input("target_height", default=1024, min=0, max=nodes.MAX_RESOLUTION),
+                io.String.Input("text_g", multiline=True, dynamic_prompts=True),
+                io.String.Input("text_l", multiline=True, dynamic_prompts=True),
+            ],
+            outputs=[io.Conditioning.Output()],
+        )
+
+    @classmethod
+    def execute(cls, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l) -> io.NodeOutput:
         tokens = clip.tokenize(text_g)
         tokens["l"] = clip.tokenize(text_l)["l"]
         if len(tokens["l"]) != len(tokens["g"]):
@@ -46,9 +55,17 @@ def encode(self, clip, width, height, crop_w, crop_h, target_width, target_heigh
             tokens["l"] += empty["l"]
         while len(tokens["l"]) > len(tokens["g"]):
             tokens["g"] += empty["g"]
-        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}), )
+        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}))
+
+
+class ClipSdxlExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            CLIPTextEncodeSDXLRefiner,
+            CLIPTextEncodeSDXL,
+        ]
+

-NODE_CLASS_MAPPINGS = {
-    "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner,
-    "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL,
-}
+async def comfy_entrypoint() -> ClipSdxlExtension:
+    return ClipSdxlExtension()

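The port swaps the dict-based INPUT_TYPES/NODE_CLASS_MAPPINGS registration for the schema API from comfy_api.latest. A stripped-down sketch of the same pattern with a toy node (ExampleEcho and io.String.Output are illustrative assumptions; only the classes in the diff above are confirmed by this commit):

    from typing_extensions import override
    from comfy_api.latest import ComfyExtension, io


    class ExampleEcho(io.ComfyNode):  # hypothetical toy node, not in this commit
        @classmethod
        def define_schema(cls):
            return io.Schema(
                node_id="ExampleEcho",
                category="_for_testing",
                inputs=[io.String.Input("text", multiline=True)],
                outputs=[io.String.Output()],  # assumes io.String has an Output, mirroring io.Conditioning
            )

        @classmethod
        def execute(cls, text) -> io.NodeOutput:
            return io.NodeOutput(text)


    class ExampleExtension(ComfyExtension):
        @override
        async def get_node_list(self) -> list[type[io.ComfyNode]]:
            return [ExampleEcho]


    async def comfy_entrypoint() -> ExampleExtension:
        return ExampleExtension()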

comfy_extras/nodes_fresca.py

Lines changed: 36 additions & 25 deletions
@@ -1,6 +1,8 @@
 # Code based on https://github.com/WikiChao/FreSca (MIT License)
 import torch
 import torch.fft as fft
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io


 def Fourier_filter(x, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
@@ -51,25 +53,31 @@ def Fourier_filter(x, scale_low=1.0, scale_high=1.5, freq_cutoff=20):
     return x_filtered


-class FreSca:
+class FreSca(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL",),
-                "scale_low": ("FLOAT", {"default": 1.0, "min": 0, "max": 10, "step": 0.01,
-                                        "tooltip": "Scaling factor for low-frequency components"}),
-                "scale_high": ("FLOAT", {"default": 1.25, "min": 0, "max": 10, "step": 0.01,
-                                         "tooltip": "Scaling factor for high-frequency components"}),
-                "freq_cutoff": ("INT", {"default": 20, "min": 1, "max": 10000, "step": 1,
-                                        "tooltip": "Number of frequency indices around center to consider as low-frequency"}),
-            }
-        }
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "patch"
-    CATEGORY = "_for_testing"
-    DESCRIPTION = "Applies frequency-dependent scaling to the guidance"
-    def patch(self, model, scale_low, scale_high, freq_cutoff):
+    def define_schema(cls):
+        return io.Schema(
+            node_id="FreSca",
+            display_name="FreSca",
+            category="_for_testing",
+            description="Applies frequency-dependent scaling to the guidance",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("scale_low", default=1.0, min=0, max=10, step=0.01,
+                               tooltip="Scaling factor for low-frequency components"),
+                io.Float.Input("scale_high", default=1.25, min=0, max=10, step=0.01,
+                               tooltip="Scaling factor for high-frequency components"),
+                io.Int.Input("freq_cutoff", default=20, min=1, max=10000, step=1,
+                             tooltip="Number of frequency indices around center to consider as low-frequency"),
+            ],
+            outputs=[
+                io.Model.Output(),
+            ],
+            is_experimental=True,
+        )
+
+    @classmethod
+    def execute(cls, model, scale_low, scale_high, freq_cutoff):
         def custom_cfg_function(args):
             conds_out = args["conds_out"]
             if len(conds_out) <= 1 or None in args["conds"][:2]:
@@ -91,13 +99,16 @@ def custom_cfg_function(args):
         m = model.clone()
         m.set_model_sampler_pre_cfg_function(custom_cfg_function)

-        return (m,)
+        return io.NodeOutput(m)
+

+class FreScaExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            FreSca,
+        ]

-NODE_CLASS_MAPPINGS = {
-    "FreSca": FreSca,
-}

-NODE_DISPLAY_NAME_MAPPINGS = {
-    "FreSca": "FreSca",
-}
+async def comfy_entrypoint() -> FreScaExtension:
+    return FreScaExtension()

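Fourier_filter itself is untouched by the port. For intuition, a standalone sketch of the frequency split it performs (toy tensor; written to match the signature in the hunk header, not copied from the repo):

    import torch
    import torch.fft as fft

    x = torch.randn(1, 4, 64, 64)                        # toy latent [B, C, H, W]
    x_freq = fft.fftshift(fft.fftn(x.float(), dim=(-2, -1)), dim=(-2, -1))

    scale = torch.full_like(x_freq.real, 1.25)           # scale_high everywhere
    c_h, c_w, cut = x.shape[-2] // 2, x.shape[-1] // 2, 20
    scale[..., c_h - cut:c_h + cut, c_w - cut:c_w + cut] = 1.0  # scale_low box

    x_filtered = fft.ifftn(fft.ifftshift(x_freq * scale, dim=(-2, -1)), dim=(-2, -1)).real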

comfy_extras/nodes_mask.py

Lines changed: 12 additions & 9 deletions
@@ -12,35 +12,38 @@
 def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
     source = source.to(destination.device)
     if resize_source:
-        source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")
+        source = torch.nn.functional.interpolate(source, size=(destination.shape[-2], destination.shape[-1]), mode="bilinear")

     source = comfy.utils.repeat_to_batch_size(source, destination.shape[0])

-    x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier))
-    y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier))
+    x = max(-source.shape[-1] * multiplier, min(x, destination.shape[-1] * multiplier))
+    y = max(-source.shape[-2] * multiplier, min(y, destination.shape[-2] * multiplier))

     left, top = (x // multiplier, y // multiplier)
-    right, bottom = (left + source.shape[3], top + source.shape[2],)
+    right, bottom = (left + source.shape[-1], top + source.shape[-2],)

     if mask is None:
         mask = torch.ones_like(source)
     else:
         mask = mask.to(destination.device, copy=True)
-        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear")
+        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[-2], source.shape[-1]), mode="bilinear")
         mask = comfy.utils.repeat_to_batch_size(mask, source.shape[0])

     # calculate the bounds of the source that will be overlapping the destination
     # this prevents the source trying to overwrite latent pixels that are out of bounds
     # of the destination
-    visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),)
+    visible_width, visible_height = (destination.shape[-1] - left + min(0, x), destination.shape[-2] - top + min(0, y),)

     mask = mask[:, :, :visible_height, :visible_width]
+    if mask.ndim < source.ndim:
+        mask = mask.unsqueeze(1)
+
     inverse_mask = torch.ones_like(mask) - mask

-    source_portion = mask * source[:, :, :visible_height, :visible_width]
-    destination_portion = inverse_mask * destination[:, :, top:bottom, left:right]
+    source_portion = mask * source[..., :visible_height, :visible_width]
+    destination_portion = inverse_mask * destination[..., top:bottom, left:right]

-    destination[:, :, top:bottom, left:right] = source_portion + destination_portion
+    destination[..., top:bottom, left:right] = source_portion + destination_portion
     return destination

 class LatentCompositeMasked:

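Moving from explicit `[:, :, h, w]` indexing to negative axes and `...` is what lets composite handle latents with extra leading dimensions (for example 5-D video latents) as well as 4-D image latents. A quick sketch of the idiom:

    import torch

    img = torch.randn(1, 4, 64, 64)        # image latent [B, C, H, W]
    vid = torch.randn(1, 16, 8, 64, 64)    # video latent [B, C, T, H, W]

    for t in (img, vid):
        h, w = t.shape[-2], t.shape[-1]    # last two axes are always spatial
        patch = t[..., :h // 2, :w // 2]   # same slice works for any rank
        print(patch.shape)
    # torch.Size([1, 4, 32, 32])
    # torch.Size([1, 16, 8, 32, 32])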
