Skip to content

Commit 15b5814

Browse files
committed
optimize QwenPrompt2Image node
1 parent 9ab6875 commit 15b5814

File tree

4 files changed

+158
-85
lines changed

4 files changed

+158
-85
lines changed

py/Qwen_image2prompt.py

+12-77
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,11 @@
11
import os.path
22
from pathlib import Path
3-
from transformers import AutoModel, AutoProcessor, StoppingCriteria, StoppingCriteriaList
43
import torch
54
from PIL import Image
5+
import math
66
from torchvision.transforms import ToPILImage
7-
from huggingface_hub import snapshot_download
87
import folder_paths
9-
10-
# Directory under the models folder that UformGen2QwenChat uses as its model path.
files_for_uform_gen2_qwen = Path(os.path.join(folder_paths.models_dir, "LLavacheckpoints", "files_for_uform_gen2_qwen"))
11-
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires once a configured stop token is generated."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the newest token of the first sequence is a stop id.

        Only ``input_ids[0]`` is inspected; ``scores`` and ``kwargs`` are unused.
        """
        stop_ids = [151645]  # Define stop tokens as per your model's specifics
        newest_token = input_ids[0][-1]
        return any(bool(newest_token == stop_id) for stop_id in stop_ids)
18-
19-
class UformGen2QwenChat:
    """Chat wrapper around the uform-gen2-qwen checkpoint stored on disk.

    The model and processor are loaded from ``files_for_uform_gen2_qwen`` with
    ``trust_remote_code=True``; the model is moved to ``cuda:0`` when CUDA is
    available and to the CPU otherwise.
    """

    def __init__(self):
        # The snapshot_download("unum-cloud/uform-gen2-qwen-500m", ...) call was
        # disabled; the checkpoint directory is used as the model path directly.
        self.model_path = files_for_uform_gen2_qwen
        print("Model path:", self.model_path)
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.model = AutoModel.from_pretrained(self.model_path, trust_remote_code=True).to(self.device)
        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)

    def chat_response(self, message, history, image_path):
        """Generate a reply to ``message`` about the image stored at ``image_path``.

        Args:
            message: The user's question or instruction.
            history: Iterable of ``(user_msg, assistant_msg)`` pairs from earlier turns.
            image_path: Path of the image file to open with PIL.

        Returns:
            The decoded model output with special tokens skipped. The text still
            contains the rendered chat template, not just the final answer.
        """
        stop = StopOnTokens()
        messages = [{"role": "system", "content": "You are a helpful Assistant."}]

        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

        # The image placeholder is only injected on the first turn (empty history).
        if len(messages) == 1:
            message = f" <image>{message}"

        messages.append({"role": "user", "content": message})

        model_inputs = self.processor.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        )

        # Open the image in a context manager so the file handle is released
        # as soon as the features are extracted.
        with Image.open(image_path) as image:
            image_tensor = self.processor.feature_extractor(image).unsqueeze(0)

        # The mask spans the text tokens plus the image latents, minus one.
        attention_mask = torch.ones(
            1, model_inputs.shape[1] + self.processor.num_image_latents - 1
        )

        model_inputs = {
            "input_ids": model_inputs,
            "images": image_tensor,
            "attention_mask": attention_mask
        }

        model_inputs = {k: v.to(self.device) for k, v in model_inputs.items()}

        output = self.model.generate(
            **model_inputs,
            max_new_tokens=1024,
            stopping_criteria=StoppingCriteriaList([stop])
        )

        response_text = self.processor.tokenizer.decode(output[0], skip_special_tokens=True)
        return response_text
8+
from .imagefunc import files_for_uform_gen2_qwen, StopOnTokens, UformGen2QwenChat, clear_memory
779

7810
# Example of integrating UformGen2QwenChat into a node-like structure
7911
class QWenImage2Prompt:
@@ -96,19 +28,22 @@ def uform_gen2_qwen_chat(self, image, question):
9628
chat_model = UformGen2QwenChat()
9729
history = [] # Example empty history
9830
pil_image = ToPILImage()(image[0].permute(2, 0, 1))
31+
width, height = pil_image.size
32+
ratio = width / height
33+
if width * height > 1024 * 1024:
34+
target_width = math.sqrt(ratio * 1024 * 1024)
35+
target_height = target_width / ratio
36+
target_width = int(target_width)
37+
target_height = int(target_height)
38+
pil_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
9939
temp_path = files_for_uform_gen2_qwen / "temp.png"
10040
pil_image.save(temp_path)
101-
41+
question = f"{question} but output no more then 80 words."
10242
response = chat_model.chat_response(question, history, temp_path)
10343

10444
# Cleanup
10545
del chat_model
106-
import gc
107-
gc.collect()
108-
if torch.cuda.is_available():
109-
torch.cuda.empty_cache()
110-
torch.cuda.ipc_collect()
111-
46+
clear_memory()
11247
return (response.split("assistant\n", 1)[1], )
11348

11449
NODE_CLASS_MAPPINGS = {

py/imagefunc.py

+143-1
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@
3030
from skimage import img_as_float, img_as_ubyte
3131
import torchvision.transforms.functional as TF
3232
import torch.nn.functional as F
33+
from transformers import AutoModel, AutoProcessor, StoppingCriteria, StoppingCriteriaList
3334
import colorsys
3435
from typing import Union
3536
import folder_paths
3637
from .briarmbg import BriaRMBG
3738
from .filmgrainer import processing as processing_utils
3839
from .filmgrainer import filmgrainer as filmgrainer
3940
import wget
41+
import gc
4042

4143
from .blendmodes import *
4244

@@ -1972,6 +1974,13 @@ def extract_all_numbers_from_str(string, checkint:bool=False):
19721974

19731975
return number_list
19741976

1977+
def clear_memory():
    """Run the garbage collector and, when CUDA is available, empty the CUDA caches."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
1983+
19751984
def tensor_info(tensor:object) -> str:
19761985
value = ''
19771986
if isinstance(tensor, torch.Tensor):
@@ -1986,6 +1995,91 @@ def tensor_info(tensor:object) -> str:
19861995
value = f"tensor_info: Not tensor, type is {type(tensor)}"
19871996
return value
19881997

1998+
# Remove duplicated sentences
def remove_duplicate_string(text:str) -> str:
    """Drop repeated fragments from ``text``, keeping the first occurrence of each.

    The text is split at whitespace that follows one of ``: ; , . ! ?``; the
    fragments are compared exactly and the unique ones, in their original
    order, are joined back together with single spaces.
    """
    fragments = re.split(r'(?<=[:;,.!?])\s+', text)
    # dict.fromkeys keeps insertion order, so the first occurrence of each fragment wins.
    return ' '.join(dict.fromkeys(fragments))
2008+
2009+
# Directory under the models folder that UformGen2QwenChat uses as its model path.
files_for_uform_gen2_qwen = Path(folder_paths.models_dir) / "LLavacheckpoints" / "files_for_uform_gen2_qwen"
2010+
class StopOnTokens(StoppingCriteria):
    """Generation stopping criterion keyed on a fixed list of stop token ids."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Tell the generator to stop when the last token of sequence 0 is a stop id.

        ``scores`` and ``kwargs`` are accepted but not used.
        """
        stop_ids = [151645]  # Define stop tokens as per your model's specifics
        last_token = input_ids[0][-1]
        matched = False
        for candidate in stop_ids:
            if last_token == candidate:
                matched = True
                break
        return matched
2017+
2018+
class UformGen2QwenChat:
    """Chat wrapper around the uform-gen2-qwen checkpoint stored on disk.

    The model and processor are loaded from ``files_for_uform_gen2_qwen`` with
    ``trust_remote_code=True``; the model is moved to CUDA when it is
    available and to the CPU otherwise.
    """

    def __init__(self):
        # The snapshot_download("unum-cloud/uform-gen2-qwen-500m", ...) call was
        # disabled, so huggingface_hub is no longer imported here; the checkpoint
        # directory is used as the model path directly.
        self.model_path = files_for_uform_gen2_qwen
        print("Model path:", self.model_path)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModel.from_pretrained(self.model_path, trust_remote_code=True).to(self.device)
        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)

    def chat_response(self, message, history, image_path):
        """Generate a reply to ``message`` about the image stored at ``image_path``.

        Args:
            message: The user's question or instruction.
            history: Iterable of ``(user_msg, assistant_msg)`` pairs from earlier turns.
            image_path: Path of the image file to open with PIL.

        Returns:
            The decoded model output with special tokens skipped and repeated
            fragments removed by ``remove_duplicate_string``. The text still
            contains the rendered chat template, not just the final answer.
        """
        stop = StopOnTokens()
        messages = [{"role": "system", "content": "You are a helpful Assistant."}]

        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

        # The image placeholder is only injected on the first turn (empty history).
        if len(messages) == 1:
            message = f" <image>{message}"

        messages.append({"role": "user", "content": message})

        model_inputs = self.processor.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        )

        # Open the image in a context manager so the file handle is released
        # as soon as the features are extracted.
        with Image.open(image_path) as image:
            image_tensor = self.processor.feature_extractor(image).unsqueeze(0)

        # The mask spans the text tokens plus the image latents, minus one.
        attention_mask = torch.ones(
            1, model_inputs.shape[1] + self.processor.num_image_latents - 1
        )

        model_inputs = {
            "input_ids": model_inputs,
            "images": image_tensor,
            "attention_mask": attention_mask
        }

        model_inputs = {k: v.to(self.device) for k, v in model_inputs.items()}

        # Sampling with a repetition penalty is used for generation; gradients
        # are not needed, so the call runs under inference mode.
        with torch.inference_mode():
            output = self.model.generate(
                **model_inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                repetition_penalty=1.2,
                stopping_criteria=StoppingCriteriaList([stop])
            )

        response_text = self.processor.tokenizer.decode(output[0], skip_special_tokens=True)
        response_text = remove_duplicate_string(response_text)
        return response_text
2082+
19892083
'''CLASS'''
19902084

19912085
class AnyType(str):
@@ -2145,4 +2239,52 @@ def get_api_key(api_name:str) -> str:
21452239
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
21462240
"threshold": "BLOCK_NONE"
21472241
}
2148-
]
2242+
]
2243+
2244+
minicpm_llama3_v25_prompts = """
2245+
# MISSION
2246+
You are an imagine generator for a slide deck tool. You will be given the text or description of a slide and you'll generate a few image descriptions that will be fed to an AI image generator. It will need to have a particular format (seen below). You will also be given some examples below. Think metaphorically and symbolically.
2247+
2248+
# FORMAT
2249+
The format should follow this general pattern:
2250+
2251+
<MAIN SUBJECT>, <DESCRIPTION OF MAIN SUBJECT>, <BACKGROUND OR CONTEXT, LOCATION, ETC>, <STYLE, GENRE, MOTIF, ETC>, <COLOR SCHEME>, <CAMERA DETAILS>
2252+
2253+
It's not strictly required, as you'll see below, you can pick and choose various aspects, but this is the general order of operations
2254+
2255+
# EXAMPLES
2256+
2257+
a Shakespeare stage play, yellow mist, atmospheric, set design by Michel Crête, Aerial acrobatics design by André Simard, hyperrealistic, 4K, Octane render, unreal engine
2258+
2259+
The Moon Knight dissolving into swirling sand, volumetric dust, cinematic lighting, close up portrait
2260+
2261+
ethereal Bohemian Waxwing bird, Bombycilla garrulus :: intricate details, ornate, detailed illustration, octane render :: Johanna Rupprecht style, William Morris style :: trending on artstation
2262+
2263+
steampunk cat, octane render, hyper realistic
2264+
2265+
Hyper detailed movie still that fuses the iconic tea party scene from Alice in Wonderland showing the hatter and an adult alice. a wooden table is filled with teacups and cannabis plants. The scene is surrounded by flying weed. Some playcards flying around in the air. Captured with a Hasselblad medium format camera
2266+
2267+
venice in a carnival picture 3, in the style of fantastical compositions, colorful, eye-catching compositions, symmetrical arrangements, navy and aquamarine, distinctive noses, gothic references, spiral group –style expressive
2268+
2269+
Beautiful and terrifying Egyptian mummy, flirting and vamping with the viewer, rotting and decaying climbing out of a sarcophagus lunging at the viewer, symmetrical full body Portrait photo, elegant, highly detailed, soft ambient lighting, rule of thirds, professional photo HD Photography, film, sony, portray, kodak Polaroid 3200dpi scan medium format film Portra 800, vibrantly colored portrait photo by Joel – Peter Witkin + Diane Arbus + Rhiannon + Mike Tang, fashion shoot
2270+
2271+
A grandmotherly Fate sits on a cozy cosmic throne knitting with mirrored threads of time, the solar system spins like clockwork behind her as she knits the futures of people together like an endless collage of destiny, maximilism, cinematic quality, sharp – focus, intricate details
2272+
2273+
A cloud with several airplanes flying around on top, in the style of detailed fantasy art, nightcore, quiet moments captured in paint, radiant clusters, i cant believe how beautiful this is, detailed character design, dark cyan and light crimson
2274+
2275+
An incredibly detailed close up macro beauty photo of an Asian model, hands holding a bouquet of pink roses, surrounded by scary crows from hell. Shot on a Hasselblad medium format camera with a 100mm lens. Unmistakable to a photograph. Cinematic lighting. Photographed by Tim Walker, trending on 500px
2276+
2277+
Game-Art | An island with different geographical properties and multiple small cities floating in space ::10 Island | Floating island in space – waterfalls over the edge of the island falling into space – island fragments floating around the edge of the island, Mountain Ranges – Deserts – Snowy Landscapes – Small Villages – one larger city ::8 Environment | Galaxy – in deep space – other universes can be seen in the distance ::2 Style | Unreal Engine 5 – 8K UHD – Highly Detailed – Game-Art
2278+
2279+
a warrior sitting on a giant creature and riding it in the water, with wings spread wide in the water, camera positioned just above the water to capture this beautiful scene, surface showing intricate details of the creature’s scales, fins, and wings, majesty, Hero rides on the creature in the water, digitally enhanced, enhanced graphics, straight, sharp focus, bright lighting, closeup, cinematic, Bronze, Azure, blue, ultra highly detailed, 18k, sharp focus, bright photo with rich colors, full coverage of a scene, straight view shot
2280+
2281+
A real photographic landscape painting with incomparable reality,Super wide,Ominous sky,Sailing boat,Wooden boat,Lotus,Huge waves,Starry night,Harry potter,Volumetric lighting,Clearing,Realistic,James gurney,artstation
2282+
2283+
Tiger monster with monstera plant over him, back alley in Bangkok, art by Otomo Katsuhiro crossover Yayoi Kusama and Hayao Miyazaki
2284+
2285+
An elderly Italian woman with wrinkles, sitting in a local cafe filled with plants and wood decorations, looking out the window, wearing a white top with light purple linen blazer, natural afternoon light shining through the window
2286+
2287+
# OUTPUT
2288+
Your output should just be an plain list of descriptions. No numbers, no extraneous labels, no hyphens.
2289+
Create only one prompt.
2290+
"""

py/purge_vram.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import comfy.model_management as mm
2-
from .imagefunc import AnyType
2+
from .imagefunc import AnyType, clear_memory
33

44
any = AnyType("*")
55
NODE_NAME = 'PurgeVRAM'
@@ -29,11 +29,7 @@ def purge_vram(self, anything, purge_cache, purge_models):
2929
import torch.cuda
3030
import gc
3131
import comfy.model_management
32-
gc.collect()
33-
if purge_cache:
34-
if torch.cuda.is_available():
35-
torch.cuda.empty_cache()
36-
torch.cuda.ipc_collect()
32+
clear_memory()
3733
if purge_models:
3834
comfy.model_management.unload_all_models()
3935
comfy.model_management.soft_empty_cache()

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "comfyui_layerstyle"
33
description = "A set of nodes for ComfyUI it generate image like Adobe Photoshop's Layer Style. the Drop Shadow is first completed node, and follow-up work is in progress."
4-
version = "1.0.15"
4+
version = "1.0.16"
55
license = "MIT"
66
dependencies = ["numpy", "pillow", "torch", "matplotlib", "Scipy", "scikit_image", "opencv-contrib-python", "pymatting", "segment_anything", "timm", "addict", "yapf", "colour-science", "wget", "mediapipe", "loguru", "typer_config", "fastapi", "rich", "google-generativeai", "diffusers", "omegaconf", "tqdm", "transformers", "kornia", "image-reward", "ultralytics", "blend_modes", "blind-watermark", "qrcode", "pyzbar", "psd-tools"]
77

0 commit comments

Comments
 (0)