Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #111: add helpful generation metadata #123

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ This will launch gradio on port 7860 with txt2img. You can also use `docker comp

- Should generally be a multiple of 2x(n_samples)

## `--skip_metadata`

**Do not embed generation metadata (prompt, seed, size, etc.) in the generated file.**

- By default, the [XMP](https://en.wikipedia.org/wiki/Extensible_Metadata_Platform) Description field is used to embed most of the command-line parameters that were used to generate the image. Potentially privacy-sensitive parameters such as `outdir` are excluded. Pass `--skip_metadata` to turn this off.

<h1 align="center">Weighted Prompts</h1>

- Prompts can also be weighted to put relative emphasis on certain words.
Expand Down
1 change: 1 addition & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies:
- transformers==4.19.2
- torchmetrics==0.6.0
- kornia==0.6
- -e git+https://github.com/sowbug/tinyxmp.git@master#egg=tinyxmp
- -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
- -e git+https://github.com/openai/CLIP.git@main#egg=clip
- -e .
46 changes: 43 additions & 3 deletions optimizedSD/optimized_img2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from optimUtils import split_weighted_subprompts, logger
from transformers import logging
import pandas as pd
import tinyxmp
import xml.dom.minidom
import json
logging.set_verbosity_error()


Expand Down Expand Up @@ -53,6 +56,38 @@ def load_img(path, h0, w0):
return 2.0 * image - 1.0


def add_metadata(filename, opt):
    """Embed the generation options into the image at *filename* as XMP.

    Returns immediately when ``opt.skip_metadata`` is truthy. Otherwise every
    attribute of ``opt`` except ``outdir``, ``init_img`` and ``from_file`` is
    serialized to a JSON string, placed as the text of the ``rdf:li`` element
    under ``dc:description``, and the resulting RDF is handed to
    ``tinyxmp`` as UTF-8 bytes to be written into the file.
    """
    if opt.skip_metadata:
        return

    # Options left out of the embedded metadata (potentially privacy-sensitive).
    excluded = ['outdir', 'init_img', 'from_file']
    payload = json.dumps(
        {name: value for name, value in vars(opt).items() if name not in excluded}
    )

    target = tinyxmp.Metadata.load(filename)

    # The image was produced moments ago, so it carries no XMP worth keeping;
    # start from a blank description template instead of reading one back.
    template = '''
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:description>
<rdf:Seq><rdf:li></rdf:li>
</rdf:Seq>
</dc:description>
</rdf:Description>
</rdf:RDF>'''

    dom = xml.dom.minidom.parseString(template)
    slot = dom.getElementsByTagName("rdf:li")[0]
    slot.appendChild(dom.createTextNode(payload))

    # Serialize only the root <rdf:RDF> element (no XML declaration).
    rdf_root = dom.childNodes[0]
    target.write_xmp(rdf_root.toxml().encode("utf-8"))


config = "optimizedSD/v1-inference.yaml"
ckpt = "models/ldm/stable-diffusion-v1/model.ckpt"

Expand Down Expand Up @@ -174,6 +209,11 @@ def load_img(path, h0, w0):
choices=["ddim"],
default="ddim",
)
parser.add_argument(
"--skip_metadata",
action='store_true',
help="do not add generation metadata to image file.",
)
opt = parser.parse_args()

tic = time.time()
Expand Down Expand Up @@ -332,9 +372,9 @@ def load_img(path, h0, w0):
x_samples_ddim = modelFS.decode_first_stage(samples_ddim[i].unsqueeze(0))
x_sample = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
x_sample = 255.0 * rearrange(x_sample[0].cpu().numpy(), "c h w -> h w c")
Image.fromarray(x_sample.astype(np.uint8)).save(
os.path.join(sample_path, "seed_" + str(opt.seed) + "_" + f"{base_count:05}.{opt.format}")
)
filename = os.path.join(sample_path, "seed_" + str(opt.seed) + "_" + f"{base_count:05}.{opt.format}")
Image.fromarray(x_sample.astype(np.uint8)).save(filename)
add_metadata(filename, opt)
seeds += str(opt.seed) + ","
opt.seed += 1
base_count += 1
Expand Down
47 changes: 44 additions & 3 deletions optimizedSD/optimized_txt2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
from ldm.util import instantiate_from_config
from optimUtils import split_weighted_subprompts, logger
from transformers import logging
import tinyxmp
import xml.dom.minidom
import json

# from samplers import CompVisDenoiser
logging.set_verbosity_error()

Expand All @@ -33,6 +37,38 @@ def load_model_from_config(ckpt, verbose=False):
return sd


def add_metadata(filename, opt):
    """Embed the generation options into the image at *filename* as XMP.

    Args:
        filename: Path of the image file that was just written.
        opt: Parsed command-line options; read via ``vars(opt)``. Its
            ``skip_metadata`` attribute disables embedding when truthy.

    Returns:
        None. When embedding is enabled, the options (minus the excluded
        keys) are serialized to JSON, stored as the text of the ``rdf:li``
        element under ``dc:description``, and written through ``tinyxmp``.
    """
    if opt.skip_metadata:
        return

    # Same exclusion list as the img2img script's add_metadata, so file-path
    # options are never embedded by either script. Keys this script does not
    # define are simply absent from vars(opt), so listing them is harmless.
    SKIP_OPT_KEYS = ('outdir', 'init_img', 'from_file')
    safe_opts = {k: v for k, v in vars(opt).items() if k not in SKIP_OPT_KEYS}
    metadata = json.dumps(safe_opts)

    xmp_file = tinyxmp.Metadata.load(filename)
    # Since we just generated this file, we know there's no meaningful XMP
    # data in it. So we create an empty template.
    xmp = '''
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:description>
<rdf:Seq><rdf:li></rdf:li>
</rdf:Seq>
</dc:description>
</rdf:Description>
</rdf:RDF>'''

    doc = xml.dom.minidom.parseString(xmp)
    # createTextNode stores the JSON as character data; toxml() escapes any
    # markup characters in it when serializing.
    item = doc.getElementsByTagName("rdf:li")[0]
    item.appendChild(doc.createTextNode(metadata))
    # Write only the root <rdf:RDF> element, without an XML declaration.
    xmp_file.write_xmp(doc.childNodes[0].toxml().encode("utf-8"))


config = "optimizedSD/v1-inference.yaml"
ckpt = "models/ldm/stable-diffusion-v1/model.ckpt"

Expand Down Expand Up @@ -167,6 +203,11 @@ def load_model_from_config(ckpt, verbose=False):
choices=["ddim", "plms"],
default="plms",
)
parser.add_argument(
"--skip_metadata",
action='store_true',
help="do not add generation metadata to image file.",
)
opt = parser.parse_args()

tic = time.time()
Expand Down Expand Up @@ -309,9 +350,9 @@ def load_model_from_config(ckpt, verbose=False):
x_samples_ddim = modelFS.decode_first_stage(samples_ddim[i].unsqueeze(0))
x_sample = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
x_sample = 255.0 * rearrange(x_sample[0].cpu().numpy(), "c h w -> h w c")
Image.fromarray(x_sample.astype(np.uint8)).save(
os.path.join(sample_path, "seed_" + str(opt.seed) + "_" + f"{base_count:05}.{opt.format}")
)
filename = os.path.join(sample_path, "seed_" + str(opt.seed) + "_" + f"{base_count:05}.{opt.format}")
Image.fromarray(x_sample.astype(np.uint8)).save(filename)
add_metadata(filename, opt)
seeds += str(opt.seed) + ","
opt.seed += 1
base_count += 1
Expand Down