
Commit c0d27b1

xl-sr committed: improvements to gen_images/gen_cond_samplesheet. Added multi-modal truncation
1 parent ec732c2 commit c0d27b1

File tree: 5 files changed, +122 −134 lines

README.md (+7 −10)

@@ -102,7 +102,7 @@ If you have enough compute, a good tactic is to train several stages in parallel
 
 To generate samples and interpolation videos, run
 ```
-python gen_images.py --outdir=out --trunc=0.7 --seeds=10-15 \
+python gen_images.py --outdir=out --trunc=0.7 --seeds=10-15 --batch-sz 1 \
 --network=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/pokemon256.pkl
 ```
 and
@@ -112,27 +112,24 @@ python gen_video.py --output=lerp.mp4 --trunc=0.7 --seeds=0-31 --grid=4x2 \
 ```
 For class-conditional models, you can pass the class index via ```--class```; an index-to-label dictionary for ImageNet can be found [here](https://github.com/xl-sr/stylegan_xl_release/blob/main/media/imagenet_idx2labels.txt).
 
-Generating large sample sheets:
+Generate a conditional sample sheet:
 ```
-# unconditional model
-python gen_samplesheet.py --outdir=sample_sheets --trunc=1.0 \
---network=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/pokemon256.pkl \
---samples-per-class 128
-
-# conditional model
 python gen_samplesheet.py --outdir=sample_sheets --trunc=1.0 \
 --network=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet128.pkl \
---max-classes 100 --samples-per-class 4 --classes-per-row 5
+--samples-per-class 4 --classes 0-32 --grid-width 32
 ```
 
+For the ImageNet models, we enable multi-modal truncation (as proposed by [self-distilled
+GAN](https://self-distilled-stylegan.github.io/)). To switch from uni- to multi-modal truncation, pass ```--centroids-path=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet_centroids.npy```.
+
 We provide the following pretrained models (pass the url as `PATH_TO_NETWORK_PKL`):
 
 |Dataset| Res | FID | PATH
 :--- | ---: | ---: | :---
 ImageNet| 16<sup>2</sup> |0.74| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet16.pkl`</sub><br>
 ImageNet| 32<sup>2</sup> |1.11| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet32.pkl`</sub><br>
 ImageNet| 64<sup>2</sup> |1.52| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet64.pkl`</sub><br>
-ImageNet| 128<sup>2</sup> |1.82| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet128.pkl`</sub><br>
+ImageNet| 128<sup>2</sup> |1.77| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet128.pkl`</sub><br>
 CIFAR10 | 32<sup>2</sup> |1.85| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/cifar10.pkl`</sub><br>
 FFHQ | 256<sup>2</sup> |2.19| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/ffhq256.pkl`</sub><br>
 Pokemon | 256<sup>2</sup> |23.97| <sub>`https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/pokemon256.pkl`</sub><br>
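
As a rough illustration of what multi-modal truncation does (a minimal sketch; `multimodal_truncate` is a hypothetical helper name, the actual implementation lives in torch_utils/gen_utils.py below, and a centroids array of shape [num_centroids, w_dim] is assumed): instead of pulling every sample toward the single global mean w_avg, each w is truncated toward its nearest precomputed centroid in W space.

```python
import torch

def multimodal_truncate(w: torch.Tensor, w_centroids: torch.Tensor, psi: float) -> torch.Tensor:
    # w: [batch, num_ws, w_dim] dlatents from the mapping network
    # w_centroids: [num_centroids, w_dim] cluster centers precomputed in W space
    dist = torch.norm(w_centroids[None] - w[:, :1], dim=2, p=2)  # [batch, num_centroids]
    w_anchor = w_centroids.index_select(0, dist.argmin(dim=1))   # nearest centroid per sample
    w_anchor = w_anchor.unsqueeze(1)                             # [batch, 1, w_dim], broadcasts over num_ws
    return w_anchor + (w - w_anchor) * psi                       # truncate toward the centroid, not the global mean
```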

gen_class_samplesheet.py (+70)

@@ -0,0 +1,70 @@
+import os
+from pathlib import Path
+import PIL.Image
+from typing import List
+import click
+import numpy as np
+import torch
+from tqdm import tqdm
+
+import legacy
+import dnnlib
+from training.training_loop import save_image_grid
+from torch_utils import gen_utils
+from gen_images import parse_range
+
+@click.command()
+@click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
+@click.option('--trunc', 'truncation_psi', help='Truncation psi', type=float, default=1, show_default=True)
+@click.option('--seed', help='Random seed', type=int, default=42)
+@click.option('--centroids-path', type=str, help='Pass path to precomputed centroids to enable multimodal truncation')
+@click.option('--classes', type=parse_range, help='List of classes (e.g., \'0,1,4-6\')', required=True)
+@click.option('--samples-per-class', help='Samples per class.', type=int, default=4)
+@click.option('--grid-width', help='Total width of image grid', type=int, default=32)
+@click.option('--batch-gpu', help='Samples per pass, adapt to fit on GPU', type=int, default=32)
+@click.option('--outdir', help='Where to save the output images', type=str, required=True, metavar='DIR')
+@click.option('--desc', help='String to include in result dir name', metavar='STR', type=str)
+def generate_samplesheet(
+    network_pkl: str,
+    truncation_psi: float,
+    seed: int,
+    centroids_path: str,
+    classes: List[int],
+    samples_per_class: int,
+    batch_gpu: int,
+    grid_width: int,
+    outdir: str,
+    desc: str,
+):
+    print('Loading networks from "%s"...' % network_pkl)
+    device = torch.device('cuda')
+    with dnnlib.util.open_url(network_pkl) as f:
+        G = legacy.load_network_pkl(f)['G_ema'].to(device).requires_grad_(False)
+
+    # setup
+    os.makedirs(outdir, exist_ok=True)
+    desc_full = f'{Path(network_pkl).stem}_trunc_{truncation_psi}'
+    if desc is not None: desc_full += f'-{desc}'
+    run_dir = Path(gen_utils.make_run_dir(outdir, desc_full))
+
+    print('Generating latents.')
+    ws = []
+    for class_idx in tqdm(classes):
+        w = gen_utils.get_w_from_seed(G, samples_per_class, device, truncation_psi, seed=seed,
+                                      centroids_path=centroids_path, class_idx=class_idx)
+        ws.append(w)
+    ws = torch.cat(ws)
+
+    print('Generating samples.')
+    images = []
+    for w in tqdm(ws.split(batch_gpu)):
+        img = gen_utils.w_to_img(G, w, to_np=True)
+        images.append(img)
+
+    # adjust grid width to prevent folding within the same class, then save to disk
+    grid_width = grid_width - grid_width % samples_per_class
+    images = gen_utils.create_image_grid(np.concatenate(images), grid_size=(grid_width, None))
+    PIL.Image.fromarray(images, 'RGB').save(run_dir / 'sheet.png')
+
+if __name__ == "__main__":
+    generate_samplesheet()
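
A plausible invocation of the new script, mirroring the README command above (illustrative only; output lands in a run directory created by gen_utils.make_run_dir):

```
python gen_class_samplesheet.py --outdir=sample_sheets --trunc=1.0 \
  --network=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet128.pkl \
  --samples-per-class 4 --classes 0-32 --grid-width 32
```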

gen_images.py (+11 −30)

@@ -19,6 +19,7 @@
 import torch
 
 import legacy
+from torch_utils import gen_utils
 
 #----------------------------------------------------------------------------
 
@@ -71,7 +72,9 @@ def make_transform(translate: Tuple[float,float], angle: float):
 @click.command()
 @click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
 @click.option('--seeds', type=parse_range, help='List of random seeds (e.g., \'0,1,4-6\')', required=True)
+@click.option('--batch-sz', type=int, help='Batch size per sample', default=1)
 @click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True)
+@click.option('--centroids-path', type=str, help='Pass path to precomputed centroids to enable multimodal truncation')
 @click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)')
 @click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True)
 @click.option('--translate', help='Translate XY-coordinate (e.g. \'0.3,1\')', type=parse_vec2, default='0,0', show_default=True, metavar='VEC2')
@@ -80,49 +83,26 @@ def make_transform(translate: Tuple[float,float], angle: float):
 def generate_images(
     network_pkl: str,
     seeds: List[int],
+    batch_sz: int,
     truncation_psi: float,
+    centroids_path: str,
     noise_mode: str,
     outdir: str,
     translate: Tuple[float,float],
     rotate: float,
     class_idx: Optional[int]
 ):
-    """Generate images using pretrained network pickle.
-
-    Examples:
-
-    \b
-    # Generate an image using pre-trained AFHQv2 model ("Ours" in Figure 1, left).
-    python gen_images.py --outdir=out --trunc=1 --seeds=2 \\
-        --network=https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-afhqv2-512x512.pkl
-
-    \b
-    # Generate uncurated images with truncation using the MetFaces-U dataset
-    python gen_images.py --outdir=out --trunc=0.7 --seeds=600-605 \\
-        --network=https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-t-metfacesu-1024x1024.pkl
-    """
-
     print('Loading networks from "%s"...' % network_pkl)
     device = torch.device('cuda')
     with dnnlib.util.open_url(network_pkl) as f:
-        G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore
+        G = legacy.load_network_pkl(f)['G_ema']
+        G = G.eval().requires_grad_(False).to(device)
 
     os.makedirs(outdir, exist_ok=True)
 
-    # Labels.
-    label = torch.zeros([1, G.c_dim], device=device)
-    if G.c_dim != 0:
-        if class_idx is None:
-            raise click.ClickException('Must specify class label with --class when using a conditional network')
-        label[:, class_idx] = 1
-    else:
-        if class_idx is not None:
-            print ('warn: --class=lbl ignored when running on an unconditional network')
-
     # Generate images.
     for seed_idx, seed in enumerate(seeds):
         print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
-        z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device)
 
         # Construct an inverse rotation/translation matrix and pass to the generator. The
         # generator expects this matrix as an inverse to avoid potentially failing numerical
@@ -132,9 +112,10 @@ def generate_images(
         m = np.linalg.inv(m)
         G.synthesis.input.transform.copy_(torch.from_numpy(m))
 
-        img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
-        img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
-        PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/seed{seed:04d}.png')
+        w = gen_utils.get_w_from_seed(G, batch_sz, device, truncation_psi, seed=seed,
+                                      centroids_path=centroids_path, class_idx=class_idx)
+        img = gen_utils.w_to_img(G, w, to_np=True)
+        PIL.Image.fromarray(gen_utils.create_image_grid(img), 'RGB').save(f'{outdir}/seed{seed:04d}.png')
 
 
 #----------------------------------------------------------------------------
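
With the new flags, a batched and multi-modally truncated call might look like this (a sketch: the class index 282 is an arbitrary ImageNet label for illustration; the centroids URL is the one from the README). Since `--batch-sz` values above 1 feed a batch of dlatents through `w_to_img`, the saved file per seed is then a grid via `gen_utils.create_image_grid` rather than a single image:

```
python gen_images.py --outdir=out --trunc=0.7 --seeds=10-15 --batch-sz 4 --class 282 \
  --network=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet128.pkl \
  --centroids-path=https://s3.eu-central-1.amazonaws.com/avg-projects/stylegan_xl/models/imagenet_centroids.npy
```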

gen_samplesheet.py (−85)

This file was deleted.

torch_utils/gen_utils.py (+34 −9)

@@ -9,6 +9,8 @@
 import click
 import numpy as np
 import torch
+import torch.nn.functional as F
+import dnnlib
 
 
 # ----------------------------------------------------------------------------
@@ -413,29 +415,52 @@ def w_to_img(G, dlatents: Union[List[torch.Tensor], torch.Tensor], noise_mode: s
     assert isinstance(dlatents, torch.Tensor), f'dlatents should be a torch.Tensor!: "{type(dlatents)}"'
     if len(dlatents.shape) == 2:
         dlatents = dlatents.unsqueeze(0)  # An individual dlatent => [1, G.mapping.num_ws, G.mapping.w_dim]
+
     synth_image = G.synthesis(dlatents, noise_mode=noise_mode)
     synth_image = (synth_image + 1) * 255/2  # [-1.0, 1.0] -> [0.0, 255.0]
     if to_np:
         synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8).cpu().numpy()  # NCWH => NWHC
     return synth_image
 
 
-def get_w_from_seed(G, device: torch.device, seed: int, truncation_psi: float, class_idx: Optional[int]) -> torch.Tensor:
-    """Get the dlatent from a random seed, using the truncation trick (this could be optional)"""
+def get_w_from_seed(G, batch_sz: int, device: torch.device, truncation_psi: float, seed: Optional[int], centroids_path: Optional[str], class_idx: Optional[int]) -> torch.Tensor:
+    """Get a batch of dlatents from a random seed, using the truncation trick (this could be optional)"""
 
-    label = torch.zeros([1, G.c_dim], device=device)
     if G.c_dim != 0:
+        # sample random labels if no class idx is given
         if class_idx is None:
-            raise click.ClickException('Must specify class label via --class when using a conditional network')
-        w_avg = G.mapping.w_avg[class_idx]
-        label[:, class_idx] = 1
+            class_indices = np.random.RandomState(seed).randint(low=0, high=G.c_dim, size=(batch_sz))
+            class_indices = torch.from_numpy(class_indices).to(device)
+            w_avg = G.mapping.w_avg.index_select(0, class_indices)
+        else:
+            w_avg = G.mapping.w_avg[class_idx].unsqueeze(0).repeat(batch_sz, 1)
+            class_indices = torch.full((batch_sz,), class_idx).to(device)
+
+        labels = F.one_hot(class_indices, G.c_dim)
+
     else:
-        w_avg = G.mapping.w_avg
+        w_avg = G.mapping.w_avg.unsqueeze(0)
+        labels = None
         if class_idx is not None:
             print('Warning: --class is ignored when running an unconditional network')
 
-    z = np.random.RandomState(seed).randn(1, G.z_dim)
-    w = G.mapping(torch.from_numpy(z).to(device), label)
+    z = np.random.RandomState(seed).randn(batch_sz, G.z_dim)
+    z = torch.from_numpy(z).to(device)
+    w = G.mapping(z, labels)
+
+    # multimodal truncation
+    if centroids_path is not None:
+        with dnnlib.util.open_url(centroids_path, verbose=False) as f:
+            w_centroids = np.load(f)
+        w_centroids = torch.from_numpy(w_centroids).to(device)
+        w_centroids = w_centroids[None].repeat(batch_sz, 1, 1)
+
+        # measure distances
+        dist = torch.norm(w_centroids - w[:, :1], dim=2, p=2)
+        w_avg = w_centroids[0].index_select(0, dist.argmin(1))
+
+    w_avg = w_avg.unsqueeze(1).repeat(1, G.mapping.num_ws, 1)
     w = w_avg + (w - w_avg) * truncation_psi
 
     return w
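
The final line is the standard truncation trick: psi = 1 leaves the sample untouched, psi = 0 collapses it to the class- or centroid-specific anchor. A minimal sanity check of those two endpoints (illustrative values only, not the repo's API):

```python
import torch

w_avg = torch.zeros(1, 1, 512)  # stand-in anchor, broadcasts over num_ws
w = torch.randn(1, 32, 512)     # fake dlatent with num_ws = 32, w_dim = 512

assert torch.equal(w_avg + (w - w_avg) * 1.0, w)                   # psi = 1: unchanged
assert torch.equal(w_avg + (w - w_avg) * 0.0, w_avg.expand_as(w))  # psi = 0: collapses to anchor
```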
