@@ -153,9 +153,9 @@ class Encoder(nn.Module):
         channels: sequence of block output channels.
         out_channels: number of channels in the bottom layer (latent space) of the autoencoder.
         num_res_blocks: number of residual blocks (see _ResBlock) per level.
-        norm_num_groups: number of groups for the GroupNorm layers, num_channels must be divisible by this number.
+        norm_num_groups: number of groups for the GroupNorm layers, channels must be divisible by this number.
         norm_eps: epsilon for the normalization.
-        attention_levels: indicate which level from num_channels contain an attention block.
+        attention_levels: indicate which level from channels contain an attention block.
         with_nonlocal_attn: if True use non-local attention block.
         include_fc: whether to include the final linear layer. Default to True.
         use_combined_linear: whether to use a single linear layer for qkv projection, default to False.
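
For context, a minimal sketch of argument shapes that satisfy the constraints documented above; the concrete values are illustrative, not defaults:

# Illustrative values only: every entry of `channels` must be divisible by
# `norm_num_groups`, and `attention_levels` needs one flag per entry of `channels`.
norm_num_groups = 32
channels = (64, 128, 256)                # 64, 128, 256 are all multiples of 32
attention_levels = (False, False, True)  # same length as `channels`

assert all(c % norm_num_groups == 0 for c in channels)
assert len(channels) == len(attention_levels)
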
@@ -299,9 +299,9 @@ class Decoder(nn.Module):
         in_channels: number of channels in the bottom layer (latent space) of the autoencoder.
         out_channels: number of output channels.
         num_res_blocks: number of residual blocks (see _ResBlock) per level.
-        norm_num_groups: number of groups for the GroupNorm layers, num_channels must be divisible by this number.
+        norm_num_groups: number of groups for the GroupNorm layers, channels must be divisible by this number.
         norm_eps: epsilon for the normalization.
-        attention_levels: indicate which level from num_channels contain an attention block.
+        attention_levels: indicate which level from channels contain an attention block.
         with_nonlocal_attn: if True use non-local attention block.
         use_convtranspose: if True, use ConvTranspose to upsample feature maps in decoder.
         include_fc: whether to include the final linear layer. Default to True.
@@ -483,7 +483,7 @@ class AutoencoderKL(nn.Module):
         channels: number of output channels for each block.
         attention_levels: sequence of levels to add attention.
         latent_channels: latent embedding dimension.
-        norm_num_groups: number of groups for the GroupNorm layers, num_channels must be divisible by this number.
+        norm_num_groups: number of groups for the GroupNorm layers, channels must be divisible by this number.
         norm_eps: epsilon for the normalization.
         with_encoder_nonlocal_attn: if True use non-local attention block in the encoder.
         with_decoder_nonlocal_attn: if True use non-local attention block in the decoder.
@@ -518,18 +518,18 @@ def __init__(
 
         # All number of channels should be multiple of num_groups
         if any((out_channel % norm_num_groups) != 0 for out_channel in channels):
-            raise ValueError("AutoencoderKL expects all num_channels being multiple of norm_num_groups")
+            raise ValueError("AutoencoderKL expects all channels being multiple of norm_num_groups")
 
         if len(channels) != len(attention_levels):
-            raise ValueError("AutoencoderKL expects num_channels being same size of attention_levels")
+            raise ValueError("AutoencoderKL expects channels being same size of attention_levels")
 
         if isinstance(num_res_blocks, int):
             num_res_blocks = ensure_tuple_rep(num_res_blocks, len(channels))
 
         if len(num_res_blocks) != len(channels):
             raise ValueError(
                 "`num_res_blocks` should be a single integer or a tuple of integers with the same length as "
-                "`num_channels`."
+                "`channels`."
             )
 
         self.encoder: nn.Module = Encoder(
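
As a usage note (not part of this diff), a hypothetical instantiation that passes the checks above. It assumes the import path monai.networks.nets and the spatial_dims/in_channels/out_channels arguments of MONAI's AutoencoderKL, so treat it as a sketch of valid argument shapes rather than a verified call:

from monai.networks.nets import AutoencoderKL

# Hypothetical configuration: each entry of `channels` is a multiple of
# `norm_num_groups`, `attention_levels` has one entry per level in `channels`,
# and the integer `num_res_blocks` is expanded per level via ensure_tuple_rep.
model = AutoencoderKL(
    spatial_dims=2,
    in_channels=1,
    out_channels=1,
    channels=(64, 128, 256),
    attention_levels=(False, False, True),
    num_res_blocks=2,
    latent_channels=8,
    norm_num_groups=32,
)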