From 045593671ee7bbcb3db107d3f69468f21ea43c48 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 02:11:36 +0800 Subject: [PATCH 1/7] fix encoder depth & output stride --- .../decoders/deeplabv3/decoder.py | 31 ++++++++-------- .../decoders/deeplabv3/model.py | 37 ++++++++++++++++--- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/segmentation_models_pytorch/decoders/deeplabv3/decoder.py b/segmentation_models_pytorch/decoders/deeplabv3/decoder.py index caeb95d1..8f2668bc 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/decoder.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/decoder.py @@ -61,7 +61,6 @@ def __init__( nn.BatchNorm2d(out_channels), nn.ReLU(), ) - self.out_channels = out_channels def forward(self, *features): return super().forward(features[-1]) @@ -71,6 +70,7 @@ class DeepLabV3PlusDecoder(nn.Module): def __init__( self, encoder_channels: Sequence[int, ...], + encoder_depth: int, out_channels: int, atrous_rates: Iterable[int], output_stride: Literal[8, 16], @@ -78,14 +78,13 @@ def __init__( aspp_dropout: float, ): super().__init__() - if output_stride not in {8, 16}: + if encoder_depth < 3: raise ValueError( - "Output stride should be 8 or 16, got {}.".format(output_stride) + "Encoder depth for DeepLabV3Plus decoder cannot be less than 3, got {}.".format( + encoder_depth + ) ) - self.out_channels = out_channels - self.output_stride = output_stride - self.aspp = nn.Sequential( ASPP( encoder_channels[-1], @@ -101,10 +100,10 @@ def __init__( nn.ReLU(), ) - scale_factor = 2 if output_stride == 8 else 4 + scale_factor = 4 if output_stride == 16 and encoder_depth > 3 else 2 self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor) - highres_in_channels = encoder_channels[-4] + highres_in_channels = encoder_channels[2] highres_out_channels = 48 # proposed by authors of paper self.block1 = nn.Sequential( nn.Conv2d( @@ -128,7 +127,7 @@ def __init__( def forward(self, *features): aspp_features = self.aspp(features[-1]) aspp_features = self.up(aspp_features) - high_res_features = self.block1(features[-4]) + high_res_features = self.block1(features[2]) concat_features = torch.cat([aspp_features, high_res_features], dim=1) fused_features = self.block2(concat_features) return fused_features @@ -228,13 +227,13 @@ def forward(self, x): class SeparableConv2d(nn.Sequential): def __init__( self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - bias=True, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + bias: bool = True, ): dephtwise_conv = nn.Conv2d( in_channels, diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index d67a3be3..030e42fd 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -35,7 +35,7 @@ class DeepLabV3(SegmentationModel): Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**. Default is **None** - upsampling: Final upsampling factor (should have the same value as ``encoder_output_stride`` to preserve input-output spatial shape identity). + upsampling: Final upsampling factor. Default is **None** to preserve input-output spatial shape identity aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build on top of encoder if **aux_params** is not **None** (default). Supported params: - classes (int): A number of classes @@ -43,7 +43,8 @@ class DeepLabV3(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + Keys with ``None`` values are pruned before passing. Returns: ``torch.nn.Module``: **DeepLabV3** @@ -72,6 +73,13 @@ def __init__( ): super().__init__() + if encoder_output_stride not in [8, 16]: + raise ValueError( + "DeeplabV3 support output stride 8 or 16, got {}.".format( + encoder_output_stride + ) + ) + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -81,6 +89,14 @@ def __init__( **kwargs, ) + if upsampling is None: + if encoder_depth <= 3: + scale_factor = 2 ** encoder_depth + else: + scale_factor = encoder_output_stride + else: + scale_factor = upsampling + self.decoder = DeepLabV3Decoder( in_channels=self.encoder.out_channels[-1], out_channels=decoder_channels, @@ -90,11 +106,11 @@ def __init__( ) self.segmentation_head = SegmentationHead( - in_channels=self.decoder.out_channels, + in_channels=decoder_channels, out_channels=classes, activation=activation, kernel_size=1, - upsampling=encoder_output_stride if upsampling is None else upsampling, + upsampling=scale_factor, ) if aux_params is not None: @@ -137,7 +153,8 @@ class DeepLabV3Plus(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + Keys with ``None`` values are pruned before passing. Returns: ``torch.nn.Module``: **DeepLabV3Plus** @@ -166,6 +183,13 @@ def __init__( ): super().__init__() + if encoder_output_stride not in [8, 16]: + raise ValueError( + "DeeplabV3Plus support output stride 8 or 16, got {}.".format( + encoder_output_stride + ) + ) + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -177,6 +201,7 @@ def __init__( self.decoder = DeepLabV3PlusDecoder( encoder_channels=self.encoder.out_channels, + encoder_depth=encoder_depth, out_channels=decoder_channels, atrous_rates=decoder_atrous_rates, output_stride=encoder_output_stride, @@ -185,7 +210,7 @@ def __init__( ) self.segmentation_head = SegmentationHead( - in_channels=self.decoder.out_channels, + in_channels=decoder_channels, out_channels=classes, activation=activation, kernel_size=1, From 79d55685934e9d7496dea9a49d966b13165a0cf8 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 02:16:23 +0800 Subject: [PATCH 2/7] fix ruff style --- segmentation_models_pytorch/__init__.py | 4 +++- segmentation_models_pytorch/decoders/deeplabv3/model.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/segmentation_models_pytorch/__init__.py b/segmentation_models_pytorch/__init__.py index c8209226..5cde6004 100644 --- a/segmentation_models_pytorch/__init__.py +++ b/segmentation_models_pytorch/__init__.py @@ -26,7 +26,9 @@ # Suppress the specific SyntaxWarning for `pretrainedmodels` warnings.filterwarnings("ignore", message="is with a literal", category=SyntaxWarning) -warnings.filterwarnings("ignore", message=r'"is" with \'str\' literal.*', category=SyntaxWarning) # for python >= 3.12 +warnings.filterwarnings( + "ignore", message=r'"is" with \'str\' literal.*', category=SyntaxWarning +) # for python >= 3.12 def create_model( diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index 030e42fd..73fc67e3 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -43,7 +43,7 @@ class DeepLabV3(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. Returns: @@ -79,7 +79,7 @@ def __init__( encoder_output_stride ) ) - + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -91,7 +91,7 @@ def __init__( if upsampling is None: if encoder_depth <= 3: - scale_factor = 2 ** encoder_depth + scale_factor = 2**encoder_depth else: scale_factor = encoder_output_stride else: @@ -153,7 +153,7 @@ class DeepLabV3Plus(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. Returns: From c97d43a52ca0904dc033f180352156b26f5e1e32 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 03:38:57 +0800 Subject: [PATCH 3/7] Revert "fix ruff style" This reverts commit 79d55685934e9d7496dea9a49d966b13165a0cf8. --- segmentation_models_pytorch/decoders/deeplabv3/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index 73fc67e3..030e42fd 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -43,7 +43,7 @@ class DeepLabV3(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. Returns: @@ -79,7 +79,7 @@ def __init__( encoder_output_stride ) ) - + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -91,7 +91,7 @@ def __init__( if upsampling is None: if encoder_depth <= 3: - scale_factor = 2**encoder_depth + scale_factor = 2 ** encoder_depth else: scale_factor = encoder_output_stride else: @@ -153,7 +153,7 @@ class DeepLabV3Plus(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. Returns: From 14dc2a9bbf132e718dc05847b5bf2134283b62e8 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 02:11:36 +0800 Subject: [PATCH 4/7] fix encoder depth & output stride --- .../decoders/deeplabv3/decoder.py | 41 +++++++------------ .../decoders/deeplabv3/model.py | 36 +++++++++++++--- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/segmentation_models_pytorch/decoders/deeplabv3/decoder.py b/segmentation_models_pytorch/decoders/deeplabv3/decoder.py index e20acf3f..3fd73786 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/decoder.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/decoder.py @@ -61,7 +61,6 @@ def __init__( nn.BatchNorm2d(out_channels), nn.ReLU(), ) - self.out_channels = out_channels def forward(self, *features): return super().forward(features[-1]) @@ -79,17 +78,12 @@ def __init__( aspp_dropout: float, ): super().__init__() - if encoder_depth not in (3, 4, 5): + if encoder_depth < 3: raise ValueError( - "Encoder depth should be 3, 4 or 5, got {}.".format(encoder_depth) + "Encoder depth for DeepLabV3Plus decoder cannot be less than 3, got {}.".format( + encoder_depth + ) ) - if output_stride not in (8, 16): - raise ValueError( - "Output stride should be 8 or 16, got {}.".format(output_stride) - ) - - self.out_channels = out_channels - self.output_stride = output_stride self.aspp = nn.Sequential( ASPP( @@ -106,17 +100,10 @@ def __init__( nn.ReLU(), ) - scale_factor = 2 if output_stride == 8 else 4 + scale_factor = 4 if output_stride == 16 and encoder_depth > 3 else 2 self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor) - if encoder_depth == 3 and output_stride == 8: - self.highres_input_index = -2 - elif encoder_depth == 3 or encoder_depth == 4: - self.highres_input_index = -3 - else: - self.highres_input_index = -4 - - highres_in_channels = encoder_channels[self.highres_input_index] + highres_in_channels = encoder_channels[2] highres_out_channels = 48 # proposed by authors of paper self.block1 = nn.Sequential( nn.Conv2d( @@ -140,7 +127,7 @@ def __init__( def forward(self, *features): aspp_features = self.aspp(features[-1]) aspp_features = self.up(aspp_features) - high_res_features = self.block1(features[self.highres_input_index]) + high_res_features = self.block1(features[2]) concat_features = torch.cat([aspp_features, high_res_features], dim=1) fused_features = self.block2(concat_features) return fused_features @@ -240,13 +227,13 @@ def forward(self, x): class SeparableConv2d(nn.Sequential): def __init__( self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - bias=True, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + bias: bool = True, ): dephtwise_conv = nn.Conv2d( in_channels, diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index c0ef1238..6011019b 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -35,7 +35,7 @@ class DeepLabV3(SegmentationModel): Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**. Default is **None** - upsampling: Final upsampling factor (should have the same value as ``encoder_output_stride`` to preserve input-output spatial shape identity). + upsampling: Final upsampling factor. Default is **None** to preserve input-output spatial shape identity aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build on top of encoder if **aux_params** is not **None** (default). Supported params: - classes (int): A number of classes @@ -43,7 +43,8 @@ class DeepLabV3(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + Keys with ``None`` values are pruned before passing. Returns: ``torch.nn.Module``: **DeepLabV3** @@ -72,6 +73,13 @@ def __init__( ): super().__init__() + if encoder_output_stride not in [8, 16]: + raise ValueError( + "DeeplabV3 support output stride 8 or 16, got {}.".format( + encoder_output_stride + ) + ) + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -81,6 +89,14 @@ def __init__( **kwargs, ) + if upsampling is None: + if encoder_depth <= 3: + scale_factor = 2 ** encoder_depth + else: + scale_factor = encoder_output_stride + else: + scale_factor = upsampling + self.decoder = DeepLabV3Decoder( in_channels=self.encoder.out_channels[-1], out_channels=decoder_channels, @@ -90,11 +106,11 @@ def __init__( ) self.segmentation_head = SegmentationHead( - in_channels=self.decoder.out_channels, + in_channels=decoder_channels, out_channels=classes, activation=activation, kernel_size=1, - upsampling=encoder_output_stride if upsampling is None else upsampling, + upsampling=scale_factor, ) if aux_params is not None: @@ -138,7 +154,8 @@ class DeepLabV3Plus(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + Keys with ``None`` values are pruned before passing. Returns: ``torch.nn.Module``: **DeepLabV3Plus** @@ -167,6 +184,13 @@ def __init__( ): super().__init__() + if encoder_output_stride not in [8, 16]: + raise ValueError( + "DeeplabV3Plus support output stride 8 or 16, got {}.".format( + encoder_output_stride + ) + ) + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -187,7 +211,7 @@ def __init__( ) self.segmentation_head = SegmentationHead( - in_channels=self.decoder.out_channels, + in_channels=decoder_channels, out_channels=classes, activation=activation, kernel_size=1, From bafd4fbe2a4052de0acf66b112c1923a61c3c6f5 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 02:16:23 +0800 Subject: [PATCH 5/7] fix ruff style --- segmentation_models_pytorch/decoders/deeplabv3/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index 6011019b..9667415a 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -43,7 +43,7 @@ class DeepLabV3(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. Returns: @@ -79,7 +79,7 @@ def __init__( encoder_output_stride ) ) - + self.encoder = get_encoder( encoder_name, in_channels=in_channels, @@ -91,7 +91,7 @@ def __init__( if upsampling is None: if encoder_depth <= 3: - scale_factor = 2 ** encoder_depth + scale_factor = 2**encoder_depth else: scale_factor = encoder_output_stride else: @@ -154,7 +154,7 @@ class DeepLabV3Plus(SegmentationModel): - dropout (float): Dropout factor in [0, 1) - activation (str): An activation function to apply "sigmoid"/"softmax" (could be **None** to return logits) - kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. + kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing. Returns: From f96dbda8c2091756faef7e424b008a1ee145e369 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 18:02:31 +0800 Subject: [PATCH 6/7] update deeplabv3+ doc --- segmentation_models_pytorch/decoders/deeplabv3/model.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index 9667415a..c5f366a2 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -145,9 +145,7 @@ class DeepLabV3Plus(SegmentationModel): Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**. Default is **None** - upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity. In case - **encoder_depth** and **encoder_output_stride** are 3 and 16 resp., set **upsampling** to 2 to preserve. - aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build + upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity. on top of encoder if **aux_params** is not **None** (default). Supported params: - classes (int): A number of classes - pooling (str): One of "max", "avg". Default is "avg" From 9fc8a4b35a0483e9215e1d8eb026f461d86fba28 Mon Sep 17 00:00:00 2001 From: Ryan <23580140+brianhou0208@users.noreply.github.com> Date: Sat, 30 Nov 2024 23:26:36 +0800 Subject: [PATCH 7/7] restored aux_params --- segmentation_models_pytorch/decoders/deeplabv3/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/segmentation_models_pytorch/decoders/deeplabv3/model.py b/segmentation_models_pytorch/decoders/deeplabv3/model.py index 53e1846e..830906cb 100644 --- a/segmentation_models_pytorch/decoders/deeplabv3/model.py +++ b/segmentation_models_pytorch/decoders/deeplabv3/model.py @@ -145,6 +145,7 @@ class DeepLabV3Plus(SegmentationModel): **callable** and **None**. Default is **None** upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity. + aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build on top of encoder if **aux_params** is not **None** (default). Supported params: - classes (int): A number of classes - pooling (str): One of "max", "avg". Default is "avg"