
Commit ff66157

Merge pull request #33 from LukasHedegaard/develop
Add `forward_shrink` option to `Delay` and `Residual`
2 parents 9aa23ac + 605ec65 commit ff66157

File tree: 6 files changed (+61 −22 lines)

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -9,6 +9,12 @@ From v1.0.0 and on, the project will adherence strictly to Semantic Versioning.
 
 ## [Unreleased]
 
+
+## [0.13.0]
+### Added
+- Add `forward_shrink` option to `Delay` and `Residual`.
+
+
 ## [0.12.0]
 ### Added
 - Add `Constant`.
```

README.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -215,6 +215,9 @@ Below is a list of the modules and utilities included in the library:
 - `co.Add` - Adds a constant value.
 - `co.Multiply` - Multiplies with a constant factor.
 - `co.Unity` - Maps input to output without modification.
+- `co.Constant` - Maps input to and output with constant value.
+- `co.Zero` - Maps input to output of zeros.
+- `co.One` - Maps input to output of ones.
 
 - Converters
 <!-- - `co.Residual` - residual connection, which automatically adds delay if needed -->
```
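A quick sketch of the semantics of the three newly documented utility modules. Note this is an illustration based only on the one-line descriptions above; the constructor signatures, in particular `co.Constant` taking its fill value as the first argument, are assumptions rather than confirmed API:

```python
import torch
import continual as co

x = torch.ones(1, 1, 3)  # (N, C, T)

co.Zero()(x)          # tensor of zeros with the same shape as x
co.One()(x)           # tensor of ones with the same shape as x
co.Constant(42.0)(x)  # tensor filled with 42.0 (assumed signature)
```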

continual/container.py

Lines changed: 24 additions & 19 deletions
```diff
@@ -1,6 +1,7 @@
 from collections import OrderedDict
 from enum import Enum
 from functools import reduce, wraps
+from numbers import Number
 from typing import Callable, List, Optional, Sequence, Tuple, TypeVar, Union, overload
 
 import torch
@@ -82,11 +83,11 @@ def wrapped(inputs: Sequence[Tensor]) -> Tensor:
     return wrapped
 
 
-def int_from(tuple_or_int: Union[int, Tuple[int, ...]], dim=0) -> int:
-    if isinstance(tuple_or_int, int):
-        return tuple_or_int
+def num_from(tuple_or_num: Union[Number, Tuple[Number, ...]], dim=0) -> Number:
+    if isinstance(tuple_or_num, Number):
+        return tuple_or_num
 
-    return tuple_or_int[dim]
+    return tuple_or_num[dim]
 
 
 class FlattenableStateDict:
```
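The rename from `int_from` to `num_from` widens the helper from `int` to any `numbers.Number`, so float-valued attributes such as fractional strides pass through instead of failing the tuple-indexing path. A minimal standalone sketch of the behavior, with the helper reproduced from the diff above:

```python
from numbers import Number

# Same helper as in the diff above, reproduced for a self-contained demo
def num_from(tuple_or_num, dim=0):
    if isinstance(tuple_or_num, Number):
        return tuple_or_num
    return tuple_or_num[dim]

assert num_from(3) == 3                  # ints pass through, as before
assert num_from(0.5) == 0.5              # floats now pass through too
assert num_from((2, 1, 1)) == 2          # tuples are indexed at dim=0
assert num_from((2, 1, 1), dim=2) == 1   # ...or at an explicit dim
```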
```diff
@@ -206,8 +207,8 @@ def __init__(
         ]
 
         assert (
-            len(set(int_from(getattr(m, "stride", 1)) for _, m in modules)) == 1
-        ), f"Expected all modules to have the same stride, but got strides {[(int_from(getattr(m, 'stride', 1))) for _, m in modules]}"
+            len(set(num_from(getattr(m, "stride", 1)) for _, m in modules)) == 1
+        ), f"Expected all modules to have the same stride, but got strides {[(num_from(getattr(m, 'stride', 1))) for _, m in modules]}"
 
         for key, module in modules:
             self.add_module(key, module)
@@ -253,11 +254,11 @@ def delay(self) -> int:
 
     @property
     def stride(self) -> int:
-        return int_from(getattr(next(iter(self)), "stride", 1))
+        return num_from(getattr(next(iter(self)), "stride", 1))
 
     @property
     def padding(self) -> int:
-        return max(int_from(getattr(m, "padding", 0)) for m in self)
+        return max(num_from(getattr(m, "padding", 0)) for m in self)
 
     def clean_state(self):
         for m in self:
@@ -375,12 +376,12 @@ def delay(self):
     def stride(self) -> int:
         tot = 1
         for m in self:
-            tot *= int_from(getattr(m, "stride", 1))
+            tot *= num_from(getattr(m, "stride", 1))
         return tot
 
     @property
     def padding(self) -> int:
-        return max(int_from(getattr(m, "padding", 0)) for m in self)
+        return max(num_from(getattr(m, "padding", 0)) for m in self)
 
     @staticmethod
     def build_from(module: nn.Sequential) -> "Sequential":
@@ -466,8 +467,8 @@ def __init__(
         ]
 
         assert (
-            len(set(int_from(getattr(m, "stride", 1)) for _, m in modules)) == 1
-        ), f"Expected all modules to have the same stride, but got strides {[(int_from(getattr(m, 'stride', 1))) for _, m in modules]}"
+            len(set(num_from(getattr(m, "stride", 1)) for _, m in modules)) == 1
+        ), f"Expected all modules to have the same stride, but got strides {[(num_from(getattr(m, 'stride', 1))) for _, m in modules]}"
 
         for key, module in modules:
             self.add_module(key, module)
@@ -542,11 +543,11 @@ def delay(self) -> int:
 
     @property
     def stride(self) -> int:
-        return int_from(getattr(next(iter(self)), "stride", 1))
+        return num_from(getattr(next(iter(self)), "stride", 1))
 
     @property
     def padding(self) -> int:
-        return max(int_from(getattr(m, "padding", 0)) for m in self)
+        return max(num_from(getattr(m, "padding", 0)) for m in self)
 
     def clean_state(self):
         for m in self:
@@ -561,14 +562,18 @@ def Residual(
     module: CoModule,
     temporal_fill: PaddingMode = None,
     reduce: Reduction = "sum",
+    forward_shrink: bool = False,
 ):
+    assert num_from(getattr(module, "stride", 1)) == 1, (
+        "The simple `Residual` only works for modules with temporal stride=1. "
+        "Complex residuals can be achieved using `BroadcastReduce` or the `Broadcast`, `Parallel`, and `Reduce` modules."
+    )
+    temporal_fill = temporal_fill or getattr(
+        module, "temporal_fill", PaddingMode.REPLICATE.value
+    )
    return BroadcastReduce(
         # Residual first yields easier broadcasting in reduce functions
-        Delay(
-            delay=module.delay,
-            temporal_fill=temporal_fill
-            or getattr(module, "temporal_fill", PaddingMode.REPLICATE.value),
-        ),
+        Delay(module.delay, temporal_fill, forward_shrink),
         module,
         reduce=reduce,
         auto_delay=False,
```
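To see how the new option composes, here is a minimal sketch (assuming `Delay` and `Residual` are exported at the package root, as the README's `co.` prefix suggests). The inner module is mocked with a shrinking `Delay`, whose clip-wise `forward` trims `delay` steps from each end of the temporal axis, so the residual branch must shrink identically for the `"sum"` reduction to line up:

```python
import torch
import continual as co

# Mock a module whose clip-wise forward trims one step from each end of T
inner = co.Delay(delay=1, forward_shrink=True)
res = co.Residual(inner, forward_shrink=True)

x = torch.arange(6.0).reshape(1, 1, 6)  # (N, C, T): [0, 1, 2, 3, 4, 5]
y = res.forward(x)

# Both branches yield [1, 2, 3, 4]; the sum reduction adds them elementwise
assert torch.equal(y, torch.tensor([[[2.0, 4.0, 6.0, 8.0]]]))
```

Without `forward_shrink=True` on the residual branch, the identity branch would keep all 6 temporal steps while the inner module returned only 4, and the reduction would fail on mismatched shapes.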

continual/delay.py

Lines changed: 16 additions & 2 deletions
```diff
@@ -22,9 +22,20 @@ def __init__(
         self,
         delay: int,
         temporal_fill: PaddingMode = "zeros",
+        forward_shrink: bool = False,
     ):
+        """Initialise Delay block
+
+        Args:
+            delay (int): the number of steps to delay an output.
+            temporal_fill (PaddingMode, optional): Temporal state initialisation mode ("zeros" or "replicate"). Defaults to "zeros".
+            forward_shrink (int, optional): Whether to shrink the temporal dimension of the feature map during forward.
+                This is handy for residuals that are parallel to modules which reduce the number of temporal steps. Defaults to False.
+        """
+        assert delay >= 0
         assert temporal_fill in {"zeros", "replicate"}
         self._delay = delay
+        self.forward_shrink = forward_shrink
         self.make_padding = {"zeros": torch.zeros_like, "replicate": torch.clone}[
             temporal_fill
         ]
@@ -98,11 +109,14 @@ def forward_steps(self, input: Tensor, pad_end=False, update_state=True) -> Tensor:
 
     def forward(self, input: Tensor) -> Tensor:
         # No delay during regular forward
-        return input
+        if not self.forward_shrink or self.delay == 0:
+            return input
+        return input[:, :, self.delay : -self.delay]
 
     @property
     def delay(self) -> int:
         return self._delay
 
     def extra_repr(self):
-        return f"{self.delay}"
+        shrink_str = ", forward_shrink=True" if self.forward_shrink else ""
+        return f"{self.delay}" + shrink_str
```
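With `forward_shrink=True`, the clip-wise `forward` is no longer an identity: it trims `delay` steps from each end of the temporal axis (2·delay in total), so the delayed branch can shape-match a parallel module that shrinks the clip. A minimal sketch, assuming `Delay` is importable from the package root as in the tests below:

```python
import torch
from continual import Delay

x = torch.arange(7.0).reshape(1, 1, 7)  # (N, C, T)

print(Delay(delay=2).forward(x)[0, 0])
# tensor([0., 1., 2., 3., 4., 5., 6.])   identity, as before

print(Delay(delay=2, forward_shrink=True).forward(x)[0, 0])
# tensor([2., 3., 4.])   2 steps trimmed from each end
```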

setup.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -25,7 +25,7 @@ def from_file(file_name: str = "requirements.txt", comment_char: str = "#"):
 
 setup(
     name="continual-inference",
-    version="0.12.0",
+    version="0.13.0",
     description="Building blocks for Continual Inference Networks in PyTorch",
     long_description=long_description(),
     long_description_content_type="text/markdown",
```

tests/continual/test_delay.py

Lines changed: 11 additions & 0 deletions
```diff
@@ -100,3 +100,14 @@ def test_zero_delay():
 def test_repr():
     delay = Delay(delay=2)
     assert delay.__repr__() == "Delay(2)"
+
+    delay = Delay(delay=2, forward_shrink=True)
+    assert delay.__repr__() == "Delay(2, forward_shrink=True)"
+
+
+def test_forward_shrink():
+    sample = torch.rand((2, 2, 5, 3))
+    delay = Delay(delay=2, forward_shrink=True)
+
+    output = delay.forward(sample)
+    assert torch.equal(sample[:, :, 2:-2], output)
```
