vislearn · fdraxler · Aug 23, 2023 · Jun 20, 2023 · Jun 20, 2023 · Aug 17, 2023
diff --git a/FrEIA/modules/all_in_one_block.py b/FrEIA/modules/all_in_one_block.py
@@ -102,10 +102,13 @@ class or callable ``f``, called as ``f(channels_in, channels_out)`` and
         self.splits = [split_len1, split_len2]
 
         try:
-            self.permute_function = {0: F.linear,
-                                     1: F.conv1d,
-                                     2: F.conv2d,
-                                     3: F.conv3d}[self.input_rank]
+            if permute_soft or learned_householder_permutation:
+                self.permute_function = {0: F.linear,
+                                        1: F.conv1d,
+                                        2: F.conv2d,
+                                        3: F.conv3d}[self.input_rank]
+            else:
+                self.permute_function = lambda x, p: x[:, p]
         except KeyError:
             raise ValueError(f"Data is {1 + self.input_rank}D. Must be 1D-4D.")
 
@@ -143,6 +146,7 @@ class or callable ``f``, called as ``f(channels_in, channels_out)`` and
         if permute_soft:
             w = special_ortho_group.rvs(channels)
         else:
+            w_index = torch.randperm(channels, requires_grad=False)
             w = np.zeros((channels, channels))
             for i, j in enumerate(np.random.permutation(channels)):
                 w[i, j] = 1.
@@ -155,11 +159,14 @@ class or callable ``f``, called as ``f(channels_in, channels_out)`` and
             self.w_perm = None
             self.w_perm_inv = None
             self.w_0 = nn.Parameter(torch.FloatTensor(w), requires_grad=False)
-        else:
+        elif permute_soft:
             self.w_perm = nn.Parameter(torch.FloatTensor(w).view(channels, channels, *([1] * self.input_rank)),
                                        requires_grad=False)
             self.w_perm_inv = nn.Parameter(torch.FloatTensor(w.T).view(channels, channels, *([1] * self.input_rank)),
                                            requires_grad=False)
+        else:
+            self.w_perm = nn.Parameter(w_index, requires_grad=False)
+            self.w_perm_inv = nn.Parameter(torch.argsort(w_index), requires_grad=False)
 
         if subnet_constructor is None:
             raise ValueError("Please supply a callable subnet_constructor "
@@ -222,7 +229,7 @@ def _affine(self, x, a, rev=False):
         a *= 0.1
         ch = x.shape[1]
 
-        sub_jac = self.clamp * torch.tanh(a[:, :ch])
+        sub_jac = self.clamp * torch.tanh(a[:, :ch]/self.clamp)
         if self.GIN:
             sub_jac -= torch.mean(sub_jac, dim=self.sum_dims, keepdim=True)
 

diff --git a/FrEIA/modules/invertible_resnet.py b/FrEIA/modules/invertible_resnet.py
@@ -30,9 +30,9 @@ def __init__(self, dims_in, dims_c=None, init_data: torch.Tensor = None):
 
         self.register_buffer("is_initialized", torch.tensor(False))
 
-        dim = next(iter(dims_in))[0]
-        self.log_scale = nn.Parameter(torch.empty(1, dim))
-        self.loc = nn.Parameter(torch.empty(1, dim))
+        dims = next(iter(dims_in))
+        self.log_scale = nn.Parameter(torch.empty(1, *dims))
+        self.loc = nn.Parameter(torch.empty(1, *dims))
 
         if init_data is not None:
             self.initialize(init_data)

diff --git a/FrEIA/modules/splines/binned.py b/FrEIA/modules/splines/binned.py
@@ -64,7 +64,7 @@ class BinnedSplineBase(InvertibleModule):
 
     def __init__(self, dims_in, dims_c=None, bins: int = 10, parameter_counts: Dict[str, int] = None, 
                  min_bin_sizes: Tuple[float] = (0.1, 0.1), default_domain: Tuple[float] = (-3.0, 3.0, -3.0, 3.0),
-                 identity_tails: bool = False) -> None:
+                 identity_tails: bool = False, domain_clamping: float = None) -> None:
         """
         Args:
             bins: number of bins to use
@@ -75,6 +75,7 @@ def __init__(self, dims_in, dims_c=None, bins: int = 10, parameter_counts: Dict[
             default_domain: tuple of (left, right, bottom, top) default spline domain values
                 these values will be used as the starting domain (when the network outputs zero)
             identity_tails: whether to use identity tails for the spline
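+            domain_clamping: if not None, the total spline domain width/height is soft-clamped to stay below this value via ``domain_clamping * tanh(size / domain_clamping)``; None (default) disables clamping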
         """
         if dims_c is None:
             dims_c = []
@@ -92,6 +93,13 @@ def __init__(self, dims_in, dims_c=None, bins: int = 10, parameter_counts: Dict[
         assert default_domain[3] - default_domain[2] >= min_bin_sizes[1] * bins, \
         "{bins} bins of size {min_bin_sizes[1]} are too large for domain {default_domain[2]} to {default_domain[3]}"
 
+        if domain_clamping is not None:
+            self.clamp_domain = lambda domain: domain_clamping * torch.tanh(
+                domain / domain_clamping
+            )
+        else:
+            self.clamp_domain = lambda domain: domain
+
         self.register_buffer("bins", torch.tensor(bins, dtype=torch.int32))
         self.register_buffer("min_bin_sizes", torch.as_tensor(min_bin_sizes, dtype=torch.float32))
         self.register_buffer("default_domain", torch.as_tensor(default_domain, dtype=torch.float32))
@@ -143,6 +151,7 @@ def constrain_parameters(self, parameters: Dict[str, torch.Tensor]) -> Dict[str,
             total_width = parameters["total_width"]
             shift = np.log(np.e - 1)
             total_width = self.default_width * F.softplus(total_width + shift)
+            total_width = self.clamp_domain(total_width)
             parameters["left"] = -total_width / 2
             parameters["bottom"] = -total_width / 2
 
@@ -161,7 +170,16 @@ def constrain_parameters(self, parameters: Dict[str, torch.Tensor]) -> Dict[str,
 
             parameters["widths"] = self.min_bin_sizes[0] + F.softplus(parameters["widths"] + xshift)
             parameters["heights"] = self.min_bin_sizes[1] + F.softplus(parameters["heights"] + yshift)
-
+
+            domain_width = torch.sum(parameters["widths"], dim=-1, keepdim=True)
+            domain_height = torch.sum(parameters["heights"], dim=-1, keepdim=True)
+            width_resize = self.clamp_domain(domain_width) / domain_width
+            height_resize = self.clamp_domain(domain_height) / domain_height
+
+            parameters["widths"] = parameters["widths"] * width_resize
+            parameters["heights"] = parameters["heights"] * height_resize
+            parameters["left"] = parameters["left"] * width_resize
+            parameters["bottom"] = parameters["bottom"] * height_resize
 
         return parameters
 

diff --git a/FrEIA/modules/splines/rational_quadratic.py b/FrEIA/modules/splines/rational_quadratic.py
@@ -164,7 +164,7 @@ def rational_quadratic_spline(x: torch.Tensor,
 
         # Eq 29 in the appendix of the paper
         discriminant = b ** 2 - 4 * a * c
-        assert torch.all(discriminant >= 0)
+        assert torch.all(discriminant >= 0), f"Discriminant must be positive, but is violated by {torch.min(discriminant)}"
 
         xi = 2 * c / (-b - torch.sqrt(discriminant))