google-deepmind · SergeySandler · Mar 8, 2024 · Aug 15, 2024 · Aug 15, 2024
diff --git a/README.md b/README.md
@@ -1,3 +1,11 @@
+**This is a clone of the [TAPIR Repository](https://github.com/google-deepmind/tapnet) that addresses the compatibility of the standard TAPIR model with TorchScript; see [PR#85](https://github.com/google-deepmind/tapnet/pull/85). Only _tapir_model.py_, _nets.py_ and _utils.py_ from the _torch_ directory are updated.**
+
+**It is not yet aligned with version 2 of the model, [bootstapir_checkpoint_v2.pt](https://storage.googleapis.com/dm-tapnet/bootstap/bootstapir_checkpoint_v2.pt); it is aligned only with the original version, [bootstapir_checkpoint.pt](https://storage.googleapis.com/dm-tapnet/bootstap/bootstapir_checkpoint.pt).**
+
+**Online TAPIR is not yet supported.**
+
+---
+
 # Tracking Any Point (TAP)
 
 [[`TAP-Vid`](https://tapvid.github.io/)] [[`TAPIR`](https://deepmind-tapir.github.io/)] [[`RoboTAP`](https://robotap.github.io/)] [[`Blog Post`](https://deepmind-tapir.github.io/blogpost.html)] [[`BootsTAP`](https://arxiv.org/abs/2402.00847)]

diff --git a/torch/nets.py b/torch/nets.py
@@ -57,7 +57,7 @@ def forward(self, x):
     x = x.permute(0, 3, 1, 2)
     prev_frame = torch.cat([x[0:1], x[:-1]], dim=0)
     next_frame = torch.cat([x[1:], x[-1:]], dim=0)
-    resid = torch.cat([x, prev_frame, next_frame], axis=1)
+    resid = torch.cat([x, prev_frame, next_frame], dim=1)
     resid = self.conv(resid)
     resid = F.gelu(resid, approximate='tanh')
     x += self.conv_1(resid)
@@ -198,10 +198,20 @@ def forward(self, x):
     x = self.linear_1(x)
     return x
 
+class DummyModel:  # Placeholder callable; BlockV2 assigns it to proj_conv when use_projection is False.
+    # NOTE(review): plain class, not an nn.Module; presumably keeps proj_conv defined for TorchScript -- confirm.
+    def __init__(self):
+        pass
+
+    def forward(self):  # Takes no input; always returns a scalar tensor holding 0.
+        return torch.tensor(0)
+
+    def __call__(self, input):  # `input` is accepted but ignored; delegates to forward().
+        return self.forward()
 
 class BlockV2(nn.Module):
   """ResNet V2 block."""
-
+  
   def __init__(
       self,
       channels_in: int,
@@ -223,14 +233,16 @@ def __init__(
 
     self.use_projection = use_projection
     if self.use_projection:
-      self.proj_conv = nn.Conv2d(
+        self.proj_conv = nn.Conv2d(
           in_channels=channels_in,
           out_channels=channels_out,
           kernel_size=1,
           stride=stride,
           padding=0,
           bias=False,
-      )
+        )
+    else:
+        self.proj_conv = DummyModel()
 
     self.bn_0 = nn.InstanceNorm2d(
         num_features=channels_in,