Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ classifiers = [
]
dependencies = [
"timm>=1.0.17",
- "numpy==1.26",
+ "numpy>=1.26",
"tqdm",
"ftfy==6.1.1",
"regex",
Expand Down Expand Up @@ -59,7 +59,7 @@ notebooks = [
"ipycanvas",
"ipympl",
"pycocotools",
- "decord",
+ "decord2",
"opencv-python",
"einops",
"scikit-image",
Expand Down
20 changes: 14 additions & 6 deletions sam3/eval/postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,13 @@ def _process_masks(self, target_sizes, pred_masks, consistent=True, keep=None):
if pred_masks is None:
return None
if self.always_interpolate_masks_on_gpu:
- gpu_device = target_sizes.device
- assert gpu_device.type == "cuda"
- pred_masks = pred_masks.to(device=gpu_device)
+ device = target_sizes.device
+ if device.type == "cpu":
+ logging.warning(
+ "always_interpolate_masks_on_gpu=True but data is on CPU; "
+ "falling back to CPU interpolation"
+ )
+ pred_masks = pred_masks.to(device=device)
if consistent:
assert keep is None, "TODO: implement?"
# All masks should have the same shape, expected when processing a batch of size 1
Expand Down Expand Up @@ -454,9 +458,13 @@ def process_results(
] # [P,Q,...] --> [K,...]
meta_td = meta_td[tracked_obj_ids_idx[PROMPT_AXIS].cpu()]
if self.always_interpolate_masks_on_gpu:
- gpu_device = meta_td["original_size"].device
- assert gpu_device.type == "cuda"
- tracked_objs_outs_td = tracked_objs_outs_td.to(device=gpu_device)
+ device = meta_td["original_size"].device
+ if device.type == "cpu":
+ logging.warning(
+ "always_interpolate_masks_on_gpu=True but data is on CPU; "
+ "falling back to CPU interpolation"
+ )
+ tracked_objs_outs_td = tracked_objs_outs_td.to(device=device)
frame_results_td = self(
tracked_objs_outs_td.unsqueeze(1),
(
Expand Down
3 changes: 2 additions & 1 deletion sam3/model/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,9 @@ def __init__(

if resolution is not None and stride is not None:
feat_size = resolution // stride
+ device = "cuda" if torch.cuda.is_available() else "cpu"
coords_h, coords_w = self._get_coords(
- feat_size, feat_size, device="cuda"
+ feat_size, feat_size, device=device
)
self.compilable_cord_cache = (coords_h, coords_w)
self.compilable_stored_size = (feat_size, feat_size)
Expand Down
4 changes: 2 additions & 2 deletions sam3/model/geometry_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .act_ckpt_utils import activation_ckpt_wrapper
from .box_ops import box_cxcywh_to_xyxy

- from .model_misc import get_clones
+ from .model_misc import get_clones, tensor_to_device


def is_right_padded(mask):
Expand Down Expand Up @@ -656,7 +656,7 @@ def _encode_boxes(self, boxes, boxes_mask, boxes_labels, img_feats):
# We need to denormalize, and convert to [x, y, x, y]
boxes_xyxy = box_cxcywh_to_xyxy(boxes)
scale = torch.tensor([W, H, W, H], dtype=boxes_xyxy.dtype)
- scale = scale.pin_memory().to(device=boxes_xyxy.device, non_blocking=True)
+ scale = tensor_to_device(scale, boxes_xyxy.device)
scale = scale.view(1, 1, 4)
boxes_xyxy = boxes_xyxy * scale
sampled = torchvision.ops.roi_align(
Expand Down
Loading