Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Tiled rendering #866

Draft
wants to merge 3 commits into
base: og-develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions omnigibson/envs/vec_env_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from tqdm import trange

import omnigibson as og
from omnigibson.sensors import TiledCamera


class VectorEnvironment:
Expand All @@ -19,6 +20,8 @@ def __init__(self, num_envs, config):
for _ in trange(num_envs, desc="Loading environments")
]

self.tiled_camera = TiledCamera(modalities=["rgb", "depth"])

# Play, and finish loading all the envs
og.sim.play()
for env in self.envs:
Expand All @@ -29,19 +32,30 @@ def step(self, actions):
for i, action in enumerate(actions):
self.envs[i]._pre_step(action)
og.sim.step()

tiled_buffer = self.tiled_camera.get_obs()

rgb_tile = tiled_buffer["rgb"].cpu().numpy()
depth_tile = tiled_buffer["depth"].cpu().numpy()

for i, action in enumerate(actions):
# TODO: ignore camera observation here
# TODO: potentially, we could get the tiled image first, segment it, and then replace all the normal camera observations with the segmented tiled image
obs, reward, terminated, truncated, info = self.envs[i]._post_step(action)
observations.append(obs)
rewards.append(reward)
terminates.append(terminated)
truncates.append(truncated)
infos.append(info)

return observations, rewards, terminates, truncates, infos

def reset(self):
for env in self.envs:
env.reset()

# TODO: reset tiled rendering camera

def close(self):
pass

Expand Down
1 change: 1 addition & 0 deletions omnigibson/sensors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from omnigibson.sensors.scan_sensor import ScanSensor
from omnigibson.sensors.sensor_base import ALL_SENSOR_MODALITIES, REGISTERED_SENSORS, BaseSensor
from omnigibson.sensors.sensor_noise_base import REGISTERED_SENSOR_NOISES, BaseSensorNoise
from omnigibson.sensors.tiled_camera import TiledCamera
from omnigibson.sensors.vision_sensor import VisionSensor
from omnigibson.utils.python_utils import assert_valid_key

Expand Down
96 changes: 96 additions & 0 deletions omnigibson/sensors/tiled_camera.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import math

import torch as th

import omnigibson.lazy as lazy
from omnigibson.sensors.vision_sensor import VisionSensor


class TiledCamera:
    """
    Renders all registered VisionSensors into one tiled image via omni.replicator's
    tiled sensor, then splits that tile back into a per-camera batch on the GPU.

    Args:
        modalities (None or list of str): Modality(s) supported by this sensor.
            Defaults to ["rgb"]; can also include "depth".
    """

    def __init__(self, modalities=None):
        # Avoid a shared mutable default argument; copy caller-provided lists.
        self.modalities = ["rgb"] if modalities is None else list(modalities)

        # All registered cameras are packed into one tile grid, so they must
        # share a single (width, height) resolution.
        self._camera_resolution = None
        camera_prim_paths = []
        for sensor in VisionSensor.SENSORS.values():
            if self._camera_resolution is None:
                self._camera_resolution = (sensor.image_width, sensor.image_height)
            else:
                assert self._camera_resolution == (
                    sensor.image_width,
                    sensor.image_height,
                ), "All cameras must have the same resolution!"
            camera_prim_paths.append(sensor.prim_path)

        # Keep handles on the underlying USD camera prims being tiled.
        stage = lazy.omni.usd.get_context().get_stage()
        self._camera_prims = []
        for path in camera_prim_paths:
            camera_prim = stage.GetPrimAtPath(path)
            self._camera_prims.append(lazy.pxr.UsdGeom.Camera(camera_prim))

        # Create the tiled sensor + render product, and attach the raw GPU annotator.
        tiled_camera = lazy.omni.replicator.core.create.tiled_sensor(
            cameras=camera_prim_paths,
            camera_resolution=self._camera_resolution,
            tiled_resolution=self._tiled_img_shape(),
            output_types=self.modalities,
        )
        self._render_product_path = lazy.omni.replicator.core.create.render_product(
            camera=tiled_camera, resolution=self._tiled_img_shape()
        )
        self._annotator = lazy.omni.replicator.core.AnnotatorRegistry.get_annotator(
            "RtxSensorGpu", device="cuda:0", do_array_copy=False
        )
        self._annotator.attach([self._render_product_path])

        # Pre-allocate per-modality output batches of shape (num_cameras, H, W, C).
        # th.zeros already returns a contiguous tensor, so no .contiguous() needed.
        self._output_buffer = dict()
        if "rgb" in self.modalities:
            self._output_buffer["rgb"] = th.zeros(
                (self._camera_count(), self._camera_resolution[1], self._camera_resolution[0], 3), device="cuda:0"
            )
        if "depth" in self.modalities:
            self._output_buffer["depth"] = th.zeros(
                (self._camera_count(), self._camera_resolution[1], self._camera_resolution[0], 1), device="cuda:0"
            )

    def _camera_count(self):
        """Number of cameras packed into the tiled image."""
        return len(self._camera_prims)

    def _tiled_grid_shape(self):
        """(cols, rows) of the near-square tile grid that fits all cameras."""
        cols = round(math.sqrt(self._camera_count()))
        rows = math.ceil(self._camera_count() / cols)
        return (cols, rows)

    def _tiled_img_shape(self):
        """(width, height) in pixels of the full tiled image."""
        cols, rows = self._tiled_grid_shape()
        width, height = self._camera_resolution
        return (width * cols, height * rows)

    def get_obs(self):
        """
        Fetch the latest tiled render and de-tile it into per-camera batches.

        Returns:
            dict: Maps each modality name to a (num_cameras, H, W, C) torch tensor
                on cuda:0. The tensors are reused across calls (no reallocation).
        """
        # TODO: somehow isaac 4.1.0 introduced a bug: this always return a warp array
        # on cpu instead of gpu, even when explicitly specifying device="cuda:0"
        tiled_data = self._annotator.get_data().to(device="cuda:0")

        # Local import to avoid a circular import at module load time — TODO confirm
        # whether this can be hoisted to the top of the file.
        from omnigibson.utils.deprecated_utils import reshape_tiled_image

        for modality in self.modalities:
            # The annotator buffer packs rgb first, then depth, so only the depth
            # launch needs an offset — and only when rgb is actually rendered.
            # (The previous code applied the depth offset to *every* modality and
            # crashed with KeyError when "rgb" was not requested.)
            buffer_offset = (
                self._output_buffer["rgb"].numel() if modality == "depth" and "rgb" in self.modalities else 0
            )
            lazy.warp.launch(
                kernel=reshape_tiled_image,
                dim=(self._camera_count(), self._camera_resolution[1], self._camera_resolution[0]),
                inputs=[
                    tiled_data,
                    lazy.warp.from_torch(self._output_buffer[modality]),  # zero-copy alias
                    *list(self._output_buffer[modality].shape[1:]),  # height, width, num_channels
                    self._tiled_grid_shape()[0],  # num_tiles_x
                    buffer_offset,
                ],
                device="cuda:0",
            )
        return self._output_buffer
44 changes: 44 additions & 0 deletions omnigibson/utils/deprecated_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,3 +1029,47 @@ def get_world_pose(fabric_prim):
result_transform.Orthonormalize()
result_transform = np.transpose(result_transform)
return result_transform[:3, 3], R.from_matrix(result_transform[:3, :3]).as_quat()


@wp.kernel
def reshape_tiled_image(
    tiled_image_buffer: wp.array(dtype=float),
    batched_image: wp.array(dtype=float, ndim=4),
    image_height: int,
    image_width: int,
    num_channels: int,
    num_tiles_x: int,
    offset: int,
):
    """Reshapes a tiled image into a batch of images.

    Each Warp thread copies one pixel (all of its channels) from the flat tiled
    buffer into the correct (camera, row, col) slot of the batched output. The
    indexing assumes tiles are laid out row-major in the tiled image: camera ``k``
    occupies grid cell ``(k % num_tiles_x, k // num_tiles_x)``, and the tiled
    image's row stride is ``num_tiles_x * image_width * num_channels``.

    Args:
        tiled_image_buffer: The flat input buffer holding the full tiled image
            (all camera tiles, row-major), starting at ``offset``.
        batched_image: The output image. Shape is (num_cameras, height, width, num_channels).
        image_height: The height of a single (untiled) camera image.
        image_width: The width of a single (untiled) camera image.
        num_channels: The number of channels in the image.
        num_tiles_x: The number of tiles in the x-direction.
        offset: Flat-index offset into ``tiled_image_buffer``. Used when multiple
            image types (e.g. rgb then depth) are concatenated in the buffer.
    """
    # One thread per (camera, row, column) pixel; launch dim must match this order.
    camera_id, height_id, width_id = wp.tid()

    # Grid cell of this camera's tile within the tiled image (row-major layout).
    tile_x_id = camera_id % num_tiles_x
    tile_y_id = camera_id // num_tiles_x
    # Flat index of this pixel's first channel in the tiled buffer:
    # full tiled-image rows above this pixel, plus whole tiles to its left,
    # plus pixels to its left within this tile — each scaled by num_channels.
    pixel_start = (
        offset
        + num_channels * num_tiles_x * image_width * (image_height * tile_y_id + height_id)
        + num_channels * tile_x_id * image_width
        + num_channels * width_id
    )

    # Copy every channel of the pixel into the batched image.
    for i in range(num_channels):
        batched_image[camera_id, height_id, width_id, i] = tiled_image_buffer[pixel_start + i]
Loading