diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt
index c554cd38d8..526dec7353 100644
--- a/applications/CMakeLists.txt
+++ b/applications/CMakeLists.txt
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,6 +25,9 @@ add_holohub_application(async_buffer_deadline)
add_holohub_application(basic_networking_ping DEPENDS
OPERATORS basic_network)
+add_holohub_application(bci_visualization DEPENDS
+ OPERATORS volume_renderer)
+
add_holohub_application(body_pose_estimation DEPENDS
OPERATORS OPTIONAL dds_video_subscriber dds_video_publisher)
diff --git a/applications/bci_visualization/CMakeLists.txt b/applications/bci_visualization/CMakeLists.txt
new file mode 100644
index 0000000000..d1ee218194
--- /dev/null
+++ b/applications/bci_visualization/CMakeLists.txt
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.20)
+project(bci_visualization LANGUAGES NONE)
+
+find_package(holoscan 2.0 REQUIRED CONFIG
+ PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")
+
+add_subdirectory(operators)
diff --git a/applications/bci_visualization/Dockerfile b/applications/bci_visualization/Dockerfile
new file mode 100644
index 0000000000..8b744b701d
--- /dev/null
+++ b/applications/bci_visualization/Dockerfile
@@ -0,0 +1,32 @@
+# syntax=docker/dockerfile:1
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE} AS base
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install curl for downloading data files
+RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+
+ENV HOLOSCAN_INPUT_PATH=/workspace/holohub/data/bci_visualization
+
+# Install Python dependencies
+COPY applications/bci_visualization/requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt --no-cache-dir
+
\ No newline at end of file
diff --git a/applications/bci_visualization/README.md b/applications/bci_visualization/README.md
new file mode 100644
index 0000000000..d8d4020378
--- /dev/null
+++ b/applications/bci_visualization/README.md
@@ -0,0 +1,234 @@
+# Kernel Flow BCI Real-Time Reconstruction and Visualization
+
+<p align="center">
+  <img src="docs/brain_activity_example.gif" alt="Example 3D visualization">
+  <br>
+  <i>Example 3D visualization</i>
+</p>
+
+## Overview
+
+This Holohub application demonstrates how to perform real-time source reconstruction and visualization of streaming functional brain data from the Kernel Flow 2 system. The application was developed and tested on an NVIDIA Jetson Thor paired with a Kernel Flow 2 headset. To lower the barrier to entry, we also provide recorded datasets and a data replayer, enabling developers to build and experiment with visualization and classification pipelines within the Holoscan framework without requiring access to the hardware.
+
+This example processes streaming [moments from the distribution of time-of-flight histograms](https://doi.org/10.1117/1.NPh.10.1.013504). These moments can originate either from the [Kernel Flow SDK](https://docs.kernel.com/docs/kernel-sdk-install) when connected to the Kernel hardware, or from the included [shared near-infrared spectroscopy format (SNIRF)](https://github.com/fNIRS/snirf) replayer. The moments are then combined with the sensors' spatial geometry and an average anatomical head model to produce source-reconstructed outputs similar to what was [published in previous work](https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00475/127769/A-Compact-Time-Domain-Diffuse-Optical-Tomography).
+
+To visualize the reconstructed 3D volumes, this application utilizes both the [VolumeRendererOp](../../operators/volume_renderer/) operator and HolovizOp for real-time 3D rendering and interactive visualization.
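+
+The wiring is sketched below, abridged from `compose()` in `bci_visualization.py` (resource arguments such as the allocator and CUDA stream pool are omitted here):
+
+```python
+from holoscan.operators import HolovizOp
+
+from holohub.volume_renderer import VolumeRendererOp
+
+# Inside Application.compose(): render volumes with ClaraViz, display with Holoviz.
+volume_renderer = VolumeRendererOp(self, name="volume_renderer", config_file="config.json")
+holoviz = HolovizOp(self, name="holoviz", enable_camera_pose_output=True)
+```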
+
+For optimal efficiency and smooth user experience, we employ an event-based scheduler that decouples the reconstruction and visualization pipelines. This allows each stage to run on separate threads, resulting in higher rendering quality and more responsive interaction.
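+
+A minimal sketch of the scheduler setup, mirroring `bci_visualization.py` (the worker-thread count is a tunable choice, not a requirement):
+
+```python
+from holoscan.schedulers import EventBasedScheduler
+
+# Replace the default greedy scheduler so reconstruction and rendering
+# can execute concurrently on separate worker threads.
+app.scheduler(EventBasedScheduler(app, worker_thread_number=5, stop_on_deadlock=True))
+```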
+
+## Background
+
+Kernel Flow is a multimodal non-invasive brain measurement system. It combines the relatively high resolution of time-domain functional near-infrared spectroscopy (TD-fNIRS) with the fast temporal resolution of electroencephalography (EEG)
+into a compact and scalable form factor that enables a new class of non-invasive Brain-Computer Interface (BCI) applications.
+
+The differentiating technologies underlying the performance of the Kernel Flow system are its time-resolved detectors and high-speed laser drivers. Short (~100ps) pulses of near-infrared laser light (690nm & 905nm) are emitted into the user's head with a repetition rate of 20 MHz. The photons in these laser pulses scatter through the scalp, skull, and cerebrospinal fluid before reaching the brain and then scattering back out. When the photons emerge from
+the scalp, we use single-photon-sensitive detectors to timestamp exactly how long each photon took to traverse the head. A photon's time of flight is proportional to the path length it traveled and correlates with the average depth it reached.
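+
+As a back-of-the-envelope illustration (our numbers, not Kernel's specification): taking a tissue refractive index of $n \approx 1.4$, a photon's path length is $\ell = (c/n)\,t$, so a measured time of flight of $t = 1\,\mathrm{ns}$ corresponds to roughly $21\,\mathrm{cm}$ of scattered path through the head.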
+
+This simulation shows the relationship between photon scattering paths (black lines) and the measured time of flight (blue sections).
+
+<p align="center">
+  <img src="docs/photon_simulation.gif" alt="Photon scattering simulation">
+  <br>
+  <i>The relationship between photon path lengths and measured time</i>
+</p>
+
+As you can see, later times correspond to photons that have traveled farther into the tissue. Every second, we timestamp over 10 billion individual photons, which generates an enormous amount of data; even after compression, the data production rate of Kernel Flow is ~1 GB/min.
+
+As the photons scatter through the tissue, many are absorbed by cells and molecules along the way. The wavelengths we use are especially sensitive to hemoglobin in its two states, oxyhemoglobin and deoxyhemoglobin, which lets us follow the locations in the brain that are demanding and consuming oxygen, an indirect measure of neuronal activity. The same biophysical principles are behind the pulse oximeters found in smart watches and finger-clip sensors! For more detailed information about the biophysics, [see this review article](https://www.mdpi.com/2076-3417/9/8/1612).
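+
+As a rough sketch of the underlying conversion (illustrative numbers only; the application loads real extinction coefficients from `extinction_coefficients_mua.csv`), recovering hemoglobin changes from absorption changes at the two wavelengths is a small linear solve:
+
+```python
+import numpy as np
+
+# Extinction coefficients [HbO, HbR] per wavelength (placeholder values).
+E = np.array([
+    [0.35, 2.10],  # 690 nm: HbR dominates
+    [1.20, 0.80],  # 905 nm: HbO dominates
+])
+delta_mua = np.array([0.002, 0.001])  # measured absorption change per wavelength
+delta_hbo, delta_hbr = np.linalg.solve(E, delta_mua)
+```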
+
+With the Kernel Flow headset, we have combined 120 laser sources and 240 of our custom sensors to collect over 3000 measurement paths that criss-cross the head at a frame rate of 4.75 Hz. When visualized, these measurement paths look like this:
+
+<p align="center">
+  <img src="docs/flow_channel_map.png" alt="Kernel Flow channel map">
+  <br>
+  <i>The 3000+ measurements that are made with a Kernel Flow</i>
+</p>
+
+We call each of these measurement paths a "channel" and the measurement is made in "sensor space" (i.e. from the perspective of the detector). In order to have a more anatomical representation of the data, it is common to transform the
+sensor-space data into source-space (i.e. where the changes in hemoglobin concentrations likely occurred in the brain, based on what was observed at the sensor) by solving an inverse problem, commonly called source reconstruction. This inverse problem requires complex modeling that is computationally expensive but highly parallelizable.
+
+In this Holohub application, we demonstrate a real-time source reconstruction pipeline that runs on a Jetson Thor at the native framerate of the Kernel Flow system (4.75 Hz) and visualizes the 3D data using volume rendering techniques.
+
+## Requirements
+
+This application was developed to run on an NVIDIA Jetson Thor Developer Kit, but any platform supported by the Holoscan SDK should work.
+
+To run the application you need a streaming Kernel Flow data source. This can be either:
+
+ * Kernel Flow hardware and SDK
+ * Recorded `.snirf` files played back with the included data replayer.
+
+## Quick Start
+
+### 1. Download Required Data
+
+The required data can be found on [Hugging Face](https://huggingface.co/datasets/KernelCo/holohub_bci_visualization/tree/main). The dataset includes:
+- **SNIRF data file** (`data.snirf`): Recorded brain activity measurements. More recorded SNIRF files can be found on [OpenNeuro](https://openneuro.org/datasets/ds006545).
+- **Anatomy masks** (`anatomy_labels_high_res.nii.gz`): Brain tissue segmentation (skin, skull, CSF, gray matter, white matter)
+- **Reconstruction matrices**: Pre-computed Jacobian and voxel information
+
+To prepare the required data for this application, download the dataset into `holohub/data/bci_visualization` as described in the [expected data folder structure](#expected-data-folder-structure).
+
+From the `holohub` directory, use the following command (the `hf` CLI ships with the `huggingface_hub` Python package):
+```bash
+hf download KernelCo/holohub_bci_visualization --repo-type dataset --local-dir data/bci_visualization
+```
+
+
+### Expected Data Folder Structure
+
+Download the folder matching your device type, along with the other files, into `data/bci_visualization`. Your `data/bci_visualization` folder should then have the following structure:
+
+```
+holohub/data/bci_visualization/
+├── anatomy_labels_high_res.nii.gz # Brain segmentation
+├── data.snirf # SNIRF format brain activity data
+├── extinction_coefficients_mua.csv # Absorption coefficients for HbO/HbR
+├── flow_channel_map.json # Sensor-source channel mapping
+├── flow_mega_jacobian.npy # Pre-computed sensitivity matrix (channels → voxels)
+└── voxel_info/ # Voxel geometry and optical properties
+ ├── affine.npy # 4x4 affine transformation matrix
+ ├── idxs_significant_voxels.npy # Indices of voxels with sufficient sensitivity
+ ├── ijk.npy # Voxel coordinates in volume space
+ ├── mua.npy # Absorption coefficient per voxel
+ ├── musp.npy # Reduced scattering coefficient per voxel
+ ├── resolution.npy # Voxel resolution (mm)
+ ├── wavelengths.npy # Measurement wavelengths (690nm, 905nm)
+ └── xyz.npy # Voxel coordinates in anatomical space (mm)
+```
+
+### 2. Run the Application
+```bash
+./holohub run bci_visualization
+```
+
+## Pipeline Overview
+
+The application consists of two main pipelines running on separate threads:
+
+### Reconstruction Pipeline
+Transforms sensor-space measurements into 3D brain activity maps:
+
+```mermaid
+graph LR
+ A[SNIRF Stream] --> B[Stream Operator]
+ B --> C[Build RHS]
+ C --> D[Normalize]
+ D --> E[Regularized Solver]
+ E --> F[Convert to Voxels]
+ F --> G[Voxel to Volume]
+
+ style A fill:#e1f5ff
+ style G fill:#ffe1f5
+```
+
+**Key Steps:**
+1. **Stream Operator**: Reads SNIRF data and emits time-of-flight moments
+2. **Build RHS**: Constructs the right-hand side of the inverse problem from the streaming moments, taking the channel mapping into account
+3. **Normalize**: Normalizes the moments to hard-coded upper bounds
+4. **Regularized Solver**: Solves the regularized linear inverse problem (see the sketch below)
+5. **Convert to Voxels**: Maps solution to 3D voxel coordinates with HbO/HbR conversion
+6. **Voxel to Volume**: Resamples to match anatomy mask, applies adaptive normalization
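+
+A minimal sketch of a Tikhonov-style regularized solve (illustrative only; the application's actual formulation lives in `operators/reconstruction/solver_operator.py`):
+
+```python
+import cupy as cp
+
+def solve_regularized(jacobian: cp.ndarray, rhs: cp.ndarray, reg: float) -> cp.ndarray:
+    """Tikhonov-regularized solve of the underdetermined system J x = y."""
+    # Dual form x = J^T (J J^T + reg * I)^-1 y: cheaper when channels << voxels.
+    gram = jacobian @ jacobian.T
+    gram += reg * cp.eye(gram.shape[0], dtype=gram.dtype)
+    return jacobian.T @ cp.linalg.solve(gram, rhs)
+```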
+
+### Visualization Pipeline
+Renders 3D brain volumes with real-time interaction:
+
+```mermaid
+graph LR
+ G[Voxel to Volume] --> H[Volume Renderer]
+ H --> I[Color Buffer Passthrough]
+ I --> J[HolovizOp]
+ J --> H
+
+ style G fill:#ffe1f5
+ style J fill:#e1ffe1
+```
+
+**Key Steps:**
+1. **Volume Renderer**: GPU-accelerated ray-casting with ClaraViz (tissue segmentation + activation overlay)
+2. **Color Buffer Passthrough**: Queue management with POP policy to prevent frame stacking
+3. **HolovizOp**: Interactive 3D display with camera controls (bidirectional camera pose feedback)
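+
+For reference, this is how `bci_visualization.py` relaxes the renderer's output-port condition so it never blocks on the display (step 2's size-one POP queue then keeps only the latest frame):
+
+```python
+from holoscan.core import ConditionType
+
+# Remove backpressure on the renderer's color-buffer output.
+volume_renderer.spec.outputs["color_buffer_out"].condition(ConditionType.NONE)
+```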
+
+## Volume Renderer Configuration
+
+The `config.json` file in the data folder configures the ClaraViz volume renderer. For detailed documentation, see the [VolumeRenderer operator documentation](../../operators/volume_renderer/) and [ClaraViz proto definitions](https://github.com/NVIDIA/clara-viz/blob/main/src/protos/nvidia/claraviz/cinematic/v1/render_server.proto).
+
+### Key Configuration Parameters
+
+#### 1. Rendering Quality
+```json
+{
+ "timeSlot": 100
+}
+```
+- **`timeSlot`** (milliseconds): Rendering time budget per frame
+ - Higher values = better quality
+ - Lower values = faster rendering
+
+#### 2. Transfer Functions
+The transfer function maps voxel values to colors and opacity. This application uses **three components**.
+
+##### Component 1: Brain Tissue Base (Gray/White Matter)
+```json
+{
+ "activeRegions": [3, 4],
+ "range": { "min": 0, "max": 1 },
+ "opacity": 0.5,
+ "opacityProfile": "SQUARE",
+ "diffuseStart": { "x": 1, "y": 1, "z": 1 },
+ "diffuseEnd": { "x": 1, "y": 1, "z": 1 }
+}
+```
+- **`activeRegions`**: Tissue types to render
+ - `0`: Skin, `1`: Skull, `2`: CSF, `3`: Gray matter, `4`: White matter, `5`: Air
+ - Here: `[3, 4]` = gray and white matter only
+- **`range`**: `[0, 1]` = full normalized value range
+- **`opacity`**: `0.5` = semi-transparent base layer
+- **`opacityProfile`**: `"SQUARE"` = constant opacity throughout range
+- **`diffuseStart/End`**: `[1, 1, 1]` = white base color
+
+##### Component 2: Negative Activation / Deactivation (Blue)
+```json
+{
+ "activeRegions": [3, 4],
+ "range": { "min": 0, "max": 0.4 },
+ "opacity": 1.0,
+ "opacityProfile": "SQUARE",
+ "diffuseStart": { "x": 0.0, "y": 0.0, "z": 1.0 },
+ "diffuseEnd": { "x": 0.0, "y": 0.0, "z": 0.5 }
+}
+```
+- **`range`**: `[0, 0.4]` = lower 40% of normalized range (deactivation)
+- **`opacity`**: `1.0` = fully opaque
+- **`opacityProfile`**: `"SQUARE"` = constant opacity
+- **`diffuseStart/End`**: `[0, 0, 1]` → `[0, 0, 0.5]` = bright blue to dark blue gradient
+
+##### Component 3: Positive Activation (Red)
+```json
+{
+ "activeRegions": [3, 4],
+ "range": { "min": 0.6, "max": 1 },
+ "opacity": 1.0,
+ "opacityProfile": "SQUARE",
+ "diffuseStart": { "x": 0.5, "y": 0.0, "z": 0.0 },
+ "diffuseEnd": { "x": 1.0, "y": 0.0, "z": 0.0 }
+}
+```
+- **`range`**: `[0.6, 1]` = upper 40% of normalized range (activation)
+- **`opacity`**: `1.0` = fully opaque
+- **`opacityProfile`**: `"SQUARE"` = constant opacity
+- **`diffuseStart/End`**: `[0.5, 0, 0]` → `[1, 0, 0]` = dark red to bright red gradient
+
+#### 3. Blending
+```json
+{
+ "blendingProfile": "BLENDED_OPACITY"
+}
+```
+- **`blendingProfile`**: How overlapping components combine
+
+### Visualization Strategy
+
+The three-component approach creates a layered visualization:
+
+1. **Base layer** (white, 50% opacity): Shows overall brain structure (gray + white matter) throughout the full range [0, 1]
+2. **Blue overlay** (100% opacity): Highlights low values [0, 0.4], representing decreased hemoglobin
+3. **Red overlay** (100% opacity): Highlights high values [0.6, 1], representing increased hemoglobin
+4. **Neutral range** [0.4, 0.6]: Only shows the white base layer (no significant change)
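+
+A hedged sketch of how a signed hemoglobin change could be mapped into this [0, 1] convention with 0.5 as baseline (the actual adaptive normalization lives in `VoxelStreamToVolumeOp`; `scale` is an illustrative parameter):
+
+```python
+import cupy as cp
+
+def normalize_symmetric(volume: cp.ndarray, scale: float) -> cp.ndarray:
+    """Map signed changes to [0, 1] so that 0 maps to 0.5 (baseline)."""
+    return cp.clip(0.5 + volume / (2.0 * scale), 0.0, 1.0)
+```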
\ No newline at end of file
diff --git a/applications/bci_visualization/bci_visualization.py b/applications/bci_visualization/bci_visualization.py
new file mode 100644
index 0000000000..257130e282
--- /dev/null
+++ b/applications/bci_visualization/bci_visualization.py
@@ -0,0 +1,280 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
+SPDX-License-Identifier: Apache-2.0
+
+BCI Visualization Application - streams Kernel Flow NIRS data and renders reconstructed brain activity as a 3D volume.
+"""
+
+import argparse
+import os
+from pathlib import Path
+
+from holoscan.core import Application, ConditionType
+from holoscan.operators import HolovizOp
+from holoscan.resources import CudaStreamPool, UnboundedAllocator
+from holoscan.schedulers import EventBasedScheduler
+from operators.reconstruction import (
+ BuildRHSOperator,
+ ConvertToVoxelsOperator,
+ NormalizeOperator,
+ RegularizedSolverOperator,
+)
+from operators.stream import StreamOperator
+from operators.voxel_stream_to_volume import VoxelStreamToVolumeOp
+from streams.base_nirs import BaseNirsStream
+from streams.snirf import SNIRFStream
+from utils.reconstruction.assets import get_assets
+
+from holohub.color_buffer_passthrough import ColorBufferPassthroughOp
+from holohub.volume_renderer import VolumeRendererOp
+
+
+class BciVisualizationApp(Application):
+ """Kernel Flow BCI Real-Time Reconstruction and Visualization
+
+ This application integrates the full pipeline from NIRS data streaming through
+ reconstruction to 3D volume rendering and visualization.
+ """
+
+ def __init__(
+ self,
+ argv=None,
+ *args,
+ render_config_file,
+ stream: BaseNirsStream,
+ jacobian_path: Path | str,
+ channel_mapping_path: Path | str,
+ voxel_info_dir: Path | str,
+ coefficients_path: Path | str,
+ mask_path=None,
+ reg: float = RegularizedSolverOperator.REG_DEFAULT,
+ **kwargs,
+ ):
+ self._rendering_config = render_config_file
+ self._mask_path = mask_path
+
+ # Reconstruction pipeline parameters
+ self._stream = stream
+ self._reg = reg
+ self._jacobian_path = Path(jacobian_path)
+ self._channel_mapping_path = Path(channel_mapping_path)
+ self._coefficients_path = Path(coefficients_path)
+ self._voxel_info_dir = Path(voxel_info_dir)
+
+ super().__init__(argv, *args, **kwargs)
+
+ def compose(self):
+ # Resources
+ volume_allocator = UnboundedAllocator(self, name="allocator")
+ cuda_stream_pool = CudaStreamPool(
+ self,
+ name="cuda_stream",
+ dev_id=0,
+ stream_flags=0,
+ stream_priority=0,
+ reserved_size=1,
+ max_size=5,
+ )
+
+ # Get reconstruction pipeline assets
+ pipeline_assets = get_assets(
+ jacobian_path=self._jacobian_path,
+ channel_mapping_path=self._channel_mapping_path,
+ voxel_info_dir=self._voxel_info_dir,
+ coefficients_path=self._coefficients_path,
+ )
+
+ # ========== Reconstruction Pipeline Operators ==========
+ stream_operator = StreamOperator(stream=self._stream, fragment=self)
+
+ build_rhs_operator = BuildRHSOperator(
+ assets=pipeline_assets,
+ fragment=self,
+ )
+
+ normalize_operator = NormalizeOperator(
+ fragment=self,
+ )
+
+ regularized_solver_operator = RegularizedSolverOperator(
+ reg=self._reg,
+ fragment=self,
+ )
+
+ convert_to_voxels_operator = ConvertToVoxelsOperator(
+ fragment=self,
+ coefficients=pipeline_assets.extinction_coefficients,
+ ijk=pipeline_assets.ijk,
+ xyz=pipeline_assets.xyz,
+ )
+
+ # ========== Visualization Pipeline Operators ==========
+ # Get volume_renderer kwargs from YAML config to extract density range
+ volume_renderer_kwargs = self.kwargs("volume_renderer")
+ density_min = volume_renderer_kwargs.get("density_min", -100.0)
+ density_max = volume_renderer_kwargs.get("density_max", 100.0)
+
+ voxel_to_volume = VoxelStreamToVolumeOp(
+ self,
+ name="voxel_to_volume",
+ pool=volume_allocator,
+ mask_nifti_path=self._mask_path,
+ density_min=density_min,
+ density_max=density_max,
+ **self.kwargs("voxel_stream_to_volume"),
+ )
+
+ volume_renderer = VolumeRendererOp(
+ self,
+ name="volume_renderer",
+ config_file=self._rendering_config,
+ allocator=volume_allocator,
+ cuda_stream_pool=cuda_stream_pool,
+ **volume_renderer_kwargs,
+ )
+
+ # IMPORTANT changes to avoid deadlocks of volume_renderer and holoviz
+ # when running in multi-threading mode
+ # 1. Set the output port condition to NONE to remove backpressure
+ volume_renderer.spec.outputs["color_buffer_out"].condition(ConditionType.NONE)
+ # 2. Use a passthrough operator to configure queue policy as POP to use latest frame
+ color_buffer_passthrough = ColorBufferPassthroughOp(
+ self,
+ name="color_buffer_passthrough",
+ )
+
+ holoviz = HolovizOp(
+ self,
+ name="holoviz",
+ window_title="Kernel Flow BCI Real-Time Reconstruction and Visualization",
+ enable_camera_pose_output=True,
+ cuda_stream_pool=cuda_stream_pool,
+ )
+
+ # ========== Connect Reconstruction Pipeline ==========
+ self.add_flow(
+ stream_operator,
+ build_rhs_operator,
+ {
+ ("samples", "moments"),
+ },
+ )
+ self.add_flow(
+ build_rhs_operator,
+ normalize_operator,
+ {
+ ("batch", "batch"),
+ },
+ )
+ self.add_flow(
+ normalize_operator,
+ regularized_solver_operator,
+ {
+ ("normalized", "batch"),
+ },
+ )
+ self.add_flow(
+ regularized_solver_operator,
+ convert_to_voxels_operator,
+ {
+ ("result", "result"),
+ },
+ )
+ self.add_flow(
+ convert_to_voxels_operator,
+ voxel_to_volume,
+ {
+ ("affine_4x4", "affine_4x4"),
+ ("hb_voxel_data", "hb_voxel_data"),
+ },
+ )
+
+ # ========== Connect Visualization Pipeline ==========
+ # voxel_to_volume → volume_renderer
+ self.add_flow(
+ voxel_to_volume,
+ volume_renderer,
+ {
+ ("volume", "density_volume"),
+ ("spacing", "density_spacing"),
+ ("permute_axis", "density_permute_axis"),
+ ("flip_axes", "density_flip_axes"),
+ },
+ )
+ # Add mask connections to VolumeRendererOp
+ self.add_flow(
+ voxel_to_volume,
+ volume_renderer,
+ {
+ ("mask_volume", "mask_volume"),
+ ("mask_spacing", "mask_spacing"),
+ ("mask_permute_axis", "mask_permute_axis"),
+ ("mask_flip_axes", "mask_flip_axes"),
+ },
+ )
+
+ # volume_renderer ↔ holoviz
+ self.add_flow(
+ volume_renderer, color_buffer_passthrough, {("color_buffer_out", "color_buffer_in")}
+ )
+ self.add_flow(color_buffer_passthrough, holoviz, {("color_buffer_out", "receivers")})
+ self.add_flow(holoviz, volume_renderer, {("camera_pose_output", "camera_pose")})
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Kernel Flow BCI Real-Time Reconstruction and Visualization", add_help=False
+ )
+
+ parser.add_argument(
+ "-c",
+ "--renderer_config",
+ action="store",
+ dest="renderer_config",
+ help="Path to the renderer JSON configuration file to load",
+ )
+
+ parser.add_argument(
+ "-m",
+ "--mask_path",
+ action="store",
+ type=str,
+ dest="mask_path",
+ help="Path to the mask NIfTI file containing 3D integer labels (I, J, K). Optional.",
+ )
+
+ parser.add_argument(
+ "-h", "--help", action="help", default=argparse.SUPPRESS, help="Help message"
+ )
+
+ args = parser.parse_args()
+
+ # Setup data paths
+ default_data_path = os.path.join(os.getcwd(), "data/bci_visualization")
+ kernel_data = Path(os.environ.get("HOLOSCAN_INPUT_PATH", default_data_path))
+
+ stream = SNIRFStream(kernel_data / "data.snirf")
+ app = BciVisualizationApp(
+ render_config_file=args.renderer_config,
+ stream=stream,
+ jacobian_path=kernel_data / "flow_mega_jacobian.npy",
+ channel_mapping_path=kernel_data / "flow_channel_map.json",
+ voxel_info_dir=kernel_data / "voxel_info",
+ coefficients_path=kernel_data / "extinction_coefficients_mua.csv",
+ mask_path=args.mask_path,
+ reg=RegularizedSolverOperator.REG_DEFAULT,
+ )
+
+ # Load YAML configuration
+ config_file = os.path.join(os.path.dirname(__file__), "bci_visualization.yaml")
+
+ app.config(config_file)
+ app.scheduler(EventBasedScheduler(app, worker_thread_number=5, stop_on_deadlock=True))
+
+ app.run()
+
+ print("App has finished running.")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/applications/bci_visualization/bci_visualization.yaml b/applications/bci_visualization/bci_visualization.yaml
new file mode 100644
index 0000000000..c75ffcdd1d
--- /dev/null
+++ b/applications/bci_visualization/bci_visualization.yaml
@@ -0,0 +1,12 @@
+%YAML 1.2
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+---
+voxel_stream_to_volume:
+ visualization_scale: 10.0
+
+volume_renderer:
+ alloc_width: 1024
+ alloc_height: 768
+ density_min: -100
+ density_max: 100
\ No newline at end of file
diff --git a/applications/bci_visualization/config.json b/applications/bci_visualization/config.json
new file mode 100644
index 0000000000..18e9632af2
--- /dev/null
+++ b/applications/bci_visualization/config.json
@@ -0,0 +1,264 @@
+{
+ "BackgroundLight": {
+ "bottomColor": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": 0.0
+ },
+ "castLight": false,
+ "enable": false,
+ "horizonColor": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": 0.0
+ },
+ "intensity": 1,
+ "show": true,
+ "topColor": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": 0.0
+ }
+ },
+ "Camera": {
+ "eye": {
+ "x": -0.26613375709170434,
+ "y": 0.18896026836330537,
+ "z": 0.12506341747625926
+ },
+ "fieldOfView": 55,
+ "lookAt": {
+ "x": 0.00025000300956889987,
+ "y": 0.0002500034752301872,
+ "z": 0.0002499971888028085
+ },
+ "pixelAspectRatio": 1,
+ "up": {
+ "x": 0.3602252996858582,
+ "y": 0.8121067525713994,
+ "z": -0.4590428693425827
+ }
+ },
+ "CameraAperture": {
+ "autoFocus": true,
+ "focusDistance": 1
+ },
+ "Light": [
+ {
+ "color": {
+ "x": 1.0,
+ "y": 1.0,
+ "z": 1.0
+ },
+ "direction": {
+ "x": -0.15643446504023087,
+ "y": -0.9876883405951378,
+ "z": 0.0
+ },
+ "enable": true,
+ "index": 0,
+ "intensity": 3.630780547701013,
+ "position": {
+ "x": 0.15643446504023087,
+ "y": 0.9876883405951378,
+ "z": 0.0
+ },
+ "show": false,
+ "size": 1.0
+ },
+ {
+ "color": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "direction": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": -1.0
+ },
+ "position": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": 1.0
+ },
+ "intensity": 1,
+ "index": 1,
+ "enable": true,
+ "show": false,
+ "size": 1
+ }
+ ],
+ "PostProcessDenoise": {
+ "depthWeight": 3,
+ "enableIterationLimit": false,
+ "iterationLimit": 40,
+ "method": "KNN",
+ "noiseThreshold": 0.2,
+ "radius": 1,
+ "spatialWeight": 0.05
+ },
+ "PostProcessTonemap": {
+ "enable": true,
+ "exposure": 0.5
+ },
+ "RenderSettings": {
+ "enableFoveation": false,
+ "enableReproject": false,
+ "enableWarp": false,
+ "interpolationMode": "CATMULLROM",
+ "maxIterations": 10000,
+ "shadowStepSize": 1,
+ "stepSize": 1,
+ "timeSlot": 100,
+ "warpFullResolutionSize": 0.3,
+ "warpResolutionScale": 0.3
+ },
+ "TransferFunction": {
+ "blendingProfile": "BLENDED_OPACITY",
+ "components": [
+ {
+ "activeRegions": [3, 4],
+ "diffuseEnd": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "diffuseStart": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "specularEnd": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "specularStart": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "emissiveEnd": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ },
+ "emissiveStart": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ },
+ "emissiveStrength": 0,
+ "opacity": 0.5,
+ "opacityProfile": "SQUARE",
+ "opacityTransition": 0.2,
+ "range": {
+ "min": 0,
+ "max": 1
+ },
+ "roughness": 0
+ },
+ {
+ "activeRegions": [3, 4],
+ "diffuseEnd": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": 0.5
+ },
+ "diffuseStart": {
+ "x": 0.0,
+ "y": 0.0,
+ "z": 1.0
+ },
+ "specularEnd": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "specularStart": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "emissiveEnd": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ },
+ "emissiveStart": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ },
+ "emissiveStrength": 0,
+ "opacity": 1.0,
+ "opacityProfile": "SQUARE",
+ "opacityTransition": 0.2,
+ "range": {
+ "min": 0,
+ "max": 0.4
+ },
+ "roughness": 0
+ },
+ {
+ "activeRegions": [3, 4],
+ "diffuseEnd": {
+ "x": 1.0,
+ "y": 0.0,
+ "z": 0.0
+ },
+ "diffuseStart": {
+ "x": 0.5,
+ "y": 0.0,
+ "z": 0.0
+ },
+ "specularEnd": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "specularStart": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "emissiveEnd": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ },
+ "emissiveStart": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ },
+ "emissiveStrength": 0,
+ "opacity": 1.0,
+ "opacityProfile": "SQUARE",
+ "opacityTransition": 0.2,
+ "range": {
+ "min": 0.6,
+ "max": 1
+ },
+ "roughness": 0
+ }
+ ],
+ "densityScale": 1000,
+ "globalOpacity": 1,
+ "gradientScale": 10,
+ "shadingProfile": "HYBRID"
+ },
+ "VolumeCrop": {
+ "max": {
+ "x": 1,
+ "y": 1,
+ "z": 1
+ },
+ "min": {
+ "x": 0,
+ "y": 0,
+ "z": 0
+ }
+ }
+}
\ No newline at end of file
diff --git a/applications/bci_visualization/docs/brain_activity_example.gif b/applications/bci_visualization/docs/brain_activity_example.gif
new file mode 100644
index 0000000000..0ca609f4b4
Binary files /dev/null and b/applications/bci_visualization/docs/brain_activity_example.gif differ
diff --git a/applications/bci_visualization/docs/flow_channel_map.png b/applications/bci_visualization/docs/flow_channel_map.png
new file mode 100644
index 0000000000..6a371669b6
Binary files /dev/null and b/applications/bci_visualization/docs/flow_channel_map.png differ
diff --git a/applications/bci_visualization/docs/photon_simulation.gif b/applications/bci_visualization/docs/photon_simulation.gif
new file mode 100644
index 0000000000..7bea20e862
Binary files /dev/null and b/applications/bci_visualization/docs/photon_simulation.gif differ
diff --git a/applications/bci_visualization/metadata.json b/applications/bci_visualization/metadata.json
new file mode 100644
index 0000000000..c5e45045ce
--- /dev/null
+++ b/applications/bci_visualization/metadata.json
@@ -0,0 +1,77 @@
+{
+ "application": {
+ "name": "Kernel Flow BCI Real-Time Reconstruction and Visualization",
+ "authors": [
+ {
+ "name": "Holoscan Team",
+ "affiliation": "NVIDIA"
+ },
+ {
+ "name": "Kernel Team",
+ "affiliation": "Kernel"
+ },
+ {
+ "name": "Mimi Liao",
+ "affiliation": "NVIDIA"
+ },
+ {
+ "name": "Julien Dubois",
+ "affiliation": "Kernel"
+ },
+ {
+ "name": "Victor Szczepanski",
+ "affiliation": "Kernel"
+ },
+ {
+ "name": "Gabe Lerner",
+ "affiliation": "Kernel"
+ }
+ ],
+ "language": "Python",
+ "version": "1.0.0",
+ "changelog": {
+ "1.0": "Initial Python Release"
+ },
+ "holoscan_sdk": {
+ "minimum_required_version": "3.7.0",
+ "tested_versions": [
+ "3.7.0",
+ "3.9.0"
+ ]
+ },
+ "platforms": [
+ "x86_64",
+ "aarch64"
+ ],
+ "tags": ["Visualization", "BCI", "Hemodynamics", "Volume Rendering", "Neuroscience", "Optical Sensing"],
+ "ranking": 2,
+ "requirements": {
+ "python-packages": [
+ {
+ "name": "numpy",
+ "version": "2.3.3"
+ },
+ {
+ "name": "cupy",
+ "version": "13.6.0"
+ },
+ {
+ "name": "nibabel",
+ "version": "5.3.2"
+ },
+ {
+ "name": "scipy",
+ "version": "1.16.3"
+ },
+ {
+ "name": "h5py",
+ "version": "3.15.1"
+ }
+ ]
+ },
+ "run": {
+ "command": "python3 bci_visualization.py --renderer_config /config.json --mask_path /bci_visualization/anatomy_labels_high_res.nii.gz",
+ "workdir": "holohub_app_source"
+ }
+ }
+}
diff --git a/applications/bci_visualization/operators/CMakeLists.txt b/applications/bci_visualization/operators/CMakeLists.txt
new file mode 100644
index 0000000000..51c87e6f99
--- /dev/null
+++ b/applications/bci_visualization/operators/CMakeLists.txt
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_subdirectory(color_buffer_passthrough)
diff --git a/applications/bci_visualization/operators/__init__.py b/applications/bci_visualization/operators/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/applications/bci_visualization/operators/color_buffer_passthrough/CMakeLists.txt b/applications/bci_visualization/operators/color_buffer_passthrough/CMakeLists.txt
new file mode 100644
index 0000000000..9e861d6475
--- /dev/null
+++ b/applications/bci_visualization/operators/color_buffer_passthrough/CMakeLists.txt
@@ -0,0 +1,50 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.20)
+project(color_buffer_passthrough LANGUAGES CXX)
+
+find_package(holoscan 2.0 REQUIRED CONFIG
+ PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")
+
+add_library(color_buffer_passthrough SHARED
+ cpp/color_buffer_passthrough.cpp
+ cpp/color_buffer_passthrough.hpp
+)
+
+target_link_libraries(color_buffer_passthrough
+ PRIVATE
+ holoscan::core
+)
+
+target_include_directories(color_buffer_passthrough
+ PUBLIC
+ ${CMAKE_CURRENT_SOURCE_DIR}/cpp
+ INTERFACE
+ ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+install(
+ TARGETS color_buffer_passthrough
+ EXPORT holoscan-ops
+)
+
+if(HOLOHUB_BUILD_PYTHON)
+ add_subdirectory(python)
+endif()
diff --git a/applications/bci_visualization/operators/color_buffer_passthrough/cpp/color_buffer_passthrough.cpp b/applications/bci_visualization/operators/color_buffer_passthrough/cpp/color_buffer_passthrough.cpp
new file mode 100644
index 0000000000..8a5834af3f
--- /dev/null
+++ b/applications/bci_visualization/operators/color_buffer_passthrough/cpp/color_buffer_passthrough.cpp
@@ -0,0 +1,44 @@
+/* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "color_buffer_passthrough.hpp"
+
+namespace holoscan::ops {
+
+void ColorBufferPassthroughOp::setup(OperatorSpec& spec) {
+ // This drops stale frames if queue is full
+ spec.input("color_buffer_in",
+ holoscan::IOSpec::kSizeOne,
+ holoscan::IOSpec::QueuePolicy::kPop);
+
+ spec.output("color_buffer_out");
+}
+
+void ColorBufferPassthroughOp::compute(InputContext& input, OutputContext& output,
+ ExecutionContext& context) {
+  auto color_message = input.receive<holoscan::gxf::Entity>("color_buffer_in");
+  if (!color_message) { throw std::runtime_error("Failed to receive color buffer message"); }
+
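+  // Consume any CUDA streams attached to the incoming message; they are not
+  // forwarded because this operator only passes the color buffer through.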
+ auto cuda_streams = input.receive_cuda_streams("color_buffer_in");
+
+ output.emit(color_message.value(), "color_buffer_out");
+}
+
+} // namespace holoscan::ops
diff --git a/applications/bci_visualization/operators/color_buffer_passthrough/cpp/color_buffer_passthrough.hpp b/applications/bci_visualization/operators/color_buffer_passthrough/cpp/color_buffer_passthrough.hpp
new file mode 100644
index 0000000000..509cb21210
--- /dev/null
+++ b/applications/bci_visualization/operators/color_buffer_passthrough/cpp/color_buffer_passthrough.hpp
@@ -0,0 +1,40 @@
+/* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "holoscan/holoscan.hpp"
+
+namespace holoscan::ops {
+
+/**
+ * @brief Pass-through operator for rendered color buffers.
+ *
+ * This operator exists to control the input port queue policy (size=1, pop),
+ * preventing backpressure/deadlock when downstream visualization runs slower
+ * than the producer.
+ */
+class ColorBufferPassthroughOp : public Operator {
+ public:
+ HOLOSCAN_OPERATOR_FORWARD_ARGS(ColorBufferPassthroughOp)
+
+ void setup(OperatorSpec& spec) override;
+ void compute(InputContext& input, OutputContext& output, ExecutionContext& context) override;
+};
+
+} // namespace holoscan::ops
diff --git a/applications/bci_visualization/operators/color_buffer_passthrough/python/CMakeLists.txt b/applications/bci_visualization/operators/color_buffer_passthrough/python/CMakeLists.txt
new file mode 100644
index 0000000000..ff03aeefe2
--- /dev/null
+++ b/applications/bci_visualization/operators/color_buffer_passthrough/python/CMakeLists.txt
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include(pybind11_add_holohub_module)
+
+pybind11_add_holohub_module(
+ CPP_CMAKE_TARGET color_buffer_passthrough
+ CLASS_NAME "ColorBufferPassthroughOp"
+ SOURCES color_buffer_passthrough.cpp
+)
diff --git a/applications/bci_visualization/operators/color_buffer_passthrough/python/color_buffer_passthrough.cpp b/applications/bci_visualization/operators/color_buffer_passthrough/python/color_buffer_passthrough.cpp
new file mode 100644
index 0000000000..c034dd54c9
--- /dev/null
+++ b/applications/bci_visualization/operators/color_buffer_passthrough/python/color_buffer_passthrough.cpp
@@ -0,0 +1,64 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../cpp/color_buffer_passthrough.hpp"
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include <holoscan/core/fragment.hpp>
+
+namespace py = pybind11;
+
+namespace holoscan::ops {
+
+using pybind11::literals::operator""_a;
+
+class PyColorBufferPassthroughOp : public ColorBufferPassthroughOp {
+ public:
+ using ColorBufferPassthroughOp::ColorBufferPassthroughOp;
+
+ PyColorBufferPassthroughOp(Fragment* fragment, const py::args& args,
+ const std::string& name = "color_buffer_passthrough")
+ : ColorBufferPassthroughOp(ArgList{}) {
+ (void)args;
+ name_ = name;
+ fragment_ = fragment;
+    spec_ = std::make_shared<OperatorSpec>(fragment);
+ setup(*spec_.get());
+ }
+};
+
+PYBIND11_MODULE(_color_buffer_passthrough, m) {
+  py::class_<ColorBufferPassthroughOp, PyColorBufferPassthroughOp, Operator,
+             std::shared_ptr<ColorBufferPassthroughOp>>(m, "ColorBufferPassthroughOp")
+      .def(py::init<Fragment*, const py::args&, const std::string&>(),
+ "fragment"_a,
+ "name"_a = std::string("color_buffer_passthrough"))
+ .def("setup", &ColorBufferPassthroughOp::setup, "spec"_a);
+}
+
+} // namespace holoscan::ops
diff --git a/applications/bci_visualization/operators/reconstruction/__init__.py b/applications/bci_visualization/operators/reconstruction/__init__.py
new file mode 100644
index 0000000000..ff43d71332
--- /dev/null
+++ b/applications/bci_visualization/operators/reconstruction/__init__.py
@@ -0,0 +1,21 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from .build_rhs_operator import BuildRHSOperator
+from .convert_to_voxels_operator import ConvertToVoxelsOperator
+from .normalize_operator import NormalizeOperator
+from .solver_operator import RegularizedSolverOperator
+from .types import BuildRHSOutput, NormalizedSolveBatch, SolverResult, VoxelMetadata
+
+__all__ = [
+ "BuildRHSOperator",
+ "ConvertToVoxelsOperator",
+ "NormalizeOperator",
+ "RegularizedSolverOperator",
+ "BuildRHSOutput",
+ "NormalizedSolveBatch",
+ "SolverResult",
+ "VoxelMetadata",
+]
diff --git a/applications/bci_visualization/operators/reconstruction/build_rhs_operator.py b/applications/bci_visualization/operators/reconstruction/build_rhs_operator.py
new file mode 100644
index 0000000000..7bad1629b5
--- /dev/null
+++ b/applications/bci_visualization/operators/reconstruction/build_rhs_operator.py
@@ -0,0 +1,191 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, List, Tuple
+
+import cupy as cp
+import numpy as np
+from holoscan.core import ExecutionContext, InputContext, Operator, OperatorSpec, OutputContext
+from utils.reconstruction.assets import Assets
+
+from ..stream import SampleOutput
+from .types import BuildRHSOutput, VoxelMetadata
+
+logger = logging.getLogger(__name__)
+
+
+class BuildRHSOperator(Operator):
+ """Convert realtime moments tensors into trimmed Right-Hand Side (RHS)/Jacobian batches."""
+
+ def __init__(
+ self,
+ *,
+ assets: Assets,
+ fragment: Any | None = None,
+ ) -> None:
+ super().__init__(fragment, name=self.__class__.__name__)
+
+ # Keep CPU copies for disk-loaded assets; GPU copies are created lazily on first compute.
+ self._model_optical_properties_cpu = np.concatenate((assets.mua, assets.musp)).astype(
+ np.float32, copy=False
+ )
+ self._mega_jacobians_cpu = assets.mega_jacobian
+ self._channel_mapping = assets.channel_mapping
+ self._idxs_significant_voxels_cpu = assets.idxs_significant_voxels
+ self._voxel_metadata = VoxelMetadata(
+ ijk=assets.ijk, xyz=assets.xyz, resolution=assets.resolution
+ )
+ self._wavelengths = assets.wavelengths
+
+ # GPU caches (CuPy arrays on the propagated CUDA stream)
+ self._mega_jacobians_gpu = None
+ self._model_optical_properties_gpu = None
+ self._idxs_significant_voxels_gpu = None
+ self._jacobian_cache = None
+ self._last_frame = None # previous frame (GPU)
+
+ def setup(self, spec: OperatorSpec) -> None:
+ spec.input("moments")
+ spec.output("batch")
+
+ def _apply_baseline(self, realtime_moments):
+ """
+ simple diff against previous frame
+ """
+ if self._last_frame is None:
+ self._last_frame = realtime_moments.copy()
+ return None
+
+ # diff with last frame and update last frame
+ diff = realtime_moments - self._last_frame
+ self._last_frame = realtime_moments.copy()
+ return diff
+
+ def _zero_out_invalids(self, data_rhs) -> None:
+ invalid_samples = ~cp.isfinite(data_rhs)
+ if not cp.any(invalid_samples):
+ return
+
+ # NOTE: this is in-place on GPU, async on the current CUDA stream.
+ cp.nan_to_num(data_rhs, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
+
+ def _get_channel_indices(self, optode_order: List[Tuple[int, int, int, int]]) -> List[int]:
+ """Map optode tuples to jacobian channel indices (CPU-side dict lookups)."""
+ indices: List[int] = []
+ for src_module, src, det_module, det in optode_order:
+ try:
+ srcs = self._channel_mapping[str(src_module)]
+ detectors = srcs[str(src)][str(det_module)]
+ jacobian_index = detectors[str(det)]
+ except KeyError as e:
+ raise ValueError(
+ "Channel without jacobian mapping "
+ f"(src_module={src_module}, src={src}, det_module={det_module}, det={det})"
+ ) from e
+ indices.append(int(jacobian_index))
+ if not indices:
+ raise ValueError("Empty channel mapping; no channels resolved to jacobian indices")
+ return indices
+
+ def compute(
+ self,
+ op_input: InputContext,
+ op_output: OutputContext,
+ context: ExecutionContext,
+ ) -> None:
+ payload: SampleOutput = op_input.receive("moments")
+
+ # Create the CUDA stream at the earliest GPU-producing operator and propagate it downstream.
+ cuda_stream = context.allocate_cuda_stream("reconstruction_stream")
+ with cp.cuda.ExternalStream(cuda_stream):
+ # Host->device copy is enqueued on the current stream (may be sync if host memory isn't pinned).
+ realtime_moments = cp.asarray(payload.data, dtype=cp.float32)
+
+ # take log of moment 0 to convert to optical density
+ # shape is (moments, channels, wavelengths)
+ cp.log(realtime_moments[0], out=realtime_moments[0])
+
+ realtime_moments = self._apply_baseline(realtime_moments)
+ if realtime_moments is None:
+ logger.info("Skipping RHS build for first frame (baseline capture)")
+ return
+
+ flowaxis_optodes: List[Tuple[int, int, int, int]] = [
+ (
+ payload.channels.source_module[channel_idx],
+ payload.channels.source_number[channel_idx],
+ payload.channels.detector_module[channel_idx],
+ payload.channels.detector_number[channel_idx],
+ )
+ for channel_idx in range(len(payload.channels))
+ ]
+
+ # Validate that jacobian features dimension matches realtime moments
+ # 5D jacobian shape: (channels, features, wavelengths, voxels, simulation_types)
+ num_features = realtime_moments.shape[0]
+ if self._mega_jacobians_cpu.shape[1] != num_features:
+ raise ValueError(
+ f"Jacobian features dimension mismatch: expected {self._mega_jacobians_cpu.shape[1]}, "
+ f"got {num_features} from moments data. Check SNIRF file format."
+ )
+
+ with cp.cuda.ExternalStream(cuda_stream):
+ # One-time GPU uploads of large static assets.
+ if self._mega_jacobians_gpu is None:
+ self._mega_jacobians_gpu = cp.asarray(self._mega_jacobians_cpu, dtype=cp.float32)
+ if self._model_optical_properties_gpu is None:
+ self._model_optical_properties_gpu = cp.asarray(
+ self._model_optical_properties_cpu, dtype=cp.float32
+ )
+ if self._idxs_significant_voxels_gpu is None:
+ self._idxs_significant_voxels_gpu = cp.asarray(
+ self._idxs_significant_voxels_cpu, dtype=cp.int64
+ )
+
+ if self._jacobian_cache is None:
+ channel_indices = self._get_channel_indices(flowaxis_optodes)
+ channel_indices_gpu = cp.asarray(channel_indices, dtype=cp.int64)
+
+ jacobians = self._mega_jacobians_gpu[channel_indices_gpu, :, :, :, :] # 5d
+
+ # swap axes so it's features, channels first
+ jacobians = jacobians.transpose(1, 0, 2, 3, 4)
+ # reshape to 3d and use Fortran-style ordering
+ jacobians = cp.reshape(
+ jacobians,
+ (
+ jacobians.shape[0] * jacobians.shape[1],
+ jacobians.shape[2],
+ jacobians.shape[3] * jacobians.shape[4],
+ ),
+ order="F",
+ )
+ self._jacobian_cache = jacobians
+
+ data_jacobians = self._jacobian_cache
+ # swap from (moments, channels, wavelengths) to (channels, moments, wavelengths)
+ # then reshape to (channels x moments, wavelengths)
+ data_rhs = realtime_moments.transpose(1, 0, 2).reshape(-1, realtime_moments.shape[2])
+
+ self._zero_out_invalids(data_rhs)
+
+ # Propagate CUDA stream downstream for correct ordering of GPU work.
+ op_output.set_cuda_stream(cuda_stream, "batch")
+ op_output.emit(
+ BuildRHSOutput(
+ data_jacobians=data_jacobians,
+ data_rhs=data_rhs,
+ model_optical_properties=self._model_optical_properties_gpu,
+ idxs_significant_voxels=self._idxs_significant_voxels_gpu,
+ num_full_voxels=int(self._voxel_metadata.ijk.shape[0]),
+ num_features=int(num_features),
+ wavelengths=tuple(self._wavelengths.tolist()),
+ voxel_metadata=self._voxel_metadata,
+ ),
+ "batch",
+ )
diff --git a/applications/bci_visualization/operators/reconstruction/convert_to_voxels_operator.py b/applications/bci_visualization/operators/reconstruction/convert_to_voxels_operator.py
new file mode 100644
index 0000000000..5d74c6c3ec
--- /dev/null
+++ b/applications/bci_visualization/operators/reconstruction/convert_to_voxels_operator.py
@@ -0,0 +1,193 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, Tuple, cast
+
+import cupy as cp
+import numpy as np
+from holoscan.core import ExecutionContext, InputContext, Operator, OperatorSpec, OutputContext
+from numpy.typing import NDArray
+from utils.reconstruction.hbo import ExtinctionCoefficient, HbO
+
+from .types import SolverResult, VoxelMetadata
+
+logger = logging.getLogger(__name__)
+
+
+def _convert_to_full_voxels(
+ array: NDArray[np.float32],
+ num_voxels: int,
+ idxs_significant_voxels: NDArray[np.int_],
+) -> NDArray[np.float32]:
+ """Convert an array from significant voxels to full voxel arrays.
+
+ Parameters
+ ----------
+ array : NDArray[np.float32]
+ Array to convert from significant voxels to full voxels.
+ num_voxels : int
+ Number of total voxels in the mesh.
+ idxs_significant_voxels : NDArray[np.int_]
+ Indices of significant voxels.
+
+ Returns
+ -------
+ array_full_voxels : NDArray[np.float32]
+ The input array converted to full voxel space.
+ """
+    array_full_voxels = cp.zeros((num_voxels, array.shape[1]), dtype=array.dtype)
+ array_full_voxels[idxs_significant_voxels, :] = array
+
+ return array_full_voxels
+
+
+def _compute_affine(xyz: NDArray[np.float32], ijk: NDArray[np.float32]) -> NDArray[np.float32]:
+ """Computes affine based on coordinates.
+
+ Parameters
+ ----------
+ xyz: NDArray[np.float32]
+ x, y, z coordinates
+ ijk: NDArray[np.float32]
+ Voxel indexes
+ Returns
+ -------
+ affine: np.ndarray
+ Affine matrix defining the given space.
+ """
+ rng = np.random.default_rng(0)
+
+ n = 4
+ ctr = 0
+ out: NDArray[np.float32] = np.array([]) # bind outside loop
+ B: NDArray[np.float32] = np.array([]) # bind outside loop
+ while ctr < 100:
+ ctr += 1
+ inds = rng.choice(np.arange(len(ijk)), size=n, replace=False)
+ ins = ijk[np.array(inds), :] # <- points
+ out = xyz[np.array(inds), :] # <- mapped to
+ B = np.vstack([np.transpose(ins), np.ones(n, dtype=np.float32)])
+        if np.linalg.det(B) != 0:
+            break
+ if np.linalg.det(B) == 0:
+ raise RuntimeError("Cannot compute affine, algorithm failed after 100 attempts")
+ D = 1.0 / np.linalg.det(B)
+
+ def entry(r, d):
+ return np.linalg.det(np.delete(np.vstack([r, B]), (d + 1), axis=0))
+
+ M = [[(-1) ** i * D * entry(R, i) for i in range(n)] for R in np.transpose(out)]
+
+ affine = np.concatenate((M, np.array([0, 0, 0, 1]).reshape(1, -1)), axis=0)
+ if affine.shape != (4, 4):
+ raise ValueError(f"Affine must be 4x4 matrix, got shape {affine.shape}")
+ return affine
+
+
+class ConvertToVoxelsOperator(Operator):
+ """Expand trimmed solver outputs back to the full voxel grid."""
+
+ def __init__(
+ self,
+ *,
+ coefficients: Dict[int, ExtinctionCoefficient],
+ ijk: NDArray[np.float32],
+ xyz: NDArray[np.float32],
+ fragment: Any | None = None,
+ ) -> None:
+ self._hbo = HbO(coefficients)
+ self._affine = np.round(_compute_affine(xyz, ijk), 6)
+ self._affine_sent: bool = False
+ self._cached_affine: NDArray[np.float32] | None = None
+ self._cum_hbo: NDArray[np.float32] | None = None
+ self._cum_hbr: NDArray[np.float32] | None = None
+
+ super().__init__(fragment, name=self.__class__.__name__)
+
+ def setup(self, spec: OperatorSpec) -> None:
+ spec.input("result")
+ spec.output("affine_4x4")
+ spec.output("hb_voxel_data")
+
+ def compute(
+ self,
+ op_input: InputContext,
+ op_output: OutputContext,
+ context: ExecutionContext,
+ ) -> None:
+ result: SolverResult = op_input.receive("result")
+ cuda_stream = op_input.receive_cuda_stream("result")
+
+ with cp.cuda.ExternalStream(cuda_stream):
+ data_mua_full = _convert_to_full_voxels(
+ result.data_mua,
+ result.num_full_voxels,
+ result.idxs_significant_voxels,
+ )
+
+ data_hbo, data_hbr = self._hbo.convert_mua_to_hb(
+ data_mua_full,
+ result.wavelengths,
+ result.idxs_significant_voxels,
+ )
+
+ self._cum_hbo = data_hbo if self._cum_hbo is None else self._cum_hbo + data_hbo
+ self._cum_hbr = data_hbr if self._cum_hbr is None else self._cum_hbr + data_hbr
+
+ layout = self._compute_voxel_layout(result.voxel_metadata)
+ hb_volume = self._voxelize_hbo(self._cum_hbo, layout)
+
+ self._emit_affine_once(op_output)
+ op_output.emit(hb_volume, "hb_voxel_data")
+
+ def _emit_affine_once(self, op_output: OutputContext) -> None:
+ if self._affine_sent:
+ return
+
+ op_output.emit(self._affine, "affine_4x4")
+ self._affine_sent = True
+
+ def _voxelize_hbo(
+ self,
+ data_hbo: NDArray[np.float32],
+ layout: Tuple[NDArray[np.int_], Tuple[int, int, int], NDArray[np.int_]],
+ ) -> NDArray[np.float32]:
+ scatter_coords, normalized_shape, _ijk_int = layout
+ scatter_coords = scatter_coords.astype(np.int32, copy=False) # for indexing
+
+ num_voxels = data_hbo.shape[0]
+ if num_voxels != scatter_coords.shape[0]:
+ raise ValueError(
+ f"Number of voxels must match, got {num_voxels} and {scatter_coords.shape[0]}"
+ )
+
+ # scatter ijk to full voxel grid
+ volume_small: NDArray[np.float32] = cp.zeros(normalized_shape, dtype=data_hbo.dtype)
+ x_idx, y_idx, z_idx = scatter_coords.T
+ volume_small[x_idx, y_idx, z_idx] = data_hbo
+
+ return volume_small
+
+ def _compute_voxel_layout(
+ self,
+ metadata: VoxelMetadata,
+ ) -> Tuple[NDArray[np.int_], Tuple[int, int, int], NDArray[np.int_]]:
+ """
+ Compute normalized voxel coordinates and grid shape from metadata.
+ """
+ ijk = cp.asarray(metadata.ijk)
+ if ijk.ndim != 2 or ijk.shape[1] != 3:
+ raise ValueError(f"IJK must be 2D array with 3 columns, got shape {ijk.shape}")
+
+ ijk_int = cp.rint(ijk)
+ min_idx = ijk_int.min(axis=0)
+ normalized = ijk_int - min_idx
+ shape = tuple(int(axis_max) + 1 for axis_max in normalized.max(axis=0))
+ if not all(dim > 0 for dim in shape):
+ raise ValueError(f"Shape must be positive, got shape {shape}")
+ return normalized, cast(Tuple[int, int, int], shape), ijk_int
diff --git a/applications/bci_visualization/operators/reconstruction/normalize_operator.py b/applications/bci_visualization/operators/reconstruction/normalize_operator.py
new file mode 100644
index 0000000000..edcf6713e7
--- /dev/null
+++ b/applications/bci_visualization/operators/reconstruction/normalize_operator.py
@@ -0,0 +1,170 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, List, Tuple
+
+import cupy as cp
+import numpy as np
+from holoscan.core import ExecutionContext, InputContext, Operator, OperatorSpec, OutputContext
+from numpy.typing import NDArray
+
+from .types import BuildRHSOutput, NormalizedSolveBatch, WavelengthSystem
+
+logger = logging.getLogger(__name__)
+
+
+# You can adjust these hard-coded values by taking the max value
+# of each moment and wavelength from a SNIRF file.
+HARD_CODED_NORMALIZERS = [ # for each feature type (moment and wavelength)
+ np.array([1, 5e2, 5e5]),
+ np.array([0.5, 2.5e2, 2.5e5]),
+]
+
+
+class NormalizeOperator(Operator):
+ """Apply Jacobian/RHS normalization before solver execution."""
+
+ def __init__(
+ self,
+ *,
+ fragment: Any | None = None,
+ use_hard_coded_normalizers: bool = True,
+ ) -> None:
+ super().__init__(fragment, name=self.__class__.__name__)
+ self._jacobian_cache: NDArray[np.float32] | None = None
+ self._max_rhs: NDArray[np.float32] | None = None
+
+ self._use_hard_coded_normalizers = use_hard_coded_normalizers
+ self._hard_coded_row_normalizers_cache: NDArray[np.float32] | None = None
+ self._hard_coded_normalized_jacobian_cache: NDArray[np.float32] | None = None
+
+ def setup(self, spec: OperatorSpec) -> None:
+ spec.input("batch")
+ spec.output("normalized")
+
+ def compute(
+ self,
+ op_input: InputContext,
+ op_output: OutputContext,
+ context: ExecutionContext,
+ ) -> None:
+ batch = op_input.receive("batch")
+ if not isinstance(batch, BuildRHSOutput):
+ raise TypeError(f"NormalizeOperator expected BuildRHSOutput, got {type(batch)}")
+
+ cuda_stream = op_input.receive_cuda_stream("batch")
+
+ with cp.cuda.ExternalStream(cuda_stream):
+ result = self._normalize_batch(batch)
+ if result is None:
+ logger.info("Skipping normalization for frame because max_rhs is all zeros")
+ return
+
+ systems, num_absorbers = result
+
+ op_output.emit(
+ NormalizedSolveBatch(
+ systems=tuple(systems),
+ idxs_significant_voxels=batch.idxs_significant_voxels,
+ num_full_voxels=batch.num_full_voxels,
+ num_absorbers=num_absorbers,
+ wavelengths=batch.wavelengths,
+ voxel_metadata=batch.voxel_metadata,
+ ),
+ "normalized",
+ )
+
+    def _get_hard_coded_row_normalizers(
+        self,
+        num_rows: int,
+        num_features: int,
+        num_wavelengths: int,
+    ) -> cp.ndarray:
+ if self._hard_coded_row_normalizers_cache is not None:
+ return self._hard_coded_row_normalizers_cache
+
+ row_normalizers = cp.full((num_rows, num_wavelengths), cp.nan)
+ for wavelength_idx in range(num_wavelengths):
+ for idx_feature in range(num_features):
+ row_normalizers[idx_feature::num_features, wavelength_idx] = HARD_CODED_NORMALIZERS[
+ wavelength_idx
+ ][idx_feature]
+
+ if cp.any(cp.isnan(row_normalizers)):
+ raise RuntimeError(
+ f"NaN values detected in row normalizers. Check HARD_CODED_NORMALIZERS configuration "
+ f"for num_features={num_features}, num_wavelengths={num_wavelengths}"
+ )
+ self._hard_coded_row_normalizers_cache = row_normalizers
+ return row_normalizers
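+
+    # Shape sketch (hypothetical numbers): with num_features=3, num_rows=6 and
+    # the constants above, column 0 of row_normalizers is
+    # [1, 5e2, 5e5, 1, 5e2, 5e5] and column 1 is
+    # [0.5, 2.5e2, 2.5e5, 0.5, 2.5e2, 2.5e5]; the per-moment constants repeat
+    # every num_features rows for each wavelength.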
+
+ def _normalize_batch(self, batch: BuildRHSOutput) -> Tuple[List[WavelengthSystem], int] | None:
+ num_cols = batch.data_jacobians.shape[-1]
+ num_significant = int(batch.idxs_significant_voxels.size)
+ num_absorbers, remainder = divmod(num_cols, num_significant)
+ if remainder != 0:
+ raise ValueError(
+ f"Number of columns must be divisible by number of significant voxels, got {num_cols} and {num_significant}"
+ )
+
+ # normalize rows
+ rhs = cp.asarray(batch.data_rhs, dtype=cp.float32)
+ num_wavelengths = batch.data_rhs.shape[-1]
+ row_normalizers = self._get_hard_coded_row_normalizers(
+ batch.data_jacobians.shape[0], batch.num_features, num_wavelengths
+ )
+
+ jacobian_template = self._get_template_jacobians(batch)
+ if (
+ self._use_hard_coded_normalizers
+ and self._hard_coded_normalized_jacobian_cache is not None
+ ):
+ jacobians = self._hard_coded_normalized_jacobian_cache
+ else:
+ jacobians = jacobian_template.copy()
+ jacobians /= row_normalizers[:, :, None]
+
+ rhs /= row_normalizers
+
+ if self._use_hard_coded_normalizers and self._hard_coded_normalized_jacobian_cache is None:
+ self._hard_coded_normalized_jacobian_cache = jacobians
+
+ systems: List[WavelengthSystem] = []
+ for idx_wavelength in range(num_wavelengths):
+ background_payload = cp.asarray(
+ batch.model_optical_properties[:, idx_wavelength],
+ dtype=cp.float32,
+ )
+ systems.append(
+ WavelengthSystem(
+ jacobian=jacobians[:, idx_wavelength, :],
+ rhs=rhs[:, idx_wavelength],
+ background=background_payload,
+ )
+ )
+
+ return systems, num_absorbers
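+
+    # In matrix terms (sketch): with D = diag(row_normalizers[:, w]), the solver
+    # receives, for each wavelength w, the row-scaled system
+    #     (D^-1 J_w) x = D^-1 rhs_w
+    # so badly scaled moment features do not dominate the fit.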
+
+    def _get_template_jacobians(
+        self,
+        batch: BuildRHSOutput,
+    ) -> cp.ndarray:
+ """
+ Retrieve or build the normalized Jacobian template for the given batch.
+ """
+ if self._jacobian_cache is not None:
+ return self._jacobian_cache
+
+ template = cp.asarray(batch.data_jacobians, dtype=cp.float32).copy()
+ background = cp.asarray(batch.model_optical_properties, dtype=cp.float32)
+
+ background_T = cp.swapaxes(background, 0, 1)
+ template *= background_T[None, :, :]
+
+ self._jacobian_cache = template
+ return template
diff --git a/applications/bci_visualization/operators/reconstruction/solver_operator.py b/applications/bci_visualization/operators/reconstruction/solver_operator.py
new file mode 100644
index 0000000000..f9e02adeb7
--- /dev/null
+++ b/applications/bci_visualization/operators/reconstruction/solver_operator.py
@@ -0,0 +1,100 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+import cupy as cp
+from holoscan.core import ExecutionContext, InputContext, Operator, OperatorSpec, OutputContext
+from utils.reconstruction.reg_inv import solve_regularized_system
+
+from .types import NormalizedSolveBatch, SolverResult
+
+logger = logging.getLogger(__name__)
+
+
+class RegularizedSolverOperator(Operator):
+ """Run a regularized inverse solver per wavelength system."""
+
+ REG_DEFAULT = 0.1
+
+ def __init__(
+ self,
+ *,
+ reg: float = REG_DEFAULT,
+ fragment: Any | None = None,
+ ) -> None:
+ super().__init__(fragment, name=self.__class__.__name__)
+ self._reg = reg
+
+ def setup(self, spec: OperatorSpec) -> None:
+ spec.input("batch")
+ spec.output("result")
+
+ def compute(
+ self,
+ op_input: InputContext,
+ op_output: OutputContext,
+ context: ExecutionContext,
+ ) -> None:
+ batch: NormalizedSolveBatch = op_input.receive("batch")
+ cuda_stream = op_input.receive_cuda_stream("batch")
+
+ with cp.cuda.ExternalStream(cuda_stream):
+ result = self._solve_batch(batch)
+ op_output.emit(result, "result")
+
+ def _solve_batch(self, batch: NormalizedSolveBatch) -> SolverResult:
+ num_wavelengths = len(batch.systems)
+ num_significant_voxels = int(batch.idxs_significant_voxels.size)
+ num_cols_expected = batch.num_absorbers * num_significant_voxels
+
+ # GPU-only: always use CuPy.
+ result = cp.zeros(
+ (num_cols_expected, num_wavelengths),
+ dtype=batch.systems[0].jacobian.dtype,
+ )
+ for wavelength_idx, system in enumerate(batch.systems):
+ if system.rhs.ndim != 1:
+ raise ValueError(f"Expected 1D RHS vector, got shape {system.rhs.shape}")
+ if system.jacobian.shape[1] != num_cols_expected:
+ raise ValueError(
+ f"Jacobian column mismatch: expected {num_cols_expected}, "
+ f"got {system.jacobian.shape[1]} for wavelength {wavelength_idx}"
+ )
+
+ solution = solve_regularized_system(
+ system.jacobian,
+ system.rhs,
+ wavelength_idx,
+ reg=self._reg,
+ )
+
+ solution *= system.background
+
+ if solution.shape != (num_cols_expected,):
+ raise ValueError(
+ f"Solution must be 1D array with {num_cols_expected} elements, got shape {solution.shape}"
+ )
+ result[:, wavelength_idx] = solution
+
+ # Reshape result to separate absorbers into mua/musp
+ reshaped = result.reshape(
+ (-1, batch.num_absorbers, num_wavelengths),
+ order="F",
+ )
+ data_mua = reshaped[:, 0, :]
+ data_musp = reshaped[:, 1, :]
+
+ return SolverResult(
+ data_mua=data_mua,
+ data_musp=data_musp,
+ idxs_significant_voxels=batch.idxs_significant_voxels,
+ num_full_voxels=batch.num_full_voxels,
+ wavelengths=batch.wavelengths,
+ voxel_metadata=batch.voxel_metadata,
+ )
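+
+# Reshape sketch: each solution column stacks [mua for all significant voxels,
+# then musp for all significant voxels] (num_absorbers == 2). reshape with
+# order="F" therefore splits the first axis into (voxel, absorber) with voxels
+# varying fastest, giving data_mua = reshaped[:, 0, :] and
+# data_musp = reshaped[:, 1, :].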
diff --git a/applications/bci_visualization/operators/reconstruction/types.py b/applications/bci_visualization/operators/reconstruction/types.py
new file mode 100644
index 0000000000..4945b3ea8b
--- /dev/null
+++ b/applications/bci_visualization/operators/reconstruction/types.py
@@ -0,0 +1,58 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Tuple
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+@dataclass(frozen=True)
+class VoxelMetadata:
+ ijk: NDArray[np.float32]
+ xyz: NDArray[np.float32] | None
+ resolution: Tuple[float, float, float]
+
+
+@dataclass(frozen=True)
+class BuildRHSOutput:
+ data_jacobians: NDArray[np.float32] # 3d array (feature-channels, wavelengths, voxels)
+ data_rhs: NDArray[np.float32] # 2d array (feature-channels, wavelengths)
+ model_optical_properties: NDArray[np.float32]
+ idxs_significant_voxels: NDArray[np.int_]
+ num_full_voxels: int
+ num_features: int
+ wavelengths: Tuple[int, ...]
+ voxel_metadata: VoxelMetadata
+
+
+@dataclass(frozen=True)
+class WavelengthSystem:
+ jacobian: NDArray[np.float32]
+ rhs: NDArray[np.float32]
+ background: NDArray[np.float32]
+
+
+@dataclass(frozen=True)
+class NormalizedSolveBatch:
+ systems: Tuple[WavelengthSystem, ...]
+ idxs_significant_voxels: NDArray[np.int_]
+ num_full_voxels: int
+ num_absorbers: int
+ wavelengths: Tuple[int, ...]
+ voxel_metadata: VoxelMetadata
+
+
+@dataclass(frozen=True)
+class SolverResult:
+ data_mua: NDArray[np.float32]
+ data_musp: NDArray[np.float32]
+ idxs_significant_voxels: NDArray[np.int_]
+ num_full_voxels: int
+ wavelengths: Tuple[int, ...]
+ voxel_metadata: VoxelMetadata
diff --git a/applications/bci_visualization/operators/stream.py b/applications/bci_visualization/operators/stream.py
new file mode 100644
index 0000000000..ba3f252aa2
--- /dev/null
+++ b/applications/bci_visualization/operators/stream.py
@@ -0,0 +1,50 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, NamedTuple
+
+import numpy as np
+from holoscan.core import ExecutionContext, InputContext, Operator, OperatorSpec, OutputContext
+from streams.base_nirs import BaseNirsStream, ChannelInfo
+
+logger = logging.getLogger(__name__)
+
+
+class SampleOutput(NamedTuple):
+ data: np.ndarray
+ channels: ChannelInfo
+
+
+class StreamOperator(Operator):
+ def __init__(
+ self,
+ stream: BaseNirsStream,
+ *,
+ fragment: Any | None = None,
+ ) -> None:
+ super().__init__(fragment, name=self.__class__.__name__)
+ self._stream = stream
+ self._channels: ChannelInfo
+
+ def setup(self, spec: OperatorSpec) -> None:
+ spec.output("samples")
+
+ def start(self) -> None:
+ self._stream.start()
+ self._channels = self._stream.get_channels()
+ self._iter = self._stream.stream_nirs()
+
+    def compute(
+        self, op_input: InputContext, op_output: OutputContext, context: ExecutionContext
+    ) -> None:
+        sample = next(self._iter, None)
+ if sample is None:
+ raise StopIteration("No more samples available in the stream.")
+
+ op_output.emit(SampleOutput(sample, self._channels), "samples")
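+
+# Wiring sketch (application code assumed, port and operator names hypothetical):
+#     stream_op = StreamOperator(SNIRFStream("recording.snirf"), fragment=app)
+#     app.add_flow(stream_op, downstream_op, {("samples", "in")})
+# Each emitted SampleOutput carries one sample array plus the static
+# ChannelInfo resolved in start().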
diff --git a/applications/bci_visualization/operators/voxel_stream_to_volume/__init__.py b/applications/bci_visualization/operators/voxel_stream_to_volume/__init__.py
new file mode 100644
index 0000000000..a3f2cacfe1
--- /dev/null
+++ b/applications/bci_visualization/operators/voxel_stream_to_volume/__init__.py
@@ -0,0 +1,8 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from .voxel_stream_to_volume import VoxelStreamToVolumeOp as VoxelStreamToVolumeOp
+
+__all__ = ["VoxelStreamToVolumeOp"]
diff --git a/applications/bci_visualization/operators/voxel_stream_to_volume/voxel_stream_to_volume.py b/applications/bci_visualization/operators/voxel_stream_to_volume/voxel_stream_to_volume.py
new file mode 100644
index 0000000000..d0c6d0829a
--- /dev/null
+++ b/applications/bci_visualization/operators/voxel_stream_to_volume/voxel_stream_to_volume.py
@@ -0,0 +1,361 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
+SPDX-License-Identifier: Apache-2.0
+
+VoxelStreamToVolume operator: converts streaming voxel data to dense 3D volume.
+"""
+
+import cupy as cp
+import cupyx.scipy.ndimage
+import nibabel as nib
+import numpy as np
+from holoscan.core import ConditionType, Operator, OperatorSpec
+from nibabel.orientations import aff2axcodes
+
+
+class VoxelStreamToVolumeOp(Operator):
+ """
+    Convert streaming hemoglobin voxel data of shape (I, J, K) into a dense 3D volume tensor for VolumeRendererOp.
+
+    Inputs:
+    - affine_4x4: np.ndarray shape (4, 4) (processed once when provided, typically only for the first frame)
+    - hb_voxel_data: cp.ndarray shape (I, J, K) holding a single hemoglobin channel (e.g., HbO)
+
+    Outputs:
+    - volume: holoscan.gxf.Entity containing a tensor named "volume" with shape (Z,Y,X)
+    - spacing: np.ndarray shape (3,) derived from affine
+    - permute_axis: np.ndarray shape (3,) derived from affine
+    - flip_axes: np.ndarray shape (3,) derived from affine
+ """
+
+ def __init__(self, fragment, *args, **kwargs):
+ # Anatomy mask NIfTI file
+ self.mask_nifti_path = kwargs.pop("mask_nifti_path", None)
+
+ # Exponential moving average factor for running statistics (0 < alpha <= 1)
+ # Higher alpha = faster adaptation, lower alpha = more stable
+ self.stats_alpha = kwargs.pop("stats_alpha", 0.1)
+
+ # Visualization scale factor for amplifying activations
+ # Needed because global min/max includes the whole brain (larger values),
+ # but we only visualize white/gray matter (smaller activations).
+ # Higher scale = more sensitive visualization of small activations
+ self.visualization_scale = kwargs.pop("visualization_scale", 10)
+
+ # Density range, must be same as the VolumeRendererOp's density range
+ self.density_min = kwargs.pop("density_min", -100)
+ self.density_max = kwargs.pop("density_max", 100)
+
+ super().__init__(fragment, *args, **kwargs)
+
+ # Internal state
+ self.affine = None
+
+ # Metadata, set from the first frame, reused for subsequent frames
+ self.dims = None # np.array([X, Y, Z], dtype=np.uint32)
+ self.out_spacing = None # np.ndarray float32 (3,)
+ self.permute_axis = None # np.ndarray uint32 (3,)
+ self.flip_axes = None # np.ndarray bool (3,)
+ self.roi_mask = None # np.ndarray bool (I, J, K)
+
+ # Raw incoming mask (I, J, K) for pass-through emission (loaded from file if provided)
+ self.mask_voxel_raw = None
+ self.mask_volume_gpu = None
+ self.mask_affine = None
+ self.mask_shape = None
+
+ # Running statistics for adaptive normalization (initialized from first frame)
+ self.global_min = None
+ self.global_max = None
+ self.frame_count = 0
+
+ def start(self):
+ if not self.mask_nifti_path:
+ raise ValueError("VoxelStreamToVolume: No mask NIfTI path provided")
+
+ try:
+ img = nib.load(self.mask_nifti_path)
+ mask_3d = img.get_fdata()
+ # Segmentation volumes must be unsigned 8-bit integer
+ self.mask_voxel_raw = np.asarray(mask_3d, dtype=np.uint8)
+ self.mask_affine = img.affine
+ self.mask_shape = mask_3d.shape
+ print(
+ f"VoxelStreamToVolume: Loaded mask from {self.mask_nifti_path}, "
+ f"shape: {self.mask_voxel_raw.shape}, values: {np.unique(self.mask_voxel_raw)}"
+ )
+ except Exception as e:
+ raise RuntimeError(
+ f"VoxelStreamToVolume: Failed to load mask NIfTI '{self.mask_nifti_path}': {e}"
+ ) from e
+
+ def setup(self, spec: OperatorSpec):
+ spec.input("affine_4x4").condition(
+ ConditionType.NONE
+ ) # (4, 4), only emit at the first frame
+ spec.input("hb_voxel_data") # (I, J, K)
+
+ spec.output("volume")
+ spec.output("spacing")
+ spec.output("permute_axis")
+ spec.output("flip_axes")
+
+ # brain anatomy mask
+ spec.output("mask_volume").condition(ConditionType.NONE)
+ spec.output("mask_spacing").condition(ConditionType.NONE)
+ spec.output("mask_permute_axis").condition(ConditionType.NONE)
+ spec.output("mask_flip_axes").condition(ConditionType.NONE)
+
+ def compute(self, op_input, op_output, context):
+ # Receive Hb voxel data (cupy array)
+ hb_voxel = op_input.receive("hb_voxel_data") # (I, J, K)
+ cuda_stream = op_input.receive_cuda_stream("hb_voxel_data")
+
+ # Check voxel data is valid
+ if not isinstance(hb_voxel, cp.ndarray):
+ raise TypeError(
+ f"VoxelStreamToVolume: Invalid voxel data type: {type(hb_voxel)}, expected cupy array"
+ )
+ if hb_voxel.ndim != 3:
+ raise ValueError(
+ f"VoxelStreamToVolume: Invalid voxel data shape: {hb_voxel.shape}, expected 3D"
+ )
+
+ # Receive affine matrix only at the first frame
+ affine = op_input.receive("affine_4x4")
+ if affine is not None:
+ self.affine = np.array(affine, dtype=np.float32).reshape(4, 4)
+ # Derive spacing/orientation from affine - use mask's affine as we will resample data to mask's size
+ self.out_spacing, self.permute_axis, self.flip_axes = (
+ self._derive_orientation_from_affine(self.mask_affine)
+ )
+ print("VoxelStreamToVolume: Received affine matrix")
+
+ # Check if affine has been set at least once
+ if self.affine is None:
+ raise ValueError("VoxelStreamToVolume: No affine matrix received")
+
+ with cp.cuda.ExternalStream(cuda_stream):
+ # Update running statistics from incoming data
+ self._update_running_statistics(hb_voxel)
+
+ # Note: +-1 to add a buffer avoiding edge case in ClaraViz boundaries.
+ hb_voxel_normalized = self._normalize_and_process_activated_voxels(
+ hb_voxel,
+ normalize_min_value=self.density_min + 1,
+ normalize_max_value=self.density_max - 1,
+ )
+
+ # Resample to mask's size
+ volume_gpu = self._cupy_resample(
+ hb_voxel_normalized, self.affine, self.mask_affine, self.mask_shape
+ )
+
+ volume_gpu = cp.transpose(volume_gpu, (2, 1, 0))
+ volume_gpu = cp.ascontiguousarray(volume_gpu, dtype=cp.float32)
+
+        # Upload the mask volume to the GPU and emit it once, on the first frame
+ if self.mask_volume_gpu is None:
+ with cp.cuda.ExternalStream(cuda_stream):
+ self.mask_volume_gpu = cp.asarray(self.mask_voxel_raw, dtype=cp.uint8)
+ self.mask_volume_gpu = cp.transpose(self.mask_volume_gpu, (2, 1, 0))
+ self.mask_volume_gpu = cp.ascontiguousarray(self.mask_volume_gpu)
+
+ # Emit mask outputs
+ op_output.emit({"volume": self.mask_volume_gpu}, "mask_volume")
+ op_output.emit(self.out_spacing, "mask_spacing", "std::array")
+ op_output.emit(self.permute_axis, "mask_permute_axis", "std::array")
+ op_output.emit(self.flip_axes, "mask_flip_axes", "std::array")
+
+ # Emit density outputs
+ op_output.emit({"volume": volume_gpu}, "volume")
+ op_output.emit(self.out_spacing, "spacing", "std::array")
+ op_output.emit(self.permute_axis, "permute_axis", "std::array")
+ op_output.emit(self.flip_axes, "flip_axes", "std::array")
+
+ def _derive_orientation_from_affine(self, affine_4x4: np.ndarray):
+ """
+ Derive spacing, axis permutation, and flips from affine.
+ spacing: voxel sizes along data axes (I,J,K) mapped to [X,Y,Z] ordering
+ permute_axis: for each data axis (I,J,K), index of world axis (X=0,Y=1,Z=2)
+ flip_axes: whether the axis is flipped (negative orientation)
+ """
+
+ R = affine_4x4[:3, :3].astype(np.float32)
+ # spacing along data axes (length of each column)
+ spacing_ijk = np.linalg.norm(R, axis=0).astype(np.float32)
+ # Avoid zeros
+ spacing_ijk[spacing_ijk == 0] = 1.0
+
+ # 1. Get the Orientation String from Nibabel
+ # nibabel returns where the axis points TO (e.g., 'RAS')
+ orientation_codes = aff2axcodes(affine_4x4)
+ print(f"Detected Orientation: {''.join(orientation_codes)}")
+
+ # 2. Parse orientation codes to determine axis assignment and flips
+ # Nibabel convention: codes indicate the direction each axis points TO
+ # Flip is needed when axis points in the negative direction (L, P, I)
+ rl_axis = 4
+ is_axis = 4
+ pa_axis = 4
+
+ rl_flip = False
+ is_flip = False
+ pa_flip = False
+
+ # Iterate through the codes (0=x, 1=y, 2=z in data array)
+ for axis, code in enumerate(orientation_codes):
+ # --- Right-Left Axis ---
+ if code in ["R", "r"]:
+ rl_axis = axis
+ rl_flip = False # Points right (positive direction)
+ elif code in ["L", "l"]:
+ rl_axis = axis
+ rl_flip = True # Points left (negative direction, needs flip)
+
+ # --- Inferior-Superior Axis ---
+ elif code in ["S", "s"]:
+ is_axis = axis
+ is_flip = False # Points superior (positive direction)
+ elif code in ["I", "i"]:
+ is_axis = axis
+ is_flip = True # Points inferior (negative direction, needs flip)
+
+ # --- Posterior-Anterior Axis ---
+ elif code in ["A", "a"]:
+ pa_axis = axis
+ pa_flip = False # Points anterior (positive direction)
+ elif code in ["P", "p"]:
+ pa_axis = axis
+ pa_flip = True # Points posterior (negative direction, needs flip)
+
+ # Validation
+ if 4 in [
+ rl_axis,
+ is_axis,
+ pa_axis,
+ ]: # 4 is a sentinel to indicate any axis that was not set
+ raise ValueError(
+ f"Could not determine all axes from orientation: {''.join(orientation_codes)}"
+ )
+
+ # 3. Construct the final parameters
+ permute = [rl_axis, is_axis, pa_axis]
+ flips = [rl_flip, is_flip, pa_flip]
+
+ # spacing returned in [X, Y, Z] order by mapping data spacings
+ spacing_xyz = np.zeros(3, dtype=np.float32)
+ for a in range(3):
+ spacing_xyz[permute[a]] = spacing_ijk[a]
+
+ return spacing_xyz, permute, flips
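+
+    # Worked example: for an identity affine, aff2axcodes returns ('R', 'A', 'S'),
+    # so rl_axis=0, pa_axis=1, is_axis=2 with no flips, and the method returns
+    # spacing [1, 1, 1], permute [0, 2, 1] and flips [False, False, False].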
+
+ def _update_running_statistics(self, hb_voxel: cp.ndarray):
+ """
+ Update running min/max statistics using exponential moving average.
+ Initializes from first frame with valid data.
+
+ Args:
+ hb_voxel: Current voxel data (cupy array)
+ """
+ self.frame_count += 1
+
+ # Compute current frame statistics from all voxels
+ current_min = float(cp.min(hb_voxel))
+ current_max = float(cp.max(hb_voxel))
+
+ # Initialize on first frame
+ if (
+ (self.global_min is None or self.global_max is None)
+ and current_min != 0
+ and current_max != 0
+ ):
+ self.global_min = current_min
+ self.global_max = current_max
+ print(
+ f"VoxelStreamToVolume: Initialized statistics from first frame - "
+ f"min={self.global_min:.6f}, max={self.global_max:.6f}"
+ )
+ return
+
+ # Use exponential moving average for smooth adaptation
+ # For first few frames, use larger alpha for faster convergence
+ alpha = self.stats_alpha if self.frame_count > 10 else 0.3
+
+ # Update running statistics
+ self.global_min = (1 - alpha) * self.global_min + alpha * current_min
+ self.global_max = (1 - alpha) * self.global_max + alpha * current_max
+
+ # Log statistics every 100 frames for debugging
+ if self.frame_count % 100 == 0:
+ print(
+ f"VoxelStreamToVolume: Frame {self.frame_count} - "
+ f"Running stats: min={self.global_min:.6f}, max={self.global_max:.6f} "
+ f"(current: min={current_min:.6f}, max={current_max:.6f})"
+ )
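+
+    # EMA sketch: with stats_alpha=0.1, global_max=2.0 and current_max=4.0, the
+    # update gives 0.9 * 2.0 + 0.1 * 4.0 = 2.2, so a single outlier frame shifts
+    # the normalization range only slightly.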
+
+    def _normalize_and_process_activated_voxels(
+        self, hb_voxel: cp.ndarray, normalize_min_value: float, normalize_max_value: float
+    ):
+ """
+ Normalize the volume to [normalize_min_value, normalize_max_value] while preserving 0 as baseline.
+ Applies visualization scale factor to amplify small activations in white/gray matter.
+ """
+ # If statistics not initialized yet, return zeros (waiting for first valid frame)
+ if self.global_min is None or self.global_max is None:
+ print("VoxelStreamToVolume: Waiting for statistics initialization...")
+ return cp.zeros_like(hb_voxel, dtype=cp.float32)
+
+        # Steps 1 and 2: scale positive and negative values separately, preserving 0 as baseline.
+ hb = hb_voxel.astype(cp.float32, copy=False)
+ hb_voxel_normalized = cp.zeros_like(hb, dtype=cp.float32)
+
+ if self.global_max > 0:
+ # Apply visualization scale to amplify small activations
+ # Global max includes whole brain, but we visualize white/gray matter (smaller values)
+ pos_scale = (
+ float(normalize_max_value) / float(self.global_max)
+ ) * self.visualization_scale
+ pos_mask = hb >= 0
+ hb_voxel_normalized[pos_mask] = hb[pos_mask] * pos_scale
+
+ if self.global_min < 0:
+ # Apply visualization scale to amplify small deactivations
+ neg_scale = (
+ float(abs(normalize_min_value)) / float(abs(self.global_min))
+ ) * self.visualization_scale
+ neg_mask = hb < 0
+ hb_voxel_normalized[neg_mask] = hb[neg_mask] * neg_scale
+
+ # Step 3: Clip in-place to ensure values stay in range.
+ cp.clip(
+ hb_voxel_normalized, normalize_min_value, normalize_max_value, out=hb_voxel_normalized
+ )
+
+ return hb_voxel_normalized
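+
+    # Scaling sketch (hypothetical values): with density range [-100, 100],
+    # visualization_scale=10 and global_max=50, pos_scale = (99 / 50) * 10 = 19.8,
+    # so a small activation of 2.0 maps to 39.6 while anything above 5.0 clips
+    # at 99.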
+
+ def _cupy_resample(self, data_gpu, src_affine, target_affine, target_shape):
+ # 1. Calculate the transform matrix (Target -> Source)
+ inv_src_affine = np.linalg.inv(src_affine)
+ mapping_matrix = inv_src_affine @ target_affine
+
+ # Extract the rotation/scaling (3x3) and translation parts
+ # SciPy/CuPy affine_transform expects: input_coords = matrix @ output_coords + offset
+ matrix = mapping_matrix[:3, :3]
+ offset = mapping_matrix[:3, 3]
+
+        # 2. Ensure the data and transform are on the GPU
+ data_gpu = cp.asarray(data_gpu, dtype=cp.float32)
+ matrix_gpu = cp.asarray(matrix)
+ offset_gpu = cp.asarray(offset)
+
+ # 3. Resample
+ resampled_gpu = cupyx.scipy.ndimage.affine_transform(
+ data_gpu,
+ matrix=matrix_gpu,
+ offset=offset_gpu,
+ output_shape=target_shape,
+ order=1,
+ )
+
+ return resampled_gpu
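+
+# Resampling note: affine_transform pulls values from the source grid, so the
+# mapping must go target voxel -> world -> source voxel. That is exactly
+# inv(src_affine) @ target_affine; its upper-left 3x3 block is the `matrix`
+# argument and its last column the `offset` expected by
+# cupyx.scipy.ndimage.affine_transform.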
diff --git a/applications/bci_visualization/requirements.txt b/applications/bci_visualization/requirements.txt
new file mode 100644
index 0000000000..185626a547
--- /dev/null
+++ b/applications/bci_visualization/requirements.txt
@@ -0,0 +1,4 @@
+nibabel==5.3.2
+numpy==2.3.3
+scipy==1.16.3
+h5py==3.15.1
diff --git a/applications/bci_visualization/streams/__init__.py b/applications/bci_visualization/streams/__init__.py
new file mode 100644
index 0000000000..3f788039a0
--- /dev/null
+++ b/applications/bci_visualization/streams/__init__.py
@@ -0,0 +1,4 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
diff --git a/applications/bci_visualization/streams/base_nirs.py b/applications/bci_visualization/streams/base_nirs.py
new file mode 100644
index 0000000000..62e10bd548
--- /dev/null
+++ b/applications/bci_visualization/streams/base_nirs.py
@@ -0,0 +1,33 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import abc
+from typing import Iterator, NamedTuple
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+class ChannelInfo(NamedTuple):
+ detector_module: NDArray[np.int_]
+ detector_number: NDArray[np.int_]
+ source_module: NDArray[np.int_]
+ source_number: NDArray[np.int_]
+
+ def __len__(self) -> int:
+ return len(self.source_module)
+
+
+class BaseNirsStream(abc.ABC):
+ def start(self) -> None:
+ pass
+
+ @abc.abstractmethod
+ def get_channels(self) -> ChannelInfo:
+ pass
+
+ @abc.abstractmethod
+ def stream_nirs(self) -> Iterator[np.ndarray]:
+ pass
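+
+# Implementation sketch (hypothetical synthetic stream, e.g. for testing):
+#     class RandomNirsStream(BaseNirsStream):
+#         def get_channels(self) -> ChannelInfo:
+#             idx = np.arange(4)
+#             return ChannelInfo(idx, idx, idx, idx)
+#
+#         def stream_nirs(self) -> Iterator[np.ndarray]:
+#             while True:
+#                 yield np.random.rand(3, 4, 2)  # (moments, channels, wavelengths)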
diff --git a/applications/bci_visualization/streams/snirf.py b/applications/bci_visualization/streams/snirf.py
new file mode 100644
index 0000000000..f51081fc33
--- /dev/null
+++ b/applications/bci_visualization/streams/snirf.py
@@ -0,0 +1,156 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import logging
+from pathlib import Path
+from typing import Dict, Iterator, List, NamedTuple, cast
+
+import h5py
+import numpy as np
+from streams.base_nirs import ChannelInfo
+
+from .base_nirs import BaseNirsStream
+
+logger = logging.getLogger(__name__)
+
+NUM_MOMENTS = 3
+NUM_WAVELENGTHS = 2
+
+
+class SNIRFChannel(NamedTuple):
+ """
+ Represents a single channel in a SNIRF file.
+ """
+
+ moment: int
+ wavelength: int
+ source_module: int
+ source_number: int
+ detector_module: int
+ detector_number: int
+
+
+class SNIRFStream(BaseNirsStream):
+ """
+ Streams data from a SNIRF file.
+ See more about the spec at https://github.com/fNIRS/snirf
+ """
+
+ def __init__(self, snirf_file: Path | str) -> None:
+ self._snirf_file_path = Path(snirf_file)
+ if not self._snirf_file_path.exists():
+ raise FileNotFoundError(f"SNIRF file '{snirf_file}' does not exist")
+
+ def start(self) -> None:
+ self._snirf_file = h5py.File(self._snirf_file_path, "r")
+
+ self._channels = self._get_channels()
+ self._unique_channels = [
+ ch for ch in self._channels if ch.moment == 0 and ch.wavelength == 0
+ ]
+ logger.info("Got {} unique channels".format(len(self._unique_channels)))
+
+ def get_channels(self) -> ChannelInfo:
+ return ChannelInfo(
+ source_module=np.array([ch.source_module for ch in self._unique_channels]),
+ source_number=np.array([ch.source_number for ch in self._unique_channels]),
+ detector_module=np.array([ch.detector_module for ch in self._unique_channels]),
+ detector_number=np.array([ch.detector_number for ch in self._unique_channels]),
+ )
+
+ def _get_channels(self) -> List[SNIRFChannel]:
+        source_labels: List[bytes] = self._snirf_file["nirs"]["probe"]["sourceLabels"][()]  # type: ignore
+        detector_labels: List[bytes] = self._snirf_file["nirs"]["probe"]["detectorLabels"][()]  # type: ignore
+
+ moments = self._snirf_file["nirs"]["probe"]["momentOrders"][()] # type: ignore
+        data1 = cast(h5py.Group, self._snirf_file["nirs"]["data1"])  # type: ignore
+ channel_keys = [key for key in data1 if key.startswith("measurementList")]
+ # Sort channel keys numerically (e.g., measurementList1, measurementList2, ..., measurementList10)
+ # to match the column order in dataTimeSeries
+ channel_keys.sort(key=lambda x: int(x.replace("measurementList", "")))
+ channels: List[SNIRFChannel] = []
+ for channel_key in channel_keys:
+            channel = cast(h5py.Group, data1[channel_key])
+ source_module, source = (
+ source_labels[channel["sourceIndex"][()] - 1].decode().replace("M", "").split("S")
+ )
+ detector_module, detector = (
+ detector_labels[channel["detectorIndex"][()] - 1]
+ .decode()
+ .replace("M", "")
+ .split("D")
+ )
+ channels.append(
+ SNIRFChannel(
+ moment=int(moments[channel["dataTypeIndex"][()] - 1]), # type: ignore
+ wavelength=int(channel["wavelengthIndex"][()] - 1),
+ source_module=int(source_module),
+ source_number=int(source),
+ detector_module=int(detector_module),
+ detector_number=int(detector),
+ )
+ )
+
+ return channels
+
+ def stream_nirs(self) -> Iterator[np.ndarray]:
+        data1 = cast(h5py.Group, self._snirf_file["nirs"]["data1"])  # type: ignore
+ times: np.ndarray = data1["time"][()]
+ data: np.ndarray = data1["dataTimeSeries"][()]
+
+ unique_channel_lut = {
+ (ch.source_module, ch.source_number, ch.detector_module, ch.detector_number): idx
+ for idx, ch in enumerate(self._unique_channels)
+ }
+ channel_idxs: Dict[int, Dict[int, Dict[str, List[int]]]] = {}
+ for moment in range(NUM_MOMENTS):
+ channel_idxs[moment] = {}
+ for wavelength in range(NUM_WAVELENGTHS):
+ channel_order = [
+ (
+ idx,
+ unique_channel_lut.get(
+ (
+ ch.source_module,
+ ch.source_number,
+ ch.detector_module,
+ ch.detector_number,
+ ),
+ -1,
+ ),
+ )
+ for idx, ch in enumerate(self._channels)
+ if ch.moment == moment and ch.wavelength == wavelength
+ ]
+ channel_idxs[moment][wavelength] = {
+ "snirf_channel_idxs": [idx for idx, _ in channel_order],
+ "unique_channel_idxs": [uniq_idx for _, uniq_idx in channel_order],
+ }
+
+ logger.info("Streaming {} samples from SNIRF".format(len(data)))
+ for ts, sample in zip(times, data):
+ # sample is shape (n_channels,)
+ # send (n_moments, n_unique_channels, n_wavelengths)
+ to_send = np.full((NUM_MOMENTS, len(self._unique_channels), NUM_WAVELENGTHS), np.nan)
+ for moment in range(NUM_MOMENTS):
+ for wavelength in range(NUM_WAVELENGTHS):
+ snirf_channel_idxs = channel_idxs[moment][wavelength]["snirf_channel_idxs"]
+ unique_channel_idxs = channel_idxs[moment][wavelength]["unique_channel_idxs"]
+ to_send[moment, unique_channel_idxs, wavelength] = sample[snirf_channel_idxs]
+
+ yield to_send
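+
+# Label parsing sketch: a source label b"M3S2" decodes to module 3, source 2
+# ("M3S2".replace("M", "").split("S") -> ("3", "2")); detector labels such as
+# b"M3D5" follow the same pattern with "D".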
diff --git a/applications/bci_visualization/utils/__init__.py b/applications/bci_visualization/utils/__init__.py
new file mode 100644
index 0000000000..3f788039a0
--- /dev/null
+++ b/applications/bci_visualization/utils/__init__.py
@@ -0,0 +1,4 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
diff --git a/applications/bci_visualization/utils/reconstruction/__init__.py b/applications/bci_visualization/utils/reconstruction/__init__.py
new file mode 100644
index 0000000000..3f788039a0
--- /dev/null
+++ b/applications/bci_visualization/utils/reconstruction/__init__.py
@@ -0,0 +1,4 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
diff --git a/applications/bci_visualization/utils/reconstruction/assets.py b/applications/bci_visualization/utils/reconstruction/assets.py
new file mode 100644
index 0000000000..f1b095f255
--- /dev/null
+++ b/applications/bci_visualization/utils/reconstruction/assets.py
@@ -0,0 +1,139 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import json
+import logging
+import pathlib
+from dataclasses import dataclass
+from typing import Dict, Tuple, cast
+
+import numpy as np
+from numpy.typing import NDArray
+
+from .hbo import ExtinctionCoefficient
+from .types import ChannelHeadsetMapping
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class Assets:
+ # float32 array (channels, features, wavelengths, voxels, simulation_types)
+ # channels: source-detector pairs of the Flow device
+ # features: the moments features (e.g., 0th, 1st, 2nd)
+ # wavelengths: wavelengths used in the simulation
+ # voxels: all voxels in the reconstruction volume
+ # simulation_types: mua and musp
+ mega_jacobian: NDArray[np.float32]
+
+ channel_mapping: ChannelHeadsetMapping # mapping of channels to jacobian channel indices
+ mua: NDArray[np.float32] # absorption coefficients
+ musp: NDArray[np.float32] # reduced scattering coefficients
+ idxs_significant_voxels: NDArray[np.int_] # indices of significant voxels in mega_jacobian
+
+ # these are used for creating the affine
+ # they do not contain all the voxels in the cube, just chosen ones
+ ijk: NDArray[np.float32] # voxel ijk coordinates
+ xyz: NDArray[np.float32] # voxel xyz coordinates
+
+ wavelengths: NDArray[np.int_] # wavelengths
+ resolution: Tuple[float, float, float]
+ extinction_coefficients: Dict[int, ExtinctionCoefficient] # HbO and HbR extinction coefficients
+
+
+_assets: Assets | None = None
+
+
+def _load_assets(
+ *,
+ mega_jacobian_path: pathlib.Path | str,
+ channel_mapping_path: pathlib.Path | str,
+ coefficients_path: pathlib.Path | str,
+ voxel_info_dir: pathlib.Path | str,
+) -> Assets:
+ """Load large reconstruction assets on demand.
+
+ Parameters
+ ----------
+    mega_jacobian_path : pathlib.Path | str
+        Path to the mega Jacobian (.npy/.npz).
+    channel_mapping_path : pathlib.Path | str
+        Path to the channel mapping JSON.
+    coefficients_path : pathlib.Path | str
+        Path to the extinction coefficient CSV.
+    voxel_info_dir : pathlib.Path | str
+        Directory containing voxel info files (mua, musp, idxs_significant_voxels,
+        ijk, xyz, wavelengths, resolution).
+
+    Raises
+    ------
+    FileNotFoundError
+        If any of the specified files are not found.
+
+ Returns
+ -------
+ Assets
+ Loaded reconstruction assets.
+ """
+ logger.info("Initializing reconstruction assets from disk.")
+
+ mega_jacobian_path = pathlib.Path(mega_jacobian_path)
+ channel_mapping_path = pathlib.Path(channel_mapping_path)
+
+    # Note: no memmap since the array is copied to the GPU anyway; eager loading is slow but simple.
+ _mega_jacobian = np.load(mega_jacobian_path).astype(np.float32, copy=False)
+
+ with channel_mapping_path.open() as f:
+ _channel_mapping = json.load(f)
+ # load extinction coefficients
+ _extinction_coefficients = ExtinctionCoefficient.from_csv(pathlib.Path(coefficients_path))
+ # load mua, musp, idxs_significant_voxels, ijk, xyz, wavelengths
+ mua_path = pathlib.Path(voxel_info_dir) / "mua.npy"
+ musp_path = pathlib.Path(voxel_info_dir) / "musp.npy"
+ idxs_significant_voxels_path = pathlib.Path(voxel_info_dir) / "idxs_significant_voxels.npy"
+ ijk_path = pathlib.Path(voxel_info_dir) / "ijk.npy"
+ xyz_path = pathlib.Path(voxel_info_dir) / "xyz.npy"
+ wavelengths_path = pathlib.Path(voxel_info_dir) / "wavelengths.npy"
+ resolution_path = pathlib.Path(voxel_info_dir) / "resolution.npy"
+
+ _mua = np.load(mua_path)
+ _musp = np.load(musp_path)
+ _idxs_significant_voxels = np.load(idxs_significant_voxels_path)
+ _ijk = np.load(ijk_path)
+ _xyz = np.load(xyz_path)
+ _wavelengths = np.load(wavelengths_path)
+ _resolution = tuple(np.load(resolution_path).tolist())
+
+ logger.info("Reconstruction assets initialization complete.")
+
+ return Assets(
+ mega_jacobian=cast(NDArray[np.float32], _mega_jacobian),
+ channel_mapping=cast(ChannelHeadsetMapping, _channel_mapping),
+ mua=cast(NDArray[np.float32], _mua),
+ musp=cast(NDArray[np.float32], _musp),
+ extinction_coefficients=_extinction_coefficients,
+ idxs_significant_voxels=cast(NDArray[np.int_], _idxs_significant_voxels),
+ ijk=cast(NDArray[np.float32], _ijk),
+ xyz=cast(NDArray[np.float32], _xyz),
+ wavelengths=cast(NDArray[np.int_], _wavelengths),
+ resolution=_resolution,
+ )
+
+
+def get_assets(
+ jacobian_path: pathlib.Path | str,
+ channel_mapping_path: pathlib.Path | str,
+ voxel_info_dir: pathlib.Path | str,
+ coefficients_path: pathlib.Path | str,
+) -> Assets:
+ global _assets
+ if _assets is None:
+ # load
+ _assets = _load_assets(
+ mega_jacobian_path=jacobian_path,
+ channel_mapping_path=channel_mapping_path,
+ voxel_info_dir=voxel_info_dir,
+ coefficients_path=coefficients_path,
+ )
+
+ return _assets
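+
+# Usage sketch (paths hypothetical):
+#     assets = get_assets(
+#         jacobian_path="data/mega_jacobian.npy",
+#         channel_mapping_path="data/channel_mapping.json",
+#         voxel_info_dir="data/voxel_info",
+#         coefficients_path="data/extinction_coefficients.csv",
+#     )
+# Repeated calls return the same cached Assets instance, so the large Jacobian
+# is read from disk only once per process.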
diff --git a/applications/bci_visualization/utils/reconstruction/hbo.py b/applications/bci_visualization/utils/reconstruction/hbo.py
new file mode 100644
index 0000000000..296201d3be
--- /dev/null
+++ b/applications/bci_visualization/utils/reconstruction/hbo.py
@@ -0,0 +1,141 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from __future__ import annotations
+
+import csv
+from pathlib import Path
+from typing import Dict, NamedTuple, Tuple
+
+import cupy as cp
+
+
+class ExtinctionCoefficient(NamedTuple):
+ """
+ Matches the row structure of the extinction coefficient CSV asset.
+ Provides the molar extinction coefficients for various chromophores at specific wavelengths.
+ """
+
+ Wavelength: int
+ HbO: float
+ deoxyHb: float
+ Water: float
+ Lipids: float
+ LuTex: float
+ GdTex: float
+
+ @classmethod
+ def from_csv(cls, path: Path) -> Dict[int, ExtinctionCoefficient]:
+ def _parse_wavelength(value: str) -> int:
+ # Some datasets store wavelength as scientific notation (e.g. "6.0000000e+02").
+ # Parse as float then round to nearest integer nm.
+ return int(round(float(value)))
+
+ with open(path, "r") as f:
+ csv_reader = csv.DictReader(f)
+ return {
+ _parse_wavelength(row["Wavelength"]): cls(
+ Wavelength=_parse_wavelength(row["Wavelength"]),
+ HbO=float(row["HbO"]),
+ deoxyHb=float(row["deoxyHb"]),
+ Water=float(row["Water"]),
+ Lipids=float(row["Lipids"]),
+ LuTex=float(row["LuTex"]),
+ GdTex=float(row["GdTex"]),
+ )
+ for row in csv_reader
+ }
+
+ def get_oxy_deoxy_coefficients(self) -> cp.ndarray:
+ return cp.array(
+ [
+ self.HbO,
+ self.deoxyHb,
+ ],
+ dtype=cp.float32,
+ )
+
+
+class HbO:
+ def __init__(self, coefficients: Dict[int, ExtinctionCoefficient]) -> None:
+ self._coefficients = coefficients
+ self._cached_coefficients: cp.ndarray | None = None
+
+ def _get_molar_extinction_coefficients(self, wavelength: int) -> ExtinctionCoefficient:
+ """Get the molar extinction coefficients for a specific wavelength. These have been converted to µa values and
+ interpolated to achieve 1 nm wavelength resolution.
+
+ These values for the molar extinction coefficients e are in [m^-1 / µM], and were originally compiled by
+ Scott Prahl (prahl@ece.ogi.edu) using data from:
+ W. B. Gratzer, Med. Res. Council Labs, Holly Hill, London
+ N. Kollias, Wellman Laboratories, Harvard Medical School, Boston
+ https://omlc.org/spectra/hemoglobin/summary.html
+
+ Parameters
+ ----------
+ wavelength : int
+ Wavelength in nanometers.
+
+ Returns
+ -------
+        ExtinctionCoefficient
+            Extinction coefficients in [m^-1 / µM] for oxy- and deoxy-hemoglobin, water, lipids, LuTex, and GdTex.
+ """
+
+ coefficient = self._coefficients.get(round(wavelength))
+ if coefficient is None:
+ raise ValueError(
+ f"No entry found for {wavelength} nm. Please enter a valid integer wavelength between 600-1000 nm."
+ )
+
+ return coefficient
+
+ def convert_mua_to_hb(
+ self,
+ data_mua: cp.ndarray,
+ wavelengths: tuple,
+ idxs_significant_voxels: cp.ndarray,
+ ) -> Tuple[cp.ndarray, cp.ndarray]:
+ """Converts mua to Hb in voxel space.
+
+ Parameters
+ ----------
+        data_mua : cp.ndarray
+            µa reconstructed values in voxel space, shape (num_voxels, num_wavelengths).
+        wavelengths : tuple
+            Wavelengths (nm).
+        idxs_significant_voxels : cp.ndarray
+            Indices of significant voxels over the threshold of
+            max sensitivity (Jacobian) across voxels.
+
+        Returns
+        -------
+        data_hbo : cp.ndarray
+            Oxygenated hemoglobin in voxel space (µM).
+        data_hbr : cp.ndarray
+            Deoxygenated hemoglobin in voxel space (µM).
+ """
+ num_voxels, _ = data_mua.shape
+
+ if self._cached_coefficients is None:
+ self._cached_coefficients = cp.asarray(
+ [
+ self._get_molar_extinction_coefficients(wavelength).get_oxy_deoxy_coefficients()
+ for wavelength in wavelengths
+ ]
+ )
+
+ idxs_significant_voxels = cp.asarray(idxs_significant_voxels)
+ sample_mua = cp.zeros((len(wavelengths), num_voxels))
+ sample_mua[:, idxs_significant_voxels] = data_mua[idxs_significant_voxels, :].T
+ sample_hb = cp.linalg.solve(self._cached_coefficients, sample_mua)
+
+ if sample_hb.shape != (len(wavelengths), num_voxels):
+ raise ValueError(
+ f"Sample Hb must be 2D array with {len(wavelengths)} rows and {num_voxels} columns, got shape {sample_hb.shape}"
+ )
+ data_hbo = sample_hb[0]
+ data_hbr = sample_hb[1]
+ return data_hbo, data_hbr
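+
+# Unmixing sketch: for two wavelengths the cached coefficient matrix E is 2x2,
+# with rows [eps_HbO(lambda_i), eps_HbR(lambda_i)]. convert_mua_to_hb solves
+#     E @ [HbO; HbR] = [mua(lambda_1); mua(lambda_2)]
+# per voxel, the standard two-wavelength unmixing of absorption into oxy- and
+# deoxy-hemoglobin concentrations.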
diff --git a/applications/bci_visualization/utils/reconstruction/reg_inv.py b/applications/bci_visualization/utils/reconstruction/reg_inv.py
new file mode 100644
index 0000000000..bbdff501ea
--- /dev/null
+++ b/applications/bci_visualization/utils/reconstruction/reg_inv.py
@@ -0,0 +1,133 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+import logging
+
+import cupy as cp
+
+logger = logging.getLogger(__name__)
+
+# Cache holds CuPy arrays (GPU-only), keyed by wavelength index.
+_HESSIAN_CACHE: dict[int, cp.ndarray] = {}
+
+MAX_REASONABLE_COND_RATIO = 10
+
+
+def solve_regularized_system(
+ data_jacobians,
+ data_rhs,
+ wavelength_idx: int,
+ reg: float,
+) -> cp.ndarray:
+    """
+    Parameters
+    ----------
+    data_jacobians : cp.ndarray
+        Jacobian matrix of shape (features * channels, reconstruction_elements * 2).
+    data_rhs : cp.ndarray
+        Right-hand side data of shape (features * channels).
+    wavelength_idx : int
+        Wavelength index, used to cache the regularized Hessian per wavelength.
+    reg : float
+        Regularization parameter λ.
+
+    Returns
+    -------
+    cp.ndarray
+        Solution array of shape (reconstruction_elements * 2).
+ """
+ # add sample dimension
+ data_rhs = cp.asarray(data_rhs).reshape(1, -1)
+
+ # Form Hessian and get pre-computed matrix properties
+ hessian_reg = _build_regularized_system(
+ data_jacobians,
+ wavelength_idx,
+ reg,
+ )
+
+ # Dual formulation: solve smaller system, then back-substitute
+ alpha = _solve_square_system(hessian_reg, data_rhs.T)
+ solution = data_jacobians.T @ alpha
+ return solution.T.squeeze() # remove sample dimension
+
+
+def _build_regularized_system(
+ data_jacobians,
+ wavelength_idx: int,
+ reg: float,
+) -> cp.ndarray:
+    """Build the regularized system matrix.
+
+    Parameters
+    ----------
+    data_jacobians : cp.ndarray
+        Jacobian matrix.
+    wavelength_idx : int
+        Wavelength index for caching.
+    reg : float
+        Regularization parameter.
+
+    Returns
+    -------
+    cp.ndarray
+        Regularized system matrix.
+ """
+    data_hessian_reg = _HESSIAN_CACHE.get(wavelength_idx)
+ if data_hessian_reg is not None:
+ logger.debug("Reusing cached Hessian")
+ return data_hessian_reg
+
+ # Smaller SPD system: (J J^T + λI) for underdetermined case
+ data_hessian = data_jacobians @ data_jacobians.T
+
+ data_hessian_reg = data_hessian + reg * cp.sqrt(cp.linalg.norm(data_hessian)) * cp.eye(
+ data_hessian.shape[0], dtype=data_jacobians.dtype
+ )
+
+ _HESSIAN_CACHE[wavelength_idx] = data_hessian_reg
+ logger.debug("Cached Hessian for reuse")
+
+ return data_hessian_reg
+
+
+def _solve_square_system(
+ A,
+ b,
+) -> cp.ndarray:
+    """
+    Parameters
+    ----------
+    A : cp.ndarray
+        Square coefficient matrix of the linear system (typically a Hessian).
+    b : cp.ndarray
+        Right-hand side vector or matrix.
+
+    Returns
+    -------
+    cp.ndarray
+ Solution to the linear system Ax = b.
+ """
+
+ # Validate input
+ if A.ndim != 2 or A.shape[0] != A.shape[1]:
+ raise ValueError(f"Expected square matrix, got shape {A.shape}")
+ if b.ndim not in {1, 2} or b.shape[0] != A.shape[0]:
+ raise ValueError(f"Incompatible shapes: A={A.shape}, b={b.shape}")
+ if not cp.all(cp.isfinite(A)):
+ raise ValueError("Matrix A contains non-finite values")
+ if not cp.all(cp.isfinite(b)):
+ raise ValueError("Vector b contains non-finite values")
+
+ # Ensure symmetry for numerical stability
+ A = 0.5 * (A + A.T)
+
+    # Direct dense solve (no explicit inverse)
+ result = cp.linalg.solve(A, b)
+ if not cp.all(cp.isfinite(result)):
+ raise RuntimeError(
+ "Solver produced non-finite values. This may indicate numerical instability. "
+ "Check regularization parameter or input data quality."
+ )
+ return result
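+
+# Dual-formulation sketch: for an underdetermined Jacobian (far fewer channel
+# rows than voxel columns) the regularized solution is computed in channel
+# space,
+#     alpha = (J J^T + reg * sqrt(||J J^T||_F) I)^-1 rhs,   x = J^T alpha,
+# which factors a small (channels x channels) matrix instead of the much
+# larger voxel-space system.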
diff --git a/applications/bci_visualization/utils/reconstruction/types.py b/applications/bci_visualization/utils/reconstruction/types.py
new file mode 100644
index 0000000000..811660c978
--- /dev/null
+++ b/applications/bci_visualization/utils/reconstruction/types.py
@@ -0,0 +1,8 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2026 Kernel.
+SPDX-License-Identifier: Apache-2.0
+"""
+
+from typing import Tuple
+
+ChannelHeadsetMapping = dict[str, dict[str, dict[str, dict[str, Tuple[int, ...]]]]]
diff --git a/operators/volume_renderer/dataset.cpp b/operators/volume_renderer/dataset.cpp
index 62caa6adb9..74924941fb 100644
--- a/operators/volume_renderer/dataset.cpp
+++ b/operators/volume_renderer/dataset.cpp
@@ -1,4 +1,4 @@
-/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/* SPDX-FileCopyrightText: Copyright (c) 2023-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -55,7 +55,8 @@ void Dataset::SetVolume(Types type, const std::array<float, 3>& spacing,
const std::array<uint32_t, 3>& permute_axis,
const std::array<bool, 3>& flip_axes,
const std::vector<float>& element_range,
- const nvidia::gxf::Handle<nvidia::gxf::Tensor>& tensor) {
+ const nvidia::gxf::Handle<nvidia::gxf::Tensor>& tensor,
+ cudaStream_t cuda_stream) {
DataArray data_array;
switch (tensor->element_type()) {
@@ -109,7 +110,8 @@ void Dataset::SetVolume(Types type, const std::array<float, 3>& spacing,
std::make_unique(volume_size));
{
- std::unique_ptr access_gpu = cur_data_array->blob_->Access();
+ std::unique_ptr access_gpu =
+ cur_data_array->blob_->Access(cuda_stream);
cudaMemcpy3DParms copy_params = {0};
@@ -140,7 +142,7 @@ void Dataset::SetVolume(Types type, const std::array<float, 3>& spacing,
return;
}
- if (cudaMemcpy3D(©_params) != cudaSuccess) {
+ if (cudaMemcpy3DAsync(©_params, cuda_stream) != cudaSuccess) {
holoscan::log_error("Failed to copy to GPU memory");
return;
}
diff --git a/operators/volume_renderer/dataset.hpp b/operators/volume_renderer/dataset.hpp
index b16e860552..098c833ae2 100644
--- a/operators/volume_renderer/dataset.hpp
+++ b/operators/volume_renderer/dataset.hpp
@@ -1,4 +1,4 @@
-/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,11 +56,12 @@ class Dataset {
* then the range is calculated form the data. For example a full range of a uint8 data type is
* defined by {0.f, 255.f}.
* @param tensor volume data
+ * @param cuda_stream CUDA stream
*/
void SetVolume(Types type, const std::array<float, 3>& spacing,
const std::array<uint32_t, 3>& permute_axis, const std::array<bool, 3>& flip_axes,
const std::vector<float>& element_range,
- const nvidia::gxf::Handle<nvidia::gxf::Tensor>& tensor);
+ const nvidia::gxf::Handle<nvidia::gxf::Tensor>& tensor, cudaStream_t cuda_stream);
/**
* Reset a volume of the dataset.
diff --git a/operators/volume_renderer/python/CMakeLists.txt b/operators/volume_renderer/python/CMakeLists.txt
index 7d182e2171..ab0cd595c4 100644
--- a/operators/volume_renderer/python/CMakeLists.txt
+++ b/operators/volume_renderer/python/CMakeLists.txt
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/operators/volume_renderer/python/volume_renderer.cpp b/operators/volume_renderer/python/volume_renderer.cpp
index 9cf0eb2566..ac20d0282b 100644
--- a/operators/volume_renderer/python/volume_renderer.cpp
+++ b/operators/volume_renderer/python/volume_renderer.cpp
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,6 +30,7 @@
#include
#include
#include "holoscan/core/resources/gxf/cuda_stream_pool.hpp"
+#include
using std::string_literals::operator""s;
using pybind11::literals::operator""_a;
@@ -120,6 +121,12 @@ PYBIND11_MODULE(_volume_renderer, m) {
"name"_a = "volume_renderer"s,
doc::VolumeRendererOp::doc_VolumeRendererOp_python)
.def("setup", &VolumeRendererOp::setup, "spec"_a, doc::VolumeRendererOp::doc_setup);
+
+ // Register custom types with the emitter/receiver registry
+ m.def("register_types", [](EmitterReceiverRegistry& registry) {
+ registry.add_emitter_receiver>("std::array"s);
+ registry.add_emitter_receiver>("std::array"s);
+ });
} // PYBIND11_MODULE
} // namespace holoscan::ops
diff --git a/operators/volume_renderer/python/volume_renderer_pydoc.hpp b/operators/volume_renderer/python/volume_renderer_pydoc.hpp
index 04f435e3ec..04695685a7 100644
--- a/operators/volume_renderer/python/volume_renderer_pydoc.hpp
+++ b/operators/volume_renderer/python/volume_renderer_pydoc.hpp
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/operators/volume_renderer/video_buffer_blob.hpp b/operators/volume_renderer/video_buffer_blob.hpp
index 91e83cc56f..0d940328af 100644
--- a/operators/volume_renderer/video_buffer_blob.hpp
+++ b/operators/volume_renderer/video_buffer_blob.hpp
@@ -1,4 +1,4 @@
-/* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/operators/volume_renderer/volume_renderer.cpp b/operators/volume_renderer/volume_renderer.cpp
index 1cbcb3eeb8..9bf6c8901a 100644
--- a/operators/volume_renderer/volume_renderer.cpp
+++ b/operators/volume_renderer/volume_renderer.cpp
@@ -1,4 +1,4 @@
-/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights
+/* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights
* reserved. SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -225,7 +225,7 @@ class ImageService : public clara::viz::MessageReceiver, public clara::viz::Mess
class VolumeRendererOp::Impl {
public:
- bool receive_volume(InputContext& input, Dataset::Types type);
+ bool receive_volume(InputContext& input, ExecutionContext& context, Dataset::Types type);
Parameter> settings_;
Parameter> merge_settings_;
@@ -281,9 +281,14 @@ class VolumeRendererOp::Impl {
bool default_enable_foveation_ = false;
float default_warp_full_resolution_size_ = 1.f;
float default_warp_resolution_scale_ = 1.f;
+ /// Cached camera pose for use when no new camera pose input is received.
+ /// Initialized to identity by default.
+ clara::viz::Matrix4x4 cached_camera_pose_;
};
-bool VolumeRendererOp::Impl::receive_volume(InputContext& input, Dataset::Types type) {
+bool VolumeRendererOp::Impl::receive_volume(InputContext& input,
+ ExecutionContext& context,
+ Dataset::Types type) {
std::string name(type == Dataset::Types::Density ? "density" : "mask");
auto volume = input.receive<holoscan::gxf::Entity>((name + "_volume").c_str());
@@ -293,8 +298,24 @@ bool VolumeRendererOp::Impl::receive_volume(InputContext& input, Dataset::Types
auto flip_axes_input = input.receive<std::array<bool, 3>>((name + "_flip_axes").c_str());
if (volume) {
- nvidia::gxf::Handle<nvidia::gxf::Tensor> volume_tensor =
- static_cast<nvidia::gxf::Entity>(volume.value()).get<nvidia::gxf::Tensor>("volume").value();
+ nvidia::gxf::Expected<nvidia::gxf::Entity> entity = volume.value();
+ const gxf_result_t result = cuda_stream_handler_.fromMessage(context.context(), entity);
+ if (result != GXF_SUCCESS) {
+ throw std::runtime_error("Failed to get the CUDA stream from incoming messages");
+ }
+ const cudaStream_t cuda_stream = cuda_stream_handler_.getCudaStream(context.context());
+
+ // MEMORY LEAK FIX: Clear old volumes if we receive a new volume
+ // to avoid unbounded memory growth
+ dataset_.ResetVolume(type);
+
+ auto maybe_tensor =
+ static_cast<nvidia::gxf::Entity>(volume.value()).get<nvidia::gxf::Tensor>("volume");
+ if (!maybe_tensor) {
+ throw std::runtime_error("VolumeRendererOp: No volume tensor found");
+ }
+
+ nvidia::gxf::Handle<nvidia::gxf::Tensor> volume_tensor = maybe_tensor.value();
std::array<float, 3> spacing{1.f, 1.f, 1.f};
std::array<uint32_t, 3> permute_axis{0, 1, 2};
@@ -320,8 +341,8 @@ bool VolumeRendererOp::Impl::receive_volume(InputContext& input, Dataset::Types
}
if (has_range) { element_range.push_back(range); }
}
-
- dataset_.SetVolume(type, spacing, permute_axis, flip_axes, element_range, volume_tensor);
+ dataset_.SetVolume(type, spacing, permute_axis, flip_axes, element_range,
+ volume_tensor, cuda_stream);
return true;
}
@@ -530,15 +551,15 @@ void VolumeRendererOp::setup(OperatorSpec& spec) {
void VolumeRendererOp::compute(InputContext& input, OutputContext& output,
ExecutionContext& context) {
// get the density volumes
- bool new_volume = impl_->receive_volume(input, Dataset::Types::Density);
- if (!impl_->receive_volume(input, Dataset::Types::Segmentation)) {
- // there are datasets without segmentation volume, if we receive a density volume
- // only, reset the segmentation volume
+ bool new_volume = impl_->receive_volume(input, context, Dataset::Types::Density);
+ bool new_mask = impl_->receive_volume(input, context, Dataset::Types::Segmentation);
+
+ // there are datasets without mask volume, if we receive a density volume
+ // only, reset the mask volume
+ if (new_volume && !new_mask) {
impl_->dataset_.ResetVolume(Dataset::Types::Segmentation);
- } else {
- new_volume = true;
}
- if (new_volume) {
+ if (new_volume || new_mask) {
impl_->dataset_.Configure(impl_->data_config_interface_);
impl_->dataset_.Set(*impl_->data_interface_.get());
@@ -714,7 +735,7 @@ void VolumeRendererOp::compute(InputContext& input, OutputContext& output,
}
// Two types for the camera pose are supported, either a 4x4 matrix, or a nvidia::gxf::Pose3D
- // object
+ // object. Matrix4x4 is initialized to identity by default.
clara::viz::Matrix4x4 pose;
auto camera_pose_input = input.receive("camera_pose");
if (camera_pose_input) {
@@ -744,6 +765,12 @@ void VolumeRendererOp::compute(InputContext& input, OutputContext& output,
impl_->initial_inv_matrix_set_ = true;
}
pose = impl_->initial_inv_matrix_ * pose;
+
+ // Cache the pose for use when no input is received
+ impl_->cached_camera_pose_ = pose;
+ } else {
+ // Use the cached pose when no camera pose input is received
+ pose = impl_->cached_camera_pose_;
}
// apply the initial pose from the configuration file
@@ -817,17 +844,27 @@ void VolumeRendererOp::compute(InputContext& input, OutputContext& output,
}
}
- // render
const std::shared_ptr color_buffer_blob(new VideoBufferBlob(color_buffer));
std::shared_ptr depth_buffer_blob;
- if (depth_buffer) { depth_buffer_blob = std::make_shared(depth_buffer); }
- impl_->image_service_->Render(color_buffer->video_frame_info().width,
- color_buffer->video_frame_info().height,
- color_buffer_blob,
- depth_buffer_blob);
-
- // then wait for the message with the encoded data
- const auto image = impl_->image_service_->WaitForRenderedImage();
+ if (impl_->dataset_.GetNumberFrames() > 0) {
+ // render
+ if (depth_buffer) { depth_buffer_blob = std::make_shared(depth_buffer); }
+ impl_->image_service_->Render(color_buffer->video_frame_info().width,
+ color_buffer->video_frame_info().height,
+ color_buffer_blob,
+ depth_buffer_blob);
+
+ // then wait for the message with the encoded data
+ const auto image = impl_->image_service_->WaitForRenderedImage();
+ } else {
+ // no density volume, sleep for the time slot
+ clara::viz::RenderSettingsInterface::AccessGuard access(
+ impl_->render_settings_interface_);
+ HOLOSCAN_LOG_INFO("No density volume, sleeping for {} milliseconds",
+ access->time_slot.Get());
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(static_cast<int64_t>(access->time_slot.Get())));
+ }
// Add both a CUDA event and the CUDA stream to the outgoing message,
// some operators expect an event and some a stream to synchronize with
diff --git a/operators/volume_renderer/volume_renderer.hpp b/operators/volume_renderer/volume_renderer.hpp
index 9cd6160903..f383e7a856 100644
--- a/operators/volume_renderer/volume_renderer.hpp
+++ b/operators/volume_renderer/volume_renderer.hpp
@@ -1,4 +1,4 @@
-/* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");