diff --git a/dogm/demo/main.cpp b/dogm/demo/main.cpp
index 2be2c89..cc6ffa8 100644
--- a/dogm/demo/main.cpp
+++ b/dogm/demo/main.cpp
@@ -80,7 +80,7 @@ int main(int argc, const char** argv)
 
     for (int step = 0; step < num_simulation_steps; ++step)
     {
-        dogm::MeasurementCell* meas_grid = grid_generator.generateGrid(sim_data[step].measurements);
+        dogm::MeasurementCellsSoA meas_grid = grid_generator.generateGrid(sim_data[step].measurements);
 
         const auto update_grid_caller = [&grid_map](auto&&... args) {
             grid_map.updateGrid(std::forward<decltype(args)>(args)...);
diff --git a/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h b/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h
index 49eb32d..a89d29a 100644
--- a/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h
+++ b/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h
@@ -6,19 +6,14 @@
 
 #include <device_launch_parameters.h>
 
-namespace dogm
-{
-struct MeasurementCell;
-}
-
 __global__ void createPolarGridTextureKernel(cudaSurfaceObject_t polar, const float* __restrict__ measurements,
                                              int width, int height, float resolution);
 
 __global__ void fusePolarGridTextureKernel(cudaSurfaceObject_t polar, const float* __restrict__ measurements, int width,
                                            int height, float resolution);
 
-__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid,
-                                                     cudaSurfaceObject_t cart, int grid_size);
+__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, cudaSurfaceObject_t cart,
+                                                     int grid_size);
 
-__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid,
-                                                 const float2* __restrict__ grid, int grid_size);
+__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, const float2* __restrict__ grid,
+                                                 int grid_size);
diff --git a/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h b/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h
index 0d210e5..7c74fea 100644
--- a/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h
+++ b/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h
@@ -23,10 +23,10 @@ class LaserMeasurementGrid
     LaserMeasurementGrid(const Params& params, float grid_length, float resolution);
     ~LaserMeasurementGrid();
 
-    dogm::MeasurementCell* generateGrid(const std::vector<float>& measurements);
+    dogm::MeasurementCellsSoA generateGrid(const std::vector<float>& measurements);
 
 private:
-    dogm::MeasurementCell* meas_grid;
+    dogm::MeasurementCellsSoA meas_grid;
     int grid_size;
 
     Params params;
diff --git a/dogm/demo/simulator/mapping/kernel/measurement_grid.cu b/dogm/demo/simulator/mapping/kernel/measurement_grid.cu
index 4337ba9..16147c3 100644
--- a/dogm/demo/simulator/mapping/kernel/measurement_grid.cu
+++ b/dogm/demo/simulator/mapping/kernel/measurement_grid.cu
@@ -112,8 +112,8 @@ __global__ void fusePolarGridTextureKernel(cudaSurfaceObject_t polar, const floa
     }
 }
 
-__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid,
-                                                     cudaSurfaceObject_t cart, int grid_size)
+__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, cudaSurfaceObject_t cart,
+                                                     int grid_size)
 {
     const int x = blockIdx.x * blockDim.x + threadIdx.x;
     const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -123,16 +123,16 @@ __global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCell* __re
     {
         float4 color = surf2Dread<float4>(cart, x * sizeof(float4), y);
 
-        meas_grid[index].occ_mass = color.x;
-        meas_grid[index].free_mass = color.y;
+        meas_grid.occ_mass[index] = color.x;
+        meas_grid.free_mass[index] = color.y;
 
-        meas_grid[index].likelihood = 1.0f;
-        meas_grid[index].p_A = 1.0f;
+        meas_grid.likelihood[index] = 1.0f;
+        meas_grid.p_A[index] = 1.0f;
     }
 }
 
-__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid,
-                                                 const float2* __restrict__ grid, int grid_size)
+__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, const float2* __restrict__ grid,
+                                                 int grid_size)
 {
     const int x = blockIdx.x * blockDim.x + threadIdx.x;
     const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -142,10 +142,10 @@ __global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCell* __restri
     {
         float2 masses = grid[index];
 
-        meas_grid[index].occ_mass = masses.x;
-        meas_grid[index].free_mass = masses.y;
+        meas_grid.occ_mass[index] = masses.x;
+        meas_grid.free_mass[index] = masses.y;
 
-        meas_grid[index].likelihood = 1.0f;
-        meas_grid[index].p_A = 1.0f;
+        meas_grid.likelihood[index] = 1.0f;
+        meas_grid.p_A[index] = 1.0f;
     }
 }
diff --git a/dogm/demo/simulator/mapping/laser_to_meas_grid.cu b/dogm/demo/simulator/mapping/laser_to_meas_grid.cu
index 035afd6..0c1ecb4 100644
--- a/dogm/demo/simulator/mapping/laser_to_meas_grid.cu
+++ b/dogm/demo/simulator/mapping/laser_to_meas_grid.cu
@@ -12,23 +12,23 @@ LaserMeasurementGrid::LaserMeasurementGrid(const Params& params, float grid_leng
 {
     int grid_cell_count = grid_size * grid_size;
 
-    CHECK_ERROR(cudaMalloc(&meas_grid, grid_cell_count * sizeof(dogm::MeasurementCell)));
+    meas_grid.init(grid_cell_count, true);
 
     renderer = std::make_unique<Renderer>(grid_size, params.fov, grid_length, params.max_range);
 }
 
 LaserMeasurementGrid::~LaserMeasurementGrid()
 {
-    CHECK_ERROR(cudaFree(meas_grid));
+    meas_grid.free();
 }
 
-dogm::MeasurementCell* LaserMeasurementGrid::generateGrid(const std::vector<float>& measurements)
+dogm::MeasurementCellsSoA LaserMeasurementGrid::generateGrid(const std::vector<float>& measurements)
 {
     const int num_measurements = measurements.size();
 
     float* d_measurements;
-    CHECK_ERROR(cudaMalloc(&d_measurements, num_measurements * sizeof(float)));
-    CHECK_ERROR(
+    CUDA_CALL(cudaMalloc(&d_measurements, num_measurements * sizeof(float)));
+    CUDA_CALL(
         cudaMemcpy(d_measurements, measurements.data(), num_measurements * sizeof(float), cudaMemcpyHostToDevice));
 
     const int polar_width = num_measurements;
@@ -47,7 +47,7 @@ dogm::MeasurementCell* LaserMeasurementGrid::generateGrid(const std::vector<floa
     createPolarGridTextureKernel<<<grid_dim, dim_block>>>(polar_surface, d_measurements, polar_width, polar_height,
                                                           params.resolution);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
     polar_texture.endCudaAccess(polar_surface);
 
     // render cartesian image to texture using polar texture
@@ -60,11 +60,11 @@ dogm::MeasurementCell* LaserMeasurementGrid::generateGrid(const std::vector<floa
     // transform RGBA texture to measurement grid
     cartesianGridToMeasurementGridKernel<<<cart_grid_dim, dim_block>>>(meas_grid, cartesian_surface, grid_size);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
     framebuffer->endCudaAccess(cartesian_surface);
 
-    CHECK_ERROR(cudaFree(d_measurements));
-    CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaFree(d_measurements));
+    CUDA_CALL(cudaDeviceSynchronize());
 
     return meas_grid;
 }
diff --git a/dogm/demo/simulator/mapping/opengl/framebuffer.cpp b/dogm/demo/simulator/mapping/opengl/framebuffer.cpp
index 8c67d33..4c7144e 100644
--- a/dogm/demo/simulator/mapping/opengl/framebuffer.cpp
+++ b/dogm/demo/simulator/mapping/opengl/framebuffer.cpp
@@ -23,7 +23,7 @@ Framebuffer::Framebuffer(int width, int height)
 
     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0);
 
-    CHECK_ERROR(cudaGraphicsGLRegisterImage(&resource, texture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly));
+    CUDA_CALL(cudaGraphicsGLRegisterImage(&resource, texture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly));
 
     glBindFramebuffer(GL_FRAMEBUFFER, 0);
 }
@@ -36,23 +36,23 @@ Framebuffer::~Framebuffer()
 
 void Framebuffer::beginCudaAccess(cudaSurfaceObject_t* surfaceObject)
 {
-    CHECK_ERROR(cudaGraphicsMapResources(1, &resource, nullptr));
+    CUDA_CALL(cudaGraphicsMapResources(1, &resource, nullptr));
 
     cudaArray_t cudaArray;
-    CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0));
+    CUDA_CALL(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0));
 
     cudaResourceDesc resourceDesc;
     memset(&resourceDesc, 0, sizeof(cudaResourceDesc));
     resourceDesc.resType = cudaResourceTypeArray;
     resourceDesc.res.array.array = cudaArray;
 
-    CHECK_ERROR(cudaCreateSurfaceObject(surfaceObject, &resourceDesc));
+    CUDA_CALL(cudaCreateSurfaceObject(surfaceObject, &resourceDesc));
 }
 
 void Framebuffer::endCudaAccess(cudaSurfaceObject_t surfaceObject)
 {
-    CHECK_ERROR(cudaGraphicsUnmapResources(1, &resource, nullptr));
-    CHECK_ERROR(cudaDestroySurfaceObject(surfaceObject));
+    CUDA_CALL(cudaGraphicsUnmapResources(1, &resource, nullptr));
+    CUDA_CALL(cudaDestroySurfaceObject(surfaceObject));
 }
 
 void Framebuffer::bind()
diff --git a/dogm/demo/simulator/mapping/opengl/texture.cpp b/dogm/demo/simulator/mapping/opengl/texture.cpp
index 682a440..63a06a5 100644
--- a/dogm/demo/simulator/mapping/opengl/texture.cpp
+++ b/dogm/demo/simulator/mapping/opengl/texture.cpp
@@ -31,7 +31,7 @@ Texture::Texture(int width, int height, float anisotropy_level)
     float color[] = {0.0f, 0.0f, 1.0f, 1.0f};
     glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color);
 
-    CHECK_ERROR(
+    CUDA_CALL(
         cudaGraphicsGLRegisterImage(&resource, texture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsSurfaceLoadStore));
 
     glBindTexture(GL_TEXTURE_2D, 0);
@@ -44,24 +44,24 @@ Texture::~Texture()
 
 void Texture::beginCudaAccess(cudaSurfaceObject_t* surfaceObject)
 {
-    CHECK_ERROR(cudaGraphicsMapResources(1, &resource, nullptr));
+    CUDA_CALL(cudaGraphicsMapResources(1, &resource, nullptr));
 
     cudaArray_t cudaArray;
-    CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0));
+    CUDA_CALL(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0));
 
     cudaResourceDesc resourceDesc;
     memset(&resourceDesc, 0, sizeof(cudaResourceDesc));
     resourceDesc.resType = cudaResourceTypeArray;
     resourceDesc.res.array.array = cudaArray;
 
-    CHECK_ERROR(cudaCreateSurfaceObject(surfaceObject, &resourceDesc));
+    CUDA_CALL(cudaCreateSurfaceObject(surfaceObject, &resourceDesc));
 }
 
 void Texture::endCudaAccess(cudaSurfaceObject_t surfaceObject)
 {
-    CHECK_ERROR(cudaGraphicsUnmapResources(1, &resource, nullptr));
-    CHECK_ERROR(cudaGraphicsUnregisterResource(resource));
-    CHECK_ERROR(cudaDestroySurfaceObject(surfaceObject));
+    CUDA_CALL(cudaGraphicsUnmapResources(1, &resource, nullptr));
+    CUDA_CALL(cudaGraphicsUnregisterResource(resource));
+    CUDA_CALL(cudaDestroySurfaceObject(surfaceObject));
 }
 
 void Texture::generateMipMap()
diff --git a/dogm/demo/utils/image_creation.cpp b/dogm/demo/utils/image_creation.cpp
index cde8244..4b70c3a 100644
--- a/dogm/demo/utils/image_creation.cpp
+++ b/dogm/demo/utils/image_creation.cpp
@@ -32,29 +32,34 @@ std::vector<Point<dogm::GridCell>> computeCellsWithVelocity(const dogm::DOGM& gr
         {
             int index = y * grid_map.getGridSize() + x;
 
-            const dogm::GridCell& cell = grid_cells[index];
-            float occ = pignistic_transformation(cell.free_mass, cell.occ_mass);
+            float occ = pignistic_transformation(grid_cells.free_mass[index], grid_cells.occ_mass[index]);
             cv::Mat velocity_mean(2, 1, CV_32FC1);
-            velocity_mean.at<float>(0) = cell.mean_x_vel;
-            velocity_mean.at<float>(1) = cell.mean_y_vel;
+            velocity_mean.at<float>(0) = grid_cells.mean_x_vel[index];
+            velocity_mean.at<float>(1) = grid_cells.mean_y_vel[index];
 
             cv::Mat velocity_covar(2, 2, CV_32FC1);
-            velocity_covar.at<float>(0, 0) = cell.var_x_vel;
-            velocity_covar.at<float>(1, 0) = cell.covar_xy_vel;
-            velocity_covar.at<float>(0, 1) = cell.covar_xy_vel;
-            velocity_covar.at<float>(1, 1) = cell.var_y_vel;
+            velocity_covar.at<float>(0, 0) = grid_cells.var_x_vel[index];
+            velocity_covar.at<float>(1, 0) = grid_cells.covar_xy_vel[index];
+            velocity_covar.at<float>(0, 1) = grid_cells.covar_xy_vel[index];
+            velocity_covar.at<float>(1, 1) = grid_cells.var_y_vel[index];
 
             cv::Mat velocity_normalized_by_variance = velocity_mean.t() * velocity_covar.inv() * velocity_mean;
 
             if (occ >= min_occupancy_threshold &&
                 velocity_normalized_by_variance.at<float>(0, 0) >= min_velocity_threshold)
             {
-                Point<dogm::GridCell> point;
+                Point<dogm::GridCell> point{};
 
                 // Storing the point as grid index to be consistent with cell.mean_x_vel and cell.mean_y_vel
                 point.x = static_cast<float>(x);
                 point.y = static_cast<float>(y);
-                point.data = cell;
+                point.data.free_mass = grid_cells.free_mass[index];
+                point.data.occ_mass = grid_cells.occ_mass[index];
+                point.data.mean_x_vel = grid_cells.mean_x_vel[index];
+                point.data.mean_y_vel = grid_cells.mean_y_vel[index];
+                point.data.var_x_vel = grid_cells.var_x_vel[index];
+                point.data.var_y_vel = grid_cells.var_y_vel[index];
+                point.data.covar_xy_vel = grid_cells.covar_xy_vel[index];
                 point.cluster_id = UNCLASSIFIED;
 
                 cells_with_velocity.push_back(point);
@@ -76,8 +81,7 @@ cv::Mat compute_measurement_grid_image(const dogm::DOGM& grid_map)
         {
             int index = y * grid_map.getGridSize() + x;
 
-            const dogm::MeasurementCell& cell = meas_cells[index];
-            float occ = pignistic_transformation(cell.free_mass, cell.occ_mass);
+            float occ = pignistic_transformation(meas_cells.free_mass[index], meas_cells.occ_mass[index]);
             auto temp = static_cast<uchar>(occ * 255.0f);
 
             row_ptr[x] = cv::Vec3b(255 - temp, 255 - temp, 255 - temp);
@@ -97,9 +101,8 @@ cv::Mat compute_raw_measurement_grid_image(const dogm::DOGM& grid_map)
         for (int x = 0; x < grid_map.getGridSize(); x++)
         {
             int index = y * grid_map.getGridSize() + x;
-            const dogm::MeasurementCell& cell = meas_cells[index];
-            auto red = static_cast<int>(cell.occ_mass * 255.0f);
-            auto green = static_cast<int>(cell.free_mass * 255.0f);
+            auto red = static_cast<int>(meas_cells.occ_mass[index] * 255.0f);
+            auto green = static_cast<int>(meas_cells.free_mass[index] * 255.0f);
             int blue = 255 - red - green;
 
             row_ptr[x] = cv::Vec3b(blue, green, red);
@@ -120,8 +123,7 @@ cv::Mat compute_dogm_image(const dogm::DOGM& grid_map, const std::vector<Point<d
         {
             int index = y * grid_map.getGridSize() + x;
 
-            const dogm::GridCell& cell = grid_cells[index];
-            float occ = pignistic_transformation(cell.free_mass, cell.occ_mass);
+            float occ = pignistic_transformation(grid_cells.free_mass[index], grid_cells.occ_mass[index]);
             uchar grayscale_value = 255 - static_cast<uchar>(floor(occ * 255));
 
             row_ptr[x] = cv::Vec3b(grayscale_value, grayscale_value, grayscale_value);
diff --git a/dogm/include/dogm/cuda_utils.h b/dogm/include/dogm/cuda_utils.h
index 3efdbeb..7143117 100644
--- a/dogm/include/dogm/cuda_utils.h
+++ b/dogm/include/dogm/cuda_utils.h
@@ -10,18 +10,15 @@
 
 #define GPU_LAMBDA [=] __host__ __device__
 
-#define CHECK_ERROR(ans)                                                                                               \
+#ifndef CUDA_CALL
+#define CUDA_CALL(call)                                                                                                \
     {                                                                                                                  \
-        checkError((ans), __FILE__, __LINE__);                                                                         \
+        auto status = static_cast<cudaError_t>(call);                                                                  \
+        if (status != cudaSuccess)                                                                                     \
+            fprintf(stderr, "ERROR: CUDA RT call \"%s\" in line %d of file %s failed with %s (%d).\n", #call,          \
+                    __LINE__, __FILE__, cudaGetErrorString(status), status);                                           \
     }
-
-inline void checkError(cudaError_t code, const char* file, int line)
-{
-    if (code != cudaSuccess)
-    {
-        printf("GPU Kernel Error: %s %s %d\n", cudaGetErrorString(code), file, line);
-    }
-}
+#endif
 
 inline int divUp(int total, int grain)
 {
diff --git a/dogm/include/dogm/dogm.h b/dogm/include/dogm/dogm.h
index 9dc064b..2110889 100644
--- a/dogm/include/dogm/dogm.h
+++ b/dogm/include/dogm/dogm.h
@@ -76,7 +76,7 @@ class DOGM
      * @param dt delta time since the last update.
      * @param device whether the measurement grid resides in GPU memory (default: true).
      */
-    void updateGrid(MeasurementCell* measurement_grid, float new_x, float new_y, float new_yaw, float dt,
+    void updateGrid(MeasurementCellsSoA measurement_grid, float new_x, float new_y, float new_yaw, float dt,
                     bool device = true);
 
     /**
@@ -84,14 +84,14 @@ class DOGM
      *
      * @return grid map.
      */
-    std::vector<GridCell> getGridCells() const;
+    GridCellsSoA getGridCells() const;
 
     /**
      * Returns the measurement grid map in the host memory.
      *
      * @return measurement grid map.
      */
-    std::vector<MeasurementCell> getMeasurementCells() const;
+    MeasurementCellsSoA getMeasurementCells() const;
 
     /**
      * Returns the persistent particles of the particle filter.
@@ -139,7 +139,7 @@ class DOGM
     void initialize();
 
     void updatePose(float new_x, float new_y, float new_yaw);
-    void updateMeasurementGrid(MeasurementCell* measurement_grid, bool device);
+    void updateMeasurementGrid(MeasurementCellsSoA measurement_grid, bool device);
 
 public:
     void initializeParticles();
@@ -155,11 +155,11 @@ class DOGM
 public:
     Params params;
 
-    GridCell* grid_cell_array;
+    GridCellsSoA grid_cell_array;
     ParticlesSoA particle_array;
     ParticlesSoA particle_array_next;
     ParticlesSoA birth_particle_array;
-    MeasurementCell* meas_cell_array;
+    MeasurementCellsSoA meas_cell_array;
 
     float* weight_array;
     float* birth_weight_array;
diff --git a/dogm/include/dogm/dogm_types.h b/dogm/include/dogm/dogm_types.h
index 3c250ba..e856232 100644
--- a/dogm/include/dogm/dogm_types.h
+++ b/dogm/include/dogm/dogm_types.h
@@ -48,6 +48,262 @@ struct Particle
     glm::vec4 state;
 };
 
+struct GridCellsSoA
+{
+    int* start_idx;
+    int* end_idx;
+    float* new_born_occ_mass;
+    float* pers_occ_mass;
+    float* free_mass;
+    float* occ_mass;
+    float* pred_occ_mass;
+    float* mu_A;
+    float* mu_UA;
+
+    float* w_A;
+    float* w_UA;
+
+    float* mean_x_vel;
+    float* mean_y_vel;
+    float* var_x_vel;
+    float* var_y_vel;
+    float* covar_xy_vel;
+
+    int size;
+    bool device;
+
+    GridCellsSoA() : size(0), device(true) {}
+
+    GridCellsSoA(int new_size, bool is_device) { init(new_size, is_device); }
+
+    void init(int new_size, bool is_device)
+    {
+        size = new_size;
+        device = is_device;
+        if (device)
+        {
+            CUDA_CALL(cudaMalloc((void**)&start_idx, size * sizeof(int)));
+            CUDA_CALL(cudaMalloc((void**)&end_idx, size * sizeof(int)));
+            CUDA_CALL(cudaMalloc((void**)&new_born_occ_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&pers_occ_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&free_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&occ_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&pred_occ_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&mu_A, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&mu_UA, size * sizeof(float)));
+
+            CUDA_CALL(cudaMalloc((void**)&w_A, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&w_UA, size * sizeof(float)));
+
+            CUDA_CALL(cudaMalloc((void**)&mean_x_vel, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&mean_y_vel, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&var_x_vel, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&var_y_vel, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&covar_xy_vel, size * sizeof(float)));
+        }
+        else
+        {
+            start_idx = (int*)malloc(size * sizeof(int));
+            end_idx = (int*)malloc(size * sizeof(int));
+            new_born_occ_mass = (float*)malloc(size * sizeof(float));
+            pers_occ_mass = (float*)malloc(size * sizeof(float));
+            free_mass = (float*)malloc(size * sizeof(float));
+            occ_mass = (float*)malloc(size * sizeof(float));
+            pred_occ_mass = (float*)malloc(size * sizeof(float));
+            mu_A = (float*)malloc(size * sizeof(float));
+            mu_UA = (float*)malloc(size * sizeof(float));
+
+            w_A = (float*)malloc(size * sizeof(float));
+            w_UA = (float*)malloc(size * sizeof(float));
+
+            mean_x_vel = (float*)malloc(size * sizeof(float));
+            mean_y_vel = (float*)malloc(size * sizeof(float));
+            var_x_vel = (float*)malloc(size * sizeof(float));
+            var_y_vel = (float*)malloc(size * sizeof(float));
+            covar_xy_vel = (float*)malloc(size * sizeof(float));
+        }
+    }
+
+    void free()
+    {
+        if (device)
+        {
+            CUDA_CALL(cudaFree(start_idx));
+            CUDA_CALL(cudaFree(end_idx));
+            CUDA_CALL(cudaFree(new_born_occ_mass));
+            CUDA_CALL(cudaFree(pers_occ_mass));
+            CUDA_CALL(cudaFree(free_mass));
+            CUDA_CALL(cudaFree(occ_mass));
+            CUDA_CALL(cudaFree(pred_occ_mass));
+            CUDA_CALL(cudaFree(mu_A));
+            CUDA_CALL(cudaFree(mu_UA));
+
+            CUDA_CALL(cudaFree(w_A));
+            CUDA_CALL(cudaFree(w_UA));
+
+            CUDA_CALL(cudaFree(mean_x_vel));
+            CUDA_CALL(cudaFree(mean_y_vel));
+            CUDA_CALL(cudaFree(var_x_vel));
+            CUDA_CALL(cudaFree(var_y_vel));
+            CUDA_CALL(cudaFree(covar_xy_vel));
+        }
+        else
+        {
+            ::free(start_idx);
+            ::free(end_idx);
+            ::free(new_born_occ_mass);
+            ::free(pers_occ_mass);
+            ::free(free_mass);
+            ::free(occ_mass);
+            ::free(pred_occ_mass);
+            ::free(mu_A);
+            ::free(mu_UA);
+
+            ::free(w_A);
+            ::free(w_UA);
+
+            ::free(mean_x_vel);
+            ::free(mean_y_vel);
+            ::free(var_x_vel);
+            ::free(var_y_vel);
+            ::free(covar_xy_vel);
+        }
+    }
+
+    void copy(const GridCellsSoA& other, cudaMemcpyKind kind)
+    {
+        CUDA_CALL(cudaMemcpy(start_idx, other.start_idx, size * sizeof(int), kind));
+        CUDA_CALL(cudaMemcpy(end_idx, other.end_idx, size * sizeof(int), kind));
+        CUDA_CALL(cudaMemcpy(new_born_occ_mass, other.new_born_occ_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(pers_occ_mass, other.pers_occ_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(free_mass, other.free_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(occ_mass, other.occ_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(pred_occ_mass, other.pred_occ_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(mu_A, other.mu_A, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(mu_UA, other.mu_UA, size * sizeof(float), kind));
+
+        CUDA_CALL(cudaMemcpy(w_A, other.w_A, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(w_UA, other.w_UA, size * sizeof(float), kind));
+
+        CUDA_CALL(cudaMemcpy(mean_x_vel, other.mean_x_vel, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(mean_y_vel, other.mean_y_vel, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(var_x_vel, other.var_x_vel, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(var_y_vel, other.var_y_vel, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(covar_xy_vel, other.covar_xy_vel, size * sizeof(float), kind));
+    }
+
+    GridCellsSoA& operator=(const GridCellsSoA& other)
+    {
+        if (this != &other)
+        {
+            copy(other, cudaMemcpyDeviceToDevice);
+        }
+
+        return *this;
+    }
+
+    __device__ void copy(const GridCellsSoA& other, int index, int other_index)
+    {
+        start_idx[index] = other.start_idx[other_index];
+        end_idx[index] = other.end_idx[other_index];
+        new_born_occ_mass[index] = other.new_born_occ_mass[other_index];
+        pers_occ_mass[index] = other.pers_occ_mass[other_index];
+        free_mass[index] = other.free_mass[other_index];
+        occ_mass[index] = other.occ_mass[other_index];
+        pred_occ_mass[index] = other.pred_occ_mass[other_index];
+        mu_A[index] = other.mu_A[other_index];
+        mu_UA[index] = other.mu_UA[other_index];
+
+        w_A[index] = other.w_A[other_index];
+        w_UA[index] = other.w_UA[other_index];
+
+        mean_x_vel[index] = other.mean_x_vel[other_index];
+        mean_y_vel[index] = other.mean_y_vel[other_index];
+        var_x_vel[index] = other.var_x_vel[other_index];
+        var_y_vel[index] = other.var_y_vel[other_index];
+        covar_xy_vel[index] = other.covar_xy_vel[other_index];
+    }
+};
+
+struct MeasurementCellsSoA
+{
+    float* free_mass;
+    float* occ_mass;
+    float* likelihood;
+    float* p_A;
+
+    int size;
+    bool device;
+
+    MeasurementCellsSoA() : size(0), device(true) {}
+
+    MeasurementCellsSoA(int new_size, bool is_device) { init(new_size, is_device); }
+
+    void init(int new_size, bool is_device)
+    {
+        size = new_size;
+        device = is_device;
+        if (device)
+        {
+            CUDA_CALL(cudaMalloc((void**)&free_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&occ_mass, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&likelihood, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&p_A, size * sizeof(float)));
+        }
+        else
+        {
+            free_mass = (float*)malloc(size * sizeof(float));
+            occ_mass = (float*)malloc(size * sizeof(float));
+            likelihood = (float*)malloc(size * sizeof(float));
+            p_A = (float*)malloc(size * sizeof(float));
+        }
+    }
+
+    void free()
+    {
+        if (device)
+        {
+            CUDA_CALL(cudaFree(free_mass));
+            CUDA_CALL(cudaFree(occ_mass));
+            CUDA_CALL(cudaFree(likelihood));
+            CUDA_CALL(cudaFree(p_A));
+        }
+        else
+        {
+            ::free(free_mass);
+            ::free(occ_mass);
+            ::free(likelihood);
+            ::free(p_A);
+        }
+    }
+
+    void copy(const MeasurementCellsSoA& other, cudaMemcpyKind kind)
+    {
+        CUDA_CALL(cudaMemcpy(free_mass, other.free_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(occ_mass, other.occ_mass, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(likelihood, other.likelihood, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(p_A, other.p_A, size * sizeof(float), kind));
+    }
+
+    MeasurementCellsSoA& operator=(const MeasurementCellsSoA& other)
+    {
+        if (this != &other)
+        {
+            copy(other, cudaMemcpyDeviceToDevice);
+        }
+
+        return *this;
+    }
+
+    __device__ void copy(const MeasurementCellsSoA& other, int index, int other_index)
+    {
+        free_mass[index] = other.free_mass[other_index];
+        occ_mass[index] = other.occ_mass[other_index];
+        likelihood[index] = other.likelihood[other_index];
+        p_A[index] = other.p_A[other_index];
+    }
+};
+
 struct ParticlesSoA
 {
     glm::vec4* state;
@@ -68,10 +324,10 @@ struct ParticlesSoA
         device = is_device;
         if (device)
         {
-            CHECK_ERROR(cudaMalloc((void**)&state, size * sizeof(glm::vec4)));
-            CHECK_ERROR(cudaMalloc((void**)&grid_cell_idx, size * sizeof(int)));
-            CHECK_ERROR(cudaMalloc((void**)&weight, size * sizeof(float)));
-            CHECK_ERROR(cudaMalloc((void**)&associated, size * sizeof(bool)));
+            CUDA_CALL(cudaMalloc((void**)&state, size * sizeof(glm::vec4)));
+            CUDA_CALL(cudaMalloc((void**)&grid_cell_idx, size * sizeof(int)));
+            CUDA_CALL(cudaMalloc((void**)&weight, size * sizeof(float)));
+            CUDA_CALL(cudaMalloc((void**)&associated, size * sizeof(bool)));
         }
         else
         {
@@ -86,10 +342,10 @@ struct ParticlesSoA
     {
         if (device)
         {
-            CHECK_ERROR(cudaFree(state));
-            CHECK_ERROR(cudaFree(grid_cell_idx));
-            CHECK_ERROR(cudaFree(weight));
-            CHECK_ERROR(cudaFree(associated));
+            CUDA_CALL(cudaFree(state));
+            CUDA_CALL(cudaFree(grid_cell_idx));
+            CUDA_CALL(cudaFree(weight));
+            CUDA_CALL(cudaFree(associated));
         }
         else
         {
@@ -102,10 +358,10 @@ struct ParticlesSoA
 
     void copy(const ParticlesSoA& other, cudaMemcpyKind kind)
     {
-        CHECK_ERROR(cudaMemcpy(grid_cell_idx, other.grid_cell_idx, size * sizeof(int), kind));
-        CHECK_ERROR(cudaMemcpy(weight, other.weight, size * sizeof(float), kind));
-        CHECK_ERROR(cudaMemcpy(associated, other.associated, size * sizeof(bool), kind));
-        CHECK_ERROR(cudaMemcpy(state, other.state, size * sizeof(glm::vec4), kind));
+        CUDA_CALL(cudaMemcpy(grid_cell_idx, other.grid_cell_idx, size * sizeof(int), kind));
+        CUDA_CALL(cudaMemcpy(weight, other.weight, size * sizeof(float), kind));
+        CUDA_CALL(cudaMemcpy(associated, other.associated, size * sizeof(bool), kind));
+        CUDA_CALL(cudaMemcpy(state, other.state, size * sizeof(glm::vec4), kind));
     }
 
     ParticlesSoA& operator=(const ParticlesSoA& other)
diff --git a/dogm/include/dogm/kernel/ego_motion_compensation.h b/dogm/include/dogm/kernel/ego_motion_compensation.h
index 144b56c..e6999aa 100644
--- a/dogm/include/dogm/kernel/ego_motion_compensation.h
+++ b/dogm/include/dogm/kernel/ego_motion_compensation.h
@@ -10,12 +10,10 @@
 namespace dogm
 {
 
-struct GridCell;
-struct ParticlesSoA;
-
 __global__ void moveParticlesKernel(ParticlesSoA particle_array, int x_move, int y_move, int particle_count);
 
-__global__ void moveMapKernel(GridCell* __restrict__ grid_cell_array, const GridCell* __restrict__ old_grid_cell_array,
-                              int x_move, int y_move, int grid_size);
+__global__ void moveMapKernel(GridCellsSoA grid_cell_array, GridCellsSoA old_grid_cell_array,
+                              MeasurementCellsSoA meas_cell_array, ParticlesSoA particle_array, int x_move, int y_move,
+                              int grid_size);
 
 } /* namespace dogm */
diff --git a/dogm/include/dogm/kernel/init.h b/dogm/include/dogm/kernel/init.h
index b143215..a90558c 100644
--- a/dogm/include/dogm/kernel/init.h
+++ b/dogm/include/dogm/kernel/init.h
@@ -22,9 +22,9 @@ __global__ void initParticlesKernel(ParticlesSoA particle_array, curandState* __
 __global__ void initBirthParticlesKernel(ParticlesSoA birth_particle_array, curandState* __restrict__ global_state,
                                          float velocity, int grid_size, int particle_count);
 
-__global__ void initGridCellsKernel(GridCell* __restrict__ grid_cell_array,
-                                    MeasurementCell* __restrict__ meas_cell_array, int grid_size, int cell_count);
+__global__ void initGridCellsKernel(GridCellsSoA grid_cell_array, MeasurementCellsSoA meas_cell_array, int grid_size,
+                                    int cell_count);
 
-__global__ void reinitGridParticleIndices(GridCell* __restrict__ grid_cell_array, int cell_count);
+__global__ void reinitGridParticleIndices(GridCellsSoA grid_cell_array, int cell_count);
 
 } /* namespace dogm */
diff --git a/dogm/include/dogm/kernel/init_new_particles.h b/dogm/include/dogm/kernel/init_new_particles.h
index cecfafc..cfa049c 100644
--- a/dogm/include/dogm/kernel/init_new_particles.h
+++ b/dogm/include/dogm/kernel/init_new_particles.h
@@ -16,24 +16,20 @@ struct Particle;
 
 void normalize_particle_orders(float* particle_orders_array_accum, int particle_orders_count, int v_B);
 
-__global__ void copyMassesKernel(const MeasurementCell* __restrict__ meas_cell_array, float* __restrict__ masses,
-                                 int cell_count);
+__global__ void copyMassesKernel(const MeasurementCellsSoA meas_cell_array, float* __restrict__ masses, int cell_count);
 
-__global__ void initParticlesKernel1(GridCell* __restrict__ grid_cell_array,
-                                     const MeasurementCell* __restrict__ meas_cell_array, ParticlesSoA particle_array,
-                                     const float* __restrict__ particle_orders_array_accum, int cell_count);
+__global__ void initParticlesKernel1(ParticlesSoA particle_array, const float* __restrict__ particle_orders_array_accum,
+                                     int cell_count);
 
-__global__ void initParticlesKernel2(ParticlesSoA particle_array, const GridCell* __restrict__ grid_cell_array,
-                                     curandState* __restrict__ global_state, float velocity, int grid_size,
-                                     float new_weight, int particle_count);
+__global__ void initParticlesKernel2(ParticlesSoA particle_array, curandState* __restrict__ global_state,
+                                     float velocity, int grid_size, float new_weight, int particle_count);
 
-__global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array,
-                                        const MeasurementCell* __restrict__ meas_cell_array,
+__global__ void initNewParticlesKernel1(GridCellsSoA grid_cell_array, const MeasurementCellsSoA meas_cell_array,
                                         const float* __restrict__ weight_array,
                                         const float* __restrict__ born_masses_array, ParticlesSoA birth_particle_array,
                                         const float* __restrict__ particle_orders_array_accum, int cell_count);
 
-__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCell* __restrict__ grid_cell_array,
+__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCellsSoA grid_cell_array,
                                         curandState* __restrict__ global_state, float stddev_velocity,
                                         float max_velocity, int grid_size, int particle_count);
 
diff --git a/dogm/include/dogm/kernel/mass_update.h b/dogm/include/dogm/kernel/mass_update.h
index 67f16f9..d45546e 100644
--- a/dogm/include/dogm/kernel/mass_update.h
+++ b/dogm/include/dogm/kernel/mass_update.h
@@ -9,14 +9,10 @@
 namespace dogm
 {
 
-struct GridCell;
-struct MeasurementCell;
-struct Particle;
-
-__global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_array, ParticlesSoA particle_array,
+__global__ void gridCellPredictionUpdateKernel(GridCellsSoA grid_cell_array, ParticlesSoA particle_array,
                                                float* __restrict__ weight_array,
                                                const float* __restrict__ weight_array_accum,
-                                               const MeasurementCell* __restrict__ meas_cell_array,
+                                               const MeasurementCellsSoA meas_cell_array,
                                                float* __restrict__ born_masses_array, float p_B, int cell_count);
 
 } /* namespace dogm */
diff --git a/dogm/include/dogm/kernel/particle_to_grid.h b/dogm/include/dogm/kernel/particle_to_grid.h
index ef03861..26ccdc5 100644
--- a/dogm/include/dogm/kernel/particle_to_grid.h
+++ b/dogm/include/dogm/kernel/particle_to_grid.h
@@ -9,10 +9,7 @@
 namespace dogm
 {
 
-struct GridCell;
-struct Particle;
-
-__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCell* __restrict__ grid_cell_array,
+__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCellsSoA grid_cell_array,
                                      float* __restrict__ weight_array, int particle_count);
 
 } /* namespace dogm */
diff --git a/dogm/include/dogm/kernel/statistical_moments.h b/dogm/include/dogm/kernel/statistical_moments.h
index 8f715a1..31b62a3 100644
--- a/dogm/include/dogm/kernel/statistical_moments.h
+++ b/dogm/include/dogm/kernel/statistical_moments.h
@@ -9,17 +9,13 @@
 namespace dogm
 {
 
-struct GridCell;
-struct Particle;
-
 __global__ void statisticalMomentsKernel1(const ParticlesSoA particle_array, const float* __restrict__ weight_array,
                                           float* __restrict__ vel_x_array, float* __restrict__ vel_y_array,
                                           float* __restrict__ vel_x_squared_array,
                                           float* __restrict__ vel_y_squared_array, float* __restrict__ vel_xy_array,
                                           int particle_count);
 
-__global__ void statisticalMomentsKernel2(GridCell* __restrict__ grid_cell_array,
-                                          const float* __restrict__ vel_x_array_accum,
+__global__ void statisticalMomentsKernel2(GridCellsSoA grid_cell_array, const float* __restrict__ vel_x_array_accum,
                                           const float* __restrict__ vel_y_array_accum,
                                           const float* __restrict__ vel_x_squared_array_accum,
                                           const float* __restrict__ vel_y_squared_array_accum,
diff --git a/dogm/include/dogm/kernel/update_persistent_particles.h b/dogm/include/dogm/kernel/update_persistent_particles.h
index 9b309fa..44f5c09 100644
--- a/dogm/include/dogm/kernel/update_persistent_particles.h
+++ b/dogm/include/dogm/kernel/update_persistent_particles.h
@@ -9,20 +9,16 @@
 namespace dogm
 {
 
-struct GridCell;
-struct MeasurementCell;
-struct Particle;
-
 __global__ void updatePersistentParticlesKernel1(const ParticlesSoA particle_array,
-                                                 const MeasurementCell* __restrict__ meas_cell_array,
+                                                 const MeasurementCellsSoA meas_cell_array,
                                                  float* __restrict__ weight_array, int particle_count);
 
-__global__ void updatePersistentParticlesKernel2(GridCell* __restrict__ grid_cell_array,
+__global__ void updatePersistentParticlesKernel2(GridCellsSoA grid_cell_array,
                                                  const float* __restrict__ weight_array_accum, int cell_count);
 
 __global__ void updatePersistentParticlesKernel3(const ParticlesSoA particle_array,
-                                                 const MeasurementCell* __restrict__ meas_cell_array,
-                                                 const GridCell* __restrict__ grid_cell_array,
-                                                 float* __restrict__ weight_array, int particle_count);
+                                                 const MeasurementCellsSoA meas_cell_array,
+                                                 const GridCellsSoA grid_cell_array, float* __restrict__ weight_array,
+                                                 int particle_count);
 
 } /* namespace dogm */
diff --git a/dogm/src/dogm.cu b/dogm/src/dogm.cu
index fd63d40..e399a8f 100644
--- a/dogm/src/dogm.cu
+++ b/dogm/src/dogm.cu
@@ -36,10 +36,10 @@ DOGM::DOGM(const Params& params)
       first_measurement_received(false), position_x(0.0f), position_y(0.0f)
 {
     int device;
-    CHECK_ERROR(cudaGetDevice(&device));
+    CUDA_CALL(cudaGetDevice(&device));
 
     cudaDeviceProp device_prop;
-    CHECK_ERROR(cudaGetDeviceProperties(&device_prop, device));
+    CUDA_CALL(cudaGetDeviceProperties(&device_prop, device));
 
     int blocks_per_sm = device_prop.maxThreadsPerMultiProcessor / block_dim.x;
     dim3 dim(device_prop.multiProcessorCount * blocks_per_sm);
@@ -49,22 +49,22 @@ DOGM::DOGM(const Params& params)
     particle_array_next.init(particle_count, true);
     birth_particle_array.init(new_born_particle_count, true);
 
-    CHECK_ERROR(cudaMalloc(&grid_cell_array, grid_cell_count * sizeof(GridCell)));
-    CHECK_ERROR(cudaMalloc(&meas_cell_array, grid_cell_count * sizeof(MeasurementCell)));
+    grid_cell_array.init(grid_cell_count, true);
+    meas_cell_array.init(grid_cell_count, true);
 
-    CHECK_ERROR(cudaMalloc(&weight_array, particle_count * sizeof(float)));
-    CHECK_ERROR(cudaMalloc(&birth_weight_array, new_born_particle_count * sizeof(float)));
-    CHECK_ERROR(cudaMalloc(&born_masses_array, grid_cell_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&weight_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&birth_weight_array, new_born_particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&born_masses_array, grid_cell_count * sizeof(float)));
 
-    CHECK_ERROR(cudaMalloc(&vel_x_array, particle_count * sizeof(float)));
-    CHECK_ERROR(cudaMalloc(&vel_y_array, particle_count * sizeof(float)));
-    CHECK_ERROR(cudaMalloc(&vel_x_squared_array, particle_count * sizeof(float)));
-    CHECK_ERROR(cudaMalloc(&vel_y_squared_array, particle_count * sizeof(float)));
-    CHECK_ERROR(cudaMalloc(&vel_xy_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&vel_x_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&vel_y_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&vel_x_squared_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&vel_y_squared_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&vel_xy_array, particle_count * sizeof(float)));
 
-    CHECK_ERROR(cudaMalloc(&rand_array, particle_count * sizeof(float)));
+    CUDA_CALL(cudaMalloc(&rand_array, particle_count * sizeof(float)));
 
-    CHECK_ERROR(cudaMalloc(&rng_states, particles_grid.x * block_dim.x * sizeof(curandState)));
+    CUDA_CALL(cudaMalloc(&rng_states, particles_grid.x * block_dim.x * sizeof(curandState)));
 
     initialize();
 }
@@ -75,43 +75,44 @@ DOGM::~DOGM()
     particle_array_next.free();
     birth_particle_array.free();
 
-    CHECK_ERROR(cudaFree(grid_cell_array));
-    CHECK_ERROR(cudaFree(meas_cell_array));
+    grid_cell_array.free();
+    meas_cell_array.free();
 
-    CHECK_ERROR(cudaFree(weight_array));
-    CHECK_ERROR(cudaFree(birth_weight_array));
-    CHECK_ERROR(cudaFree(born_masses_array));
+    CUDA_CALL(cudaFree(weight_array));
+    CUDA_CALL(cudaFree(birth_weight_array));
+    CUDA_CALL(cudaFree(born_masses_array));
 
-    CHECK_ERROR(cudaFree(vel_x_array));
-    CHECK_ERROR(cudaFree(vel_y_array));
-    CHECK_ERROR(cudaFree(vel_x_squared_array));
-    CHECK_ERROR(cudaFree(vel_y_squared_array));
-    CHECK_ERROR(cudaFree(vel_xy_array));
+    CUDA_CALL(cudaFree(vel_x_array));
+    CUDA_CALL(cudaFree(vel_y_array));
+    CUDA_CALL(cudaFree(vel_x_squared_array));
+    CUDA_CALL(cudaFree(vel_y_squared_array));
+    CUDA_CALL(cudaFree(vel_xy_array));
 
-    CHECK_ERROR(cudaFree(rng_states));
+    CUDA_CALL(cudaFree(rand_array));
+
+    CUDA_CALL(cudaFree(rng_states));
 }
 
 void DOGM::initialize()
 {
     cudaStream_t particles_stream, grid_stream;
-    CHECK_ERROR(cudaStreamCreate(&particles_stream));
-    CHECK_ERROR(cudaStreamCreate(&grid_stream));
+    CUDA_CALL(cudaStreamCreate(&particles_stream));
+    CUDA_CALL(cudaStreamCreate(&grid_stream));
 
     setupRandomStatesKernel<<<particles_grid, block_dim>>>(rng_states, 123456, particles_grid.x * block_dim.x);
 
-    CHECK_ERROR(cudaGetLastError());
-    CHECK_ERROR(cudaDeviceSynchronize());
-
+    CUDA_CALL(cudaGetLastError());
+    CUDA_CALL(cudaDeviceSynchronize());
     initGridCellsKernel<<<grid_map_grid, block_dim, 0, grid_stream>>>(grid_cell_array, meas_cell_array, grid_size,
                                                                       grid_cell_count);
+    CUDA_CALL(cudaGetLastError());
 
-    CHECK_ERROR(cudaGetLastError());
-
-    CHECK_ERROR(cudaStreamDestroy(particles_stream));
-    CHECK_ERROR(cudaStreamDestroy(grid_stream));
+    CUDA_CALL(cudaStreamDestroy(particles_stream));
+    CUDA_CALL(cudaStreamDestroy(grid_stream));
 }
 
-void DOGM::updateGrid(MeasurementCell* measurement_grid, float new_x, float new_y, float new_yaw, float dt, bool device)
+void DOGM::updateGrid(MeasurementCellsSoA measurement_grid, float new_x, float new_y, float new_yaw, float dt,
+                      bool device)
 {
     updateMeasurementGrid(measurement_grid, device);
     updatePose(new_x, new_y, new_yaw);
@@ -126,25 +127,21 @@ void DOGM::updateGrid(MeasurementCell* measurement_grid, float new_x, float new_
 
     particle_array = particle_array_next;
 
-    CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaDeviceSynchronize());
 }
 
-std::vector<GridCell> DOGM::getGridCells() const
+GridCellsSoA DOGM::getGridCells() const
 {
-    std::vector<GridCell> grid_cells(static_cast<std::vector<GridCell>::size_type>(grid_cell_count));
-
-    CHECK_ERROR(
-        cudaMemcpy(grid_cells.data(), grid_cell_array, grid_cell_count * sizeof(GridCell), cudaMemcpyDeviceToHost));
+    GridCellsSoA grid_cells(grid_cell_count, false);
+    grid_cells.copy(grid_cell_array, cudaMemcpyDeviceToHost);
 
     return grid_cells;
 }
 
-std::vector<MeasurementCell> DOGM::getMeasurementCells() const
+MeasurementCellsSoA DOGM::getMeasurementCells() const
 {
-    std::vector<MeasurementCell> meas_cells(static_cast<std::vector<GridCell>::size_type>(grid_cell_count));
-
-    CHECK_ERROR(cudaMemcpy(meas_cells.data(), meas_cell_array, grid_cell_count * sizeof(MeasurementCell),
-                           cudaMemcpyDeviceToHost));
+    MeasurementCellsSoA meas_cells(grid_cell_count, false);
+    meas_cells.copy(meas_cell_array, cudaMemcpyDeviceToHost);
 
     return meas_cells;
 }
@@ -176,23 +173,18 @@ void DOGM::updatePose(float new_x, float new_y, float new_yaw)
             const int x_move = -static_cast<int>(x_diff / params.resolution);
             const int y_move = -static_cast<int>(y_diff / params.resolution);
 
-            GridCell* old_grid_cell_array;
-            CHECK_ERROR(cudaMalloc(&old_grid_cell_array, grid_cell_count * sizeof(GridCell)));
-
-            CHECK_ERROR(cudaMemcpy(old_grid_cell_array, grid_cell_array, grid_cell_count * sizeof(GridCell),
-                                   cudaMemcpyDeviceToDevice));
-            CHECK_ERROR(cudaMemset(grid_cell_array, 0, grid_cell_count * sizeof(GridCell)));
+            moveParticlesKernel<<<particles_grid, block_dim>>>(particle_array, x_move, y_move, particle_count);
 
             dim3 dim_block(32, 32);
             dim3 grid_dim(divUp(grid_size, dim_block.x), divUp(grid_size, dim_block.y));
 
-            moveParticlesKernel<<<particles_grid, block_dim>>>(particle_array, x_move, y_move, particle_count);
-            CHECK_ERROR(cudaGetLastError());
+            GridCellsSoA old_grid_cell_array(grid_cell_count, true);
+            old_grid_cell_array.copy(grid_cell_array, cudaMemcpyDeviceToDevice);
 
-            moveMapKernel<<<grid_dim, dim_block>>>(grid_cell_array, old_grid_cell_array, x_move, y_move, grid_size);
-            CHECK_ERROR(cudaGetLastError());
+            moveMapKernel<<<grid_dim, dim_block>>>(grid_cell_array, old_grid_cell_array, meas_cell_array,
+                                                   particle_array, x_move, y_move, grid_size);
 
-            CHECK_ERROR(cudaFree(old_grid_cell_array));
+            old_grid_cell_array.free();
 
             position_x = new_x;
             position_y = new_y;
@@ -201,10 +193,10 @@ void DOGM::updatePose(float new_x, float new_y, float new_yaw)
     }
 }
 
-void DOGM::updateMeasurementGrid(MeasurementCell* measurement_grid, bool device)
+void DOGM::updateMeasurementGrid(MeasurementCellsSoA measurement_grid, bool device)
 {
     cudaMemcpyKind kind = device ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice;
-    CHECK_ERROR(cudaMemcpy(meas_cell_array, measurement_grid, grid_cell_count * sizeof(MeasurementCell), kind));
+    meas_cell_array.copy(measurement_grid, kind);
 
     if (!first_measurement_received)
     {
@@ -217,8 +209,8 @@ void DOGM::initializeParticles()
 {
     copyMassesKernel<<<grid_map_grid, block_dim>>>(meas_cell_array, born_masses_array, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
-    CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaGetLastError());
+    CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_vector<float> particle_orders_accum(grid_cell_count);
     accumulate(born_masses_array, particle_orders_accum);
@@ -228,15 +220,14 @@ void DOGM::initializeParticles()
 
     normalize_particle_orders(particle_orders_array_accum, grid_cell_count, particle_count);
 
-    initParticlesKernel1<<<grid_map_grid, block_dim>>>(grid_cell_array, meas_cell_array, particle_array,
-                                                       particle_orders_array_accum, grid_cell_count);
+    initParticlesKernel1<<<grid_map_grid, block_dim>>>(particle_array, particle_orders_array_accum, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 
-    initParticlesKernel2<<<particles_grid, block_dim>>>(
-        particle_array, grid_cell_array, rng_states, params.init_max_velocity, grid_size, new_weight, particle_count);
+    initParticlesKernel2<<<particles_grid, block_dim>>>(particle_array, rng_states, params.init_max_velocity, grid_size,
+                                                        new_weight, particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::particlePrediction(float dt)
@@ -255,15 +246,15 @@ void DOGM::particlePrediction(float dt)
         particle_array, rng_states, params.stddev_velocity, grid_size, params.persistence_prob, transition_matrix,
         params.stddev_process_noise_position, params.stddev_process_noise_velocity, particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::particleAssignment()
 {
     reinitGridParticleIndices<<<grid_map_grid, block_dim>>>(grid_cell_array, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaGetLastError());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     // sort particles
     thrust::device_ptr<int> grid_index_ptr(particle_array.grid_cell_idx);
@@ -276,12 +267,12 @@ void DOGM::particleAssignment()
 
     particleToGridKernel<<<particles_grid, block_dim>>>(particle_array, grid_cell_array, weight_array, particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::gridCellOccupancyUpdate()
 {
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_vector<float> weights_accum(particle_count);
     accumulate(weight_array, weights_accum);
@@ -291,7 +282,7 @@ void DOGM::gridCellOccupancyUpdate()
                                                                  weight_array_accum, meas_cell_array, born_masses_array,
                                                                  params.birth_prob, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::updatePersistentParticles()
@@ -299,8 +290,8 @@ void DOGM::updatePersistentParticles()
     updatePersistentParticlesKernel1<<<particles_grid, block_dim>>>(particle_array, meas_cell_array, weight_array,
                                                                     particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaGetLastError());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_vector<float> weights_accum(particle_count);
     accumulate(weight_array, weights_accum);
@@ -309,12 +300,12 @@ void DOGM::updatePersistentParticles()
     updatePersistentParticlesKernel2<<<divUp(grid_cell_count, BLOCK_SIZE), BLOCK_SIZE>>>(
         grid_cell_array, weight_array_accum, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 
     updatePersistentParticlesKernel3<<<particles_grid, block_dim>>>(particle_array, meas_cell_array, grid_cell_array,
                                                                     weight_array, particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::initializeNewParticles()
@@ -322,8 +313,8 @@ void DOGM::initializeNewParticles()
     initBirthParticlesKernel<<<birth_particles_grid, block_dim>>>(
         birth_particle_array, rng_states, params.stddev_velocity, grid_size, new_born_particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaGetLastError());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_vector<float> particle_orders_accum(grid_cell_count);
     accumulate(born_masses_array, particle_orders_accum);
@@ -335,13 +326,13 @@ void DOGM::initializeNewParticles()
                                                           born_masses_array, birth_particle_array,
                                                           particle_orders_array_accum, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 
     initNewParticlesKernel2<<<birth_particles_grid, block_dim>>>(birth_particle_array, grid_cell_array, rng_states,
                                                                  params.stddev_velocity, params.init_max_velocity,
                                                                  grid_size, new_born_particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::statisticalMoments()
@@ -350,8 +341,8 @@ void DOGM::statisticalMoments()
                                                              vel_x_squared_array, vel_y_squared_array, vel_xy_array,
                                                              particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaGetLastError());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_vector<float> vel_x_accum(particle_count);
     accumulate(vel_x_array, vel_x_accum);
@@ -377,12 +368,12 @@ void DOGM::statisticalMoments()
                                                             vel_x_squared_array_accum, vel_y_squared_array_accum,
                                                             vel_xy_array_accum, grid_cell_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 void DOGM::resampling()
 {
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_ptr<float> persistent_weights(weight_array);
     thrust::device_ptr<float> new_born_weights(birth_particle_array.weight);
@@ -399,8 +390,8 @@ void DOGM::resampling()
     resamplingGenerateRandomNumbersKernel<<<particles_grid, block_dim>>>(rand_array, rng_states, joint_max,
                                                                          particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
-    // CHECK_ERROR(cudaDeviceSynchronize());
+    CUDA_CALL(cudaGetLastError());
+    // CUDA_CALL(cudaDeviceSynchronize());
 
     thrust::device_ptr<float> rand_ptr(rand_array);
     thrust::device_vector<float> rand_vector(rand_ptr, rand_ptr + particle_count);
@@ -416,7 +407,7 @@ void DOGM::resampling()
     resamplingKernel<<<particles_grid, block_dim>>>(particle_array, particle_array_next, birth_particle_array,
                                                     idx_array_resampled, new_weight, particle_count);
 
-    CHECK_ERROR(cudaGetLastError());
+    CUDA_CALL(cudaGetLastError());
 }
 
 } /* namespace dogm */
diff --git a/dogm/src/kernel/ego_motion_compensation.cu b/dogm/src/kernel/ego_motion_compensation.cu
index 5f833fe..847680f 100644
--- a/dogm/src/kernel/ego_motion_compensation.cu
+++ b/dogm/src/kernel/ego_motion_compensation.cu
@@ -22,8 +22,9 @@ __global__ void moveParticlesKernel(ParticlesSoA particle_array, int x_move, int
     }
 }
 
-__global__ void moveMapKernel(GridCell* __restrict__ grid_cell_array, const GridCell* __restrict__ old_grid_cell_array,
-                              int x_move, int y_move, int grid_size)
+__global__ void moveMapKernel(GridCellsSoA grid_cell_array, GridCellsSoA old_grid_cell_array,
+                              MeasurementCellsSoA meas_cell_array, ParticlesSoA particle_array, int x_move, int y_move,
+                              int grid_size)
 {
     const int x = blockIdx.x * blockDim.x + threadIdx.x;
     const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -37,7 +38,7 @@ __global__ void moveMapKernel(GridCell* __restrict__ grid_cell_array, const Grid
 
         if (new_x > 0 && new_x < grid_size && new_y > 0 && new_y < grid_size)
         {
-            grid_cell_array[index] = old_grid_cell_array[new_index];
+            grid_cell_array.copy(old_grid_cell_array, index, new_index);
         }
     }
 }
diff --git a/dogm/src/kernel/init.cu b/dogm/src/kernel/init.cu
index df4bdec..7b8db97 100644
--- a/dogm/src/kernel/init.cu
+++ b/dogm/src/kernel/init.cu
@@ -66,29 +66,29 @@ __global__ void initBirthParticlesKernel(ParticlesSoA birth_particle_array, cura
     // global_state[thread_id] = local_state;
 }
 
-__global__ void initGridCellsKernel(GridCell* __restrict__ grid_cell_array,
-                                    MeasurementCell* __restrict__ meas_cell_array, int grid_size, int cell_count)
+__global__ void initGridCellsKernel(GridCellsSoA grid_cell_array, MeasurementCellsSoA meas_cell_array, int grid_size,
+                                    int cell_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x)
     {
-        grid_cell_array[i].free_mass = 0.0f;
-        grid_cell_array[i].occ_mass = 0.0f;
-        grid_cell_array[i].start_idx = -1;
-        grid_cell_array[i].end_idx = -1;
-
-        meas_cell_array[i].occ_mass = 0.0f;
-        meas_cell_array[i].free_mass = 0.0f;
-        meas_cell_array[i].likelihood = 1.0f;
-        meas_cell_array[i].p_A = 1.0f;
+        grid_cell_array.free_mass[i] = 0.0f;
+        grid_cell_array.occ_mass[i] = 0.0f;
+        grid_cell_array.start_idx[i] = -1;
+        grid_cell_array.end_idx[i] = -1;
+
+        meas_cell_array.occ_mass[i] = 0.0f;
+        meas_cell_array.free_mass[i] = 0.0f;
+        meas_cell_array.likelihood[i] = 1.0f;
+        meas_cell_array.p_A[i] = 1.0f;
     }
 }
 
-__global__ void reinitGridParticleIndices(GridCell* __restrict__ grid_cell_array, int cell_count)
+__global__ void reinitGridParticleIndices(GridCellsSoA grid_cell_array, int cell_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x)
     {
-        grid_cell_array[i].start_idx = -1;
-        grid_cell_array[i].end_idx = -1;
+        grid_cell_array.start_idx[i] = -1;
+        grid_cell_array.end_idx[i] = -1;
     }
 }
 
diff --git a/dogm/src/kernel/init_new_particles.cu b/dogm/src/kernel/init_new_particles.cu
index f5328b8..ea0b13b 100644
--- a/dogm/src/kernel/init_new_particles.cu
+++ b/dogm/src/kernel/init_new_particles.cu
@@ -57,10 +57,10 @@ __device__ float calc_weight_unassoc(int nu_UA, float p_A, float born_mass)
     return nu_UA > 0 ? ((1.0f - p_A) * born_mass) / nu_UA : 0.0f;
 }
 
-__device__ void store_weights(float w_A, float w_UA, GridCell* __restrict__ grid_cell_array, int j)
+__device__ void store_weights(float w_A, float w_UA, GridCellsSoA grid_cell_array, int j)
 {
-    grid_cell_array[j].w_A = w_A;
-    grid_cell_array[j].w_UA = w_UA;
+    grid_cell_array.w_A[j] = w_A;
+    grid_cell_array.w_UA[j] = w_UA;
 }
 
 void normalize_particle_orders(float* particle_orders_array_accum, int particle_orders_count, int v_B)
@@ -73,18 +73,16 @@ void normalize_particle_orders(float* particle_orders_array_accum, int particle_
                       GPU_LAMBDA(float x) { return x * (v_B / max); });
 }
 
-__global__ void copyMassesKernel(const MeasurementCell* __restrict__ meas_cell_array, float* __restrict__ masses,
-                                 int cell_count)
+__global__ void copyMassesKernel(const MeasurementCellsSoA meas_cell_array, float* __restrict__ masses, int cell_count)
 {
     for (int j = blockIdx.x * blockDim.x + threadIdx.x; j < cell_count; j += blockDim.x * gridDim.x)
     {
-        masses[j] = meas_cell_array[j].occ_mass;
+        masses[j] = meas_cell_array.occ_mass[j];
     }
 }
 
-__global__ void initParticlesKernel1(GridCell* __restrict__ grid_cell_array,
-                                     const MeasurementCell* __restrict__ meas_cell_array, ParticlesSoA particle_array,
-                                     const float* __restrict__ particle_orders_array_accum, int cell_count)
+__global__ void initParticlesKernel1(ParticlesSoA particle_array, const float* __restrict__ particle_orders_array_accum,
+                                     int cell_count)
 {
     for (int j = blockIdx.x * blockDim.x + threadIdx.x; j < cell_count; j += blockDim.x * gridDim.x)
     {
@@ -98,9 +96,8 @@ __global__ void initParticlesKernel1(GridCell* __restrict__ grid_cell_array,
     }
 }
 
-__global__ void initParticlesKernel2(ParticlesSoA particle_array, const GridCell* __restrict__ grid_cell_array,
-                                     curandState* __restrict__ global_state, float velocity, int grid_size,
-                                     float new_weight, int particle_count)
+__global__ void initParticlesKernel2(ParticlesSoA particle_array, curandState* __restrict__ global_state,
+                                     float velocity, int grid_size, float new_weight, int particle_count)
 {
     int thread_id = blockIdx.x * blockDim.x + threadIdx.x;
     int stride = blockDim.x * gridDim.x;
@@ -123,8 +120,7 @@ __global__ void initParticlesKernel2(ParticlesSoA particle_array, const GridCell
     global_state[thread_id] = local_state;
 }
 
-__global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array,
-                                        const MeasurementCell* __restrict__ meas_cell_array,
+__global__ void initNewParticlesKernel1(GridCellsSoA grid_cell_array, const MeasurementCellsSoA meas_cell_array,
                                         const float* __restrict__ weight_array,
                                         const float* __restrict__ born_masses_array, ParticlesSoA birth_particle_array,
                                         const float* __restrict__ particle_orders_array_accum, int cell_count)
@@ -135,7 +131,7 @@ __global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array,
         int end_idx = calc_end_idx(particle_orders_array_accum, j);
 
         int num_new_particles = start_idx <= end_idx ? end_idx - start_idx + 1 : 0;
-        float p_A = meas_cell_array[j].p_A;
+        float p_A = meas_cell_array.p_A[j];
         int nu_A = calc_num_assoc(num_new_particles, p_A);
         int nu_UA = num_new_particles - nu_A;
         float w_A = calc_weight_assoc(nu_A, p_A, born_masses_array[j]);
@@ -154,7 +150,7 @@ __global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array,
     }
 }
 
-__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCell* __restrict__ grid_cell_array,
+__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCellsSoA grid_cell_array,
                                         curandState* __restrict__ global_state, float stddev_velocity,
                                         float max_velocity, int grid_size, int particle_count)
 {
@@ -166,7 +162,6 @@ __global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const
     for (int i = thread_id; i < particle_count; i += stride)
     {
         int cell_idx = birth_particle_array.grid_cell_idx[i];
-        const GridCell& grid_cell = grid_cell_array[cell_idx];
         bool associated = birth_particle_array.associated[i];
 
         float x = cell_idx % grid_size + 0.5f;
@@ -178,7 +173,7 @@ __global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const
             float vel_x = curand_normal(&local_state, 0.0f, stddev_velocity);
             float vel_y = curand_normal(&local_state, 0.0f, stddev_velocity);
 
-            birth_particle_array.weight[i] = grid_cell.w_A;
+            birth_particle_array.weight[i] = grid_cell_array.w_A[cell_idx];
             birth_particle_array.state[i] = glm::vec4(x, y, vel_x, vel_y);
         }
         else
@@ -186,7 +181,7 @@ __global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const
             float vel_x = curand_normal(&local_state, 0.0f, stddev_velocity);
             float vel_y = curand_normal(&local_state, 0.0f, stddev_velocity);
 
-            birth_particle_array.weight[i] = grid_cell.w_UA;
+            birth_particle_array.weight[i] = grid_cell_array.w_UA[cell_idx];
             birth_particle_array.state[i] = glm::vec4(x, y, vel_x, vel_y);
         }
     }
diff --git a/dogm/src/kernel/mass_update.cu b/dogm/src/kernel/mass_update.cu
index 57c46a5..3af1b54 100644
--- a/dogm/src/kernel/mass_update.cu
+++ b/dogm/src/kernel/mass_update.cu
@@ -13,22 +13,23 @@
 namespace dogm
 {
 
-__device__ float predict_free_mass(const GridCell& grid_cell, float m_occ_pred, float alpha = 0.9)
+__device__ float predict_free_mass(float grid_cell_free_mass, float m_occ_pred, float alpha = 0.9)
 {
-    return min(alpha * grid_cell.free_mass, 1.0f - m_occ_pred);
+    return min(alpha * grid_cell_free_mass, 1.0f - m_occ_pred);
 }
 
-__device__ float2 update_masses(float m_occ_pred, float m_free_pred, const MeasurementCell& meas_cell)
+__device__ float2 update_masses(float m_occ_pred, float m_free_pred, const MeasurementCellsSoA meas_cells, int meas_idx)
 {
     float unknown_pred = 1.0f - m_occ_pred - m_free_pred;
-    float meas_unknown = 1.0f - meas_cell.free_mass - meas_cell.occ_mass;
-    float K = m_free_pred * meas_cell.occ_mass + m_occ_pred * meas_cell.free_mass;
+    float meas_unknown = 1.0f - meas_cells.free_mass[meas_idx] - meas_cells.occ_mass[meas_idx];
+    float K = m_free_pred * meas_cells.occ_mass[meas_idx] + m_occ_pred * meas_cells.free_mass[meas_idx];
 
-    float occ_mass =
-        (m_occ_pred * meas_unknown + unknown_pred * meas_cell.occ_mass + m_occ_pred * meas_cell.occ_mass) / (1.0f - K);
-    float free_mass =
-        (m_free_pred * meas_unknown + unknown_pred * meas_cell.free_mass + m_free_pred * meas_cell.free_mass) /
-        (1.0f - K);
+    float occ_mass = (m_occ_pred * meas_unknown + unknown_pred * meas_cells.occ_mass[meas_idx] +
+                      m_occ_pred * meas_cells.occ_mass[meas_idx]) /
+                     (1.0f - K);
+    float free_mass = (m_free_pred * meas_unknown + unknown_pred * meas_cells.free_mass[meas_idx] +
+                       m_free_pred * meas_cells.free_mass[meas_idx]) /
+                      (1.0f - K);
 
     return make_float2(occ_mass, free_mass);
 }
@@ -39,13 +40,13 @@ __device__ float separate_newborn_part(float m_occ_pred, float m_occ_up, float p
 }
 
 __device__ void store_values(float rho_b, float rho_p, float m_free_up, float m_occ_up, float m_occ_pred,
-                             GridCell* __restrict__ grid_cell_array, int i)
+                             GridCellsSoA grid_cell_array, int i)
 {
-    grid_cell_array[i].pers_occ_mass = rho_p;
-    grid_cell_array[i].new_born_occ_mass = rho_b;
-    grid_cell_array[i].free_mass = m_free_up;
-    grid_cell_array[i].occ_mass = m_occ_up;
-    grid_cell_array[i].pred_occ_mass = m_occ_pred;
+    grid_cell_array.pers_occ_mass[i] = rho_p;
+    grid_cell_array.new_born_occ_mass[i] = rho_b;
+    grid_cell_array.free_mass[i] = m_free_up;
+    grid_cell_array.occ_mass[i] = m_occ_up;
+    grid_cell_array.pred_occ_mass[i] = m_occ_pred;
 }
 
 __device__ void normalize_weights(const ParticlesSoA& particle_array, float* __restrict__ weight_array, int start_idx,
@@ -58,16 +59,16 @@ __device__ void normalize_weights(const ParticlesSoA& particle_array, float* __r
     }
 }
 
-__global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_array, ParticlesSoA particle_array,
+__global__ void gridCellPredictionUpdateKernel(GridCellsSoA grid_cell_array, ParticlesSoA particle_array,
                                                float* __restrict__ weight_array,
                                                const float* __restrict__ weight_array_accum,
-                                               const MeasurementCell* __restrict__ meas_cell_array,
+                                               const MeasurementCellsSoA meas_cell_array,
                                                float* __restrict__ born_masses_array, float p_B, int cell_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x)
     {
-        int start_idx = grid_cell_array[i].start_idx;
-        int end_idx = grid_cell_array[i].end_idx;
+        int start_idx = grid_cell_array.start_idx[i];
+        int end_idx = grid_cell_array.end_idx[i];
 
         if (start_idx != -1)
         {
@@ -79,8 +80,8 @@ __global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_
                 m_occ_pred = 1.0f;
             }
 
-            float m_free_pred = predict_free_mass(grid_cell_array[i], m_occ_pred);
-            float2 masses_up = update_masses(m_occ_pred, m_free_pred, meas_cell_array[i]);
+            float m_free_pred = predict_free_mass(grid_cell_array.free_mass[i], m_occ_pred);
+            float2 masses_up = update_masses(m_occ_pred, m_free_pred, meas_cell_array, i);
             float rho_b = separate_newborn_part(m_occ_pred, masses_up.x, p_B);
             float rho_p = masses_up.x - rho_b;
             born_masses_array[i] = rho_b;
@@ -89,9 +90,9 @@ __global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_
         }
         else
         {
-            float m_occ = grid_cell_array[i].occ_mass;
-            float m_free = predict_free_mass(grid_cell_array[i], m_occ);
-            float2 masses_up = update_masses(m_occ, m_free, meas_cell_array[i]);
+            float m_occ = grid_cell_array.occ_mass[i];
+            float m_free = predict_free_mass(grid_cell_array.free_mass[i], m_occ);
+            float2 masses_up = update_masses(m_occ, m_free, meas_cell_array, i);
             born_masses_array[i] = 0.0f;
             store_values(0.0f, masses_up.x, masses_up.y, masses_up.x, 0.0f, grid_cell_array, i);
         }
diff --git a/dogm/src/kernel/particle_to_grid.cu b/dogm/src/kernel/particle_to_grid.cu
index ef5d426..3bca9c0 100644
--- a/dogm/src/kernel/particle_to_grid.cu
+++ b/dogm/src/kernel/particle_to_grid.cu
@@ -23,7 +23,7 @@ __device__ bool is_last_particle(const ParticlesSoA& particle_array, int particl
     return i == particle_count - 1 || particle_array.grid_cell_idx[i] != particle_array.grid_cell_idx[i + 1];
 }
 
-__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCell* __restrict__ grid_cell_array,
+__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCellsSoA grid_cell_array,
                                      float* __restrict__ weight_array, int particle_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particle_count; i += blockDim.x * gridDim.x)
@@ -32,11 +32,11 @@ __global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCell
 
         if (is_first_particle(particle_array, i))
         {
-            grid_cell_array[cell_idx].start_idx = i;
+            grid_cell_array.start_idx[cell_idx] = i;
         }
         if (is_last_particle(particle_array, particle_count, i))
         {
-            grid_cell_array[cell_idx].end_idx = i;
+            grid_cell_array.end_idx[cell_idx] = i;
         }
 
         weight_array[i] = particle_array.weight[i];
diff --git a/dogm/src/kernel/statistical_moments.cu b/dogm/src/kernel/statistical_moments.cu
index 74b4db0..80c0181 100644
--- a/dogm/src/kernel/statistical_moments.cu
+++ b/dogm/src/kernel/statistical_moments.cu
@@ -45,14 +45,14 @@ __device__ float calc_covariance(const float* __restrict__ vel_xy_array_accum, i
     return 0.0f;
 }
 
-__device__ void store(GridCell* __restrict__ grid_cell_array, int cell_idx, float mean_x_vel, float mean_y_vel,
-                      float var_x_vel, float var_y_vel, float covar_xy_vel)
+__device__ void store(GridCellsSoA grid_cell_array, int cell_idx, float mean_x_vel, float mean_y_vel, float var_x_vel,
+                      float var_y_vel, float covar_xy_vel)
 {
-    grid_cell_array[cell_idx].mean_x_vel = mean_x_vel;
-    grid_cell_array[cell_idx].mean_y_vel = mean_y_vel;
-    grid_cell_array[cell_idx].var_x_vel = var_x_vel;
-    grid_cell_array[cell_idx].var_y_vel = var_y_vel;
-    grid_cell_array[cell_idx].covar_xy_vel = covar_xy_vel;
+    grid_cell_array.mean_x_vel[cell_idx] = mean_x_vel;
+    grid_cell_array.mean_y_vel[cell_idx] = mean_y_vel;
+    grid_cell_array.var_x_vel[cell_idx] = var_x_vel;
+    grid_cell_array.var_y_vel[cell_idx] = var_y_vel;
+    grid_cell_array.covar_xy_vel[cell_idx] = covar_xy_vel;
 }
 
 __global__ void statisticalMomentsKernel1(const ParticlesSoA particle_array, const float* __restrict__ weight_array,
@@ -75,8 +75,7 @@ __global__ void statisticalMomentsKernel1(const ParticlesSoA particle_array, con
     }
 }
 
-__global__ void statisticalMomentsKernel2(GridCell* __restrict__ grid_cell_array,
-                                          const float* __restrict__ vel_x_array_accum,
+__global__ void statisticalMomentsKernel2(GridCellsSoA grid_cell_array, const float* __restrict__ vel_x_array_accum,
                                           const float* __restrict__ vel_y_array_accum,
                                           const float* __restrict__ vel_x_squared_array_accum,
                                           const float* __restrict__ vel_y_squared_array_accum,
@@ -84,9 +83,9 @@ __global__ void statisticalMomentsKernel2(GridCell* __restrict__ grid_cell_array
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x)
     {
-        int start_idx = grid_cell_array[i].start_idx;
-        int end_idx = grid_cell_array[i].end_idx;
-        float rho_p = grid_cell_array[i].pers_occ_mass;
+        int start_idx = grid_cell_array.start_idx[i];
+        int end_idx = grid_cell_array.end_idx[i];
+        float rho_p = grid_cell_array.pers_occ_mass[i];
 
         if (start_idx != -1)
         {
diff --git a/dogm/src/kernel/update_persistent_particles.cu b/dogm/src/kernel/update_persistent_particles.cu
index c9ce782..f1f7a64 100644
--- a/dogm/src/kernel/update_persistent_particles.cu
+++ b/dogm/src/kernel/update_persistent_particles.cu
@@ -18,36 +18,34 @@ __device__ float calc_norm_assoc(float occ_accum, float rho_p)
     return occ_accum > 0.0f ? rho_p / occ_accum : 0.0f;
 }
 
-__device__ float calc_norm_unassoc(const GridCell& grid_cell)
+__device__ float calc_norm_unassoc(float pred_occ_mass, float pers_occ_mass)
 {
-    float pred_occ_mass = grid_cell.pred_occ_mass;
-    return pred_occ_mass > 0.0f ? grid_cell.pers_occ_mass / pred_occ_mass : 0.0f;
+    return pred_occ_mass > 0.0f ? pers_occ_mass / pred_occ_mass : 0.0f;
 }
 
-__device__ void set_normalization_components(GridCell* __restrict__ grid_cell_array, int i, float mu_A, float mu_UA)
+__device__ void set_normalization_components(GridCellsSoA grid_cell_array, int i, float mu_A, float mu_UA)
 {
-    grid_cell_array[i].mu_A = mu_A;
-    grid_cell_array[i].mu_UA = mu_UA;
+    grid_cell_array.mu_A[i] = mu_A;
+    grid_cell_array.mu_UA[i] = mu_UA;
 }
 
-__device__ float update_unnorm(const ParticlesSoA& particle_array, int i,
-                               const MeasurementCell* __restrict__ meas_cell_array)
+__device__ float update_unnorm(const ParticlesSoA& particle_array, int i, const MeasurementCellsSoA meas_cell_array)
 {
-    return meas_cell_array[particle_array.grid_cell_idx[i]].likelihood * particle_array.weight[i];
+    return meas_cell_array.likelihood[particle_array.grid_cell_idx[i]] * particle_array.weight[i];
 }
 
-__device__ float normalize(const ParticlesSoA& particle, int i, const GridCell* __restrict__ grid_cell_array,
-                           const MeasurementCell* __restrict__ meas_cell_array, float weight)
+__device__ float normalize(const ParticlesSoA& particle, int i, const GridCellsSoA grid_cell_array,
+                           const MeasurementCellsSoA meas_cell_array, float weight)
 {
     const int cell_idx = particle.grid_cell_idx[i];
-    const GridCell& cell = grid_cell_array[cell_idx];
-    const MeasurementCell& meas_cell = meas_cell_array[cell_idx];
+    const float p_A = meas_cell_array.p_A[cell_idx];
 
-    return meas_cell.p_A * cell.mu_A * weight + (1.0f - meas_cell.p_A) * cell.mu_UA * particle.weight[i];
+    return p_A * grid_cell_array.mu_A[cell_idx] * weight +
+           (1.0f - p_A) * grid_cell_array.mu_UA[cell_idx] * particle.weight[i];
 }
 
 __global__ void updatePersistentParticlesKernel1(const ParticlesSoA particle_array,
-                                                 const MeasurementCell* __restrict__ meas_cell_array,
+                                                 const MeasurementCellsSoA meas_cell_array,
                                                  float* __restrict__ weight_array, int particle_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particle_count; i += blockDim.x * gridDim.x)
@@ -56,29 +54,29 @@ __global__ void updatePersistentParticlesKernel1(const ParticlesSoA particle_arr
     }
 }
 
-__global__ void updatePersistentParticlesKernel2(GridCell* __restrict__ grid_cell_array,
+__global__ void updatePersistentParticlesKernel2(GridCellsSoA grid_cell_array,
                                                  const float* __restrict__ weight_array_accum, int cell_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x)
     {
-        int start_idx = grid_cell_array[i].start_idx;
-        int end_idx = grid_cell_array[i].end_idx;
+        int start_idx = grid_cell_array.start_idx[i];
+        int end_idx = grid_cell_array.end_idx[i];
 
         if (start_idx != -1)
         {
             float m_occ_accum = subtract(weight_array_accum, start_idx, end_idx);
-            float rho_p = grid_cell_array[i].pers_occ_mass;
+            float rho_p = grid_cell_array.pers_occ_mass[i];
             float mu_A = calc_norm_assoc(m_occ_accum, rho_p);
-            float mu_UA = calc_norm_unassoc(grid_cell_array[i]);
+            float mu_UA = calc_norm_unassoc(grid_cell_array.pred_occ_mass[i], grid_cell_array.pers_occ_mass[i]);
             set_normalization_components(grid_cell_array, i, mu_A, mu_UA);
         }
     }
 }
 
 __global__ void updatePersistentParticlesKernel3(const ParticlesSoA particle_array,
-                                                 const MeasurementCell* __restrict__ meas_cell_array,
-                                                 const GridCell* __restrict__ grid_cell_array,
-                                                 float* __restrict__ weight_array, int particle_count)
+                                                 const MeasurementCellsSoA meas_cell_array,
+                                                 const GridCellsSoA grid_cell_array, float* __restrict__ weight_array,
+                                                 int particle_count)
 {
     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particle_count; i += blockDim.x * gridDim.x)
     {
diff --git a/dogm/test/dogm_spec.cpp b/dogm/test/dogm_spec.cpp
index a0b5322..4ce4583 100644
--- a/dogm/test/dogm_spec.cpp
+++ b/dogm/test/dogm_spec.cpp
@@ -28,8 +28,10 @@ TEST(DOGM, EgoMotionCompensation)
     glm::vec4 old_state = particles.state[0];
     glm::vec2 pose{10.0f, 10.0f};
 
+    dogm::MeasurementCellsSoA meas_grid(dogm.getGridSize() * dogm.getGridSize(), false);
+
     // Set initial pose (no position update)
-    dogm.updateGrid(nullptr, pose.x, pose.y, 0.0f, 0.0f);
+    dogm.updateGrid(meas_grid, pose.x, pose.y, 0.0f, 0.0f);
     cudaDeviceSynchronize();
     EXPECT_EQ(pose.x, dogm.getPositionX());
     EXPECT_EQ(pose.y, dogm.getPositionY());
@@ -37,7 +39,7 @@ TEST(DOGM, EgoMotionCompensation)
     EXPECT_EQ(old_state, new_particles.state[0]);
 
     // Change lower than resolution doesn't lead to update after initial position is set
-    dogm.updateGrid(nullptr, pose.x + 0.5f, pose.y + 0.5f, 0.0f, 0.0f);
+    dogm.updateGrid(meas_grid, pose.x + 0.5f, pose.y + 0.5f, 0.0f, 0.0f);
     cudaDeviceSynchronize();
     EXPECT_EQ(pose.x, dogm.getPositionX());
     EXPECT_EQ(pose.y, dogm.getPositionY());
@@ -47,7 +49,7 @@ TEST(DOGM, EgoMotionCompensation)
     // Update pose -> position update
     const float x_change = 3.0f;
     pose.x += x_change;
-    dogm.updateGrid(nullptr, pose.x, pose.y, 0.0f, 0.0f);
+    dogm.updateGrid(meas_grid, pose.x, pose.y, 0.0f, 0.0f);
     cudaDeviceSynchronize();
     EXPECT_EQ(pose.x, dogm.getPositionX());
     EXPECT_EQ(pose.y, dogm.getPositionY());