diff --git a/dogm/demo/main.cpp b/dogm/demo/main.cpp index 2be2c89..cc6ffa8 100644 --- a/dogm/demo/main.cpp +++ b/dogm/demo/main.cpp @@ -80,7 +80,7 @@ int main(int argc, const char** argv) for (int step = 0; step < num_simulation_steps; ++step) { - dogm::MeasurementCell* meas_grid = grid_generator.generateGrid(sim_data[step].measurements); + dogm::MeasurementCellsSoA meas_grid = grid_generator.generateGrid(sim_data[step].measurements); const auto update_grid_caller = [&grid_map](auto&&... args) { grid_map.updateGrid(std::forward(args)...); diff --git a/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h b/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h index 49eb32d..a89d29a 100644 --- a/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h +++ b/dogm/demo/simulator/include/mapping/kernel/measurement_grid.h @@ -6,19 +6,14 @@ #include -namespace dogm -{ -struct MeasurementCell; -} - __global__ void createPolarGridTextureKernel(cudaSurfaceObject_t polar, const float* __restrict__ measurements, int width, int height, float resolution); __global__ void fusePolarGridTextureKernel(cudaSurfaceObject_t polar, const float* __restrict__ measurements, int width, int height, float resolution); -__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid, - cudaSurfaceObject_t cart, int grid_size); +__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, cudaSurfaceObject_t cart, + int grid_size); -__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid, - const float2* __restrict__ grid, int grid_size); +__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, const float2* __restrict__ grid, + int grid_size); diff --git a/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h b/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h index 0d210e5..7c74fea 100644 --- a/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h +++ b/dogm/demo/simulator/include/mapping/laser_to_meas_grid.h @@ -23,10 +23,10 @@ class LaserMeasurementGrid LaserMeasurementGrid(const Params& params, float grid_length, float resolution); ~LaserMeasurementGrid(); - dogm::MeasurementCell* generateGrid(const std::vector& measurements); + dogm::MeasurementCellsSoA generateGrid(const std::vector& measurements); private: - dogm::MeasurementCell* meas_grid; + dogm::MeasurementCellsSoA meas_grid; int grid_size; Params params; diff --git a/dogm/demo/simulator/mapping/kernel/measurement_grid.cu b/dogm/demo/simulator/mapping/kernel/measurement_grid.cu index 4337ba9..16147c3 100644 --- a/dogm/demo/simulator/mapping/kernel/measurement_grid.cu +++ b/dogm/demo/simulator/mapping/kernel/measurement_grid.cu @@ -112,8 +112,8 @@ __global__ void fusePolarGridTextureKernel(cudaSurfaceObject_t polar, const floa } } -__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid, - cudaSurfaceObject_t cart, int grid_size) +__global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, cudaSurfaceObject_t cart, + int grid_size) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -123,16 +123,16 @@ __global__ void cartesianGridToMeasurementGridKernel(dogm::MeasurementCell* __re { float4 color = surf2Dread(cart, x * sizeof(float4), y); - meas_grid[index].occ_mass = color.x; - meas_grid[index].free_mass = color.y; + meas_grid.occ_mass[index] = color.x; + meas_grid.free_mass[index] = color.y; - meas_grid[index].likelihood = 1.0f; - meas_grid[index].p_A = 1.0f; + meas_grid.likelihood[index] = 1.0f; + meas_grid.p_A[index] = 1.0f; } } -__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCell* __restrict__ meas_grid, - const float2* __restrict__ grid, int grid_size) +__global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCellsSoA meas_grid, const float2* __restrict__ grid, + int grid_size) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -142,10 +142,10 @@ __global__ void gridArrayToMeasurementGridKernel(dogm::MeasurementCell* __restri { float2 masses = grid[index]; - meas_grid[index].occ_mass = masses.x; - meas_grid[index].free_mass = masses.y; + meas_grid.occ_mass[index] = masses.x; + meas_grid.free_mass[index] = masses.y; - meas_grid[index].likelihood = 1.0f; - meas_grid[index].p_A = 1.0f; + meas_grid.likelihood[index] = 1.0f; + meas_grid.p_A[index] = 1.0f; } } diff --git a/dogm/demo/simulator/mapping/laser_to_meas_grid.cu b/dogm/demo/simulator/mapping/laser_to_meas_grid.cu index 035afd6..0c1ecb4 100644 --- a/dogm/demo/simulator/mapping/laser_to_meas_grid.cu +++ b/dogm/demo/simulator/mapping/laser_to_meas_grid.cu @@ -12,23 +12,23 @@ LaserMeasurementGrid::LaserMeasurementGrid(const Params& params, float grid_leng { int grid_cell_count = grid_size * grid_size; - CHECK_ERROR(cudaMalloc(&meas_grid, grid_cell_count * sizeof(dogm::MeasurementCell))); + meas_grid.init(grid_cell_count, true); renderer = std::make_unique(grid_size, params.fov, grid_length, params.max_range); } LaserMeasurementGrid::~LaserMeasurementGrid() { - CHECK_ERROR(cudaFree(meas_grid)); + meas_grid.free(); } -dogm::MeasurementCell* LaserMeasurementGrid::generateGrid(const std::vector& measurements) +dogm::MeasurementCellsSoA LaserMeasurementGrid::generateGrid(const std::vector& measurements) { const int num_measurements = measurements.size(); float* d_measurements; - CHECK_ERROR(cudaMalloc(&d_measurements, num_measurements * sizeof(float))); - CHECK_ERROR( + CUDA_CALL(cudaMalloc(&d_measurements, num_measurements * sizeof(float))); + CUDA_CALL( cudaMemcpy(d_measurements, measurements.data(), num_measurements * sizeof(float), cudaMemcpyHostToDevice)); const int polar_width = num_measurements; @@ -47,7 +47,7 @@ dogm::MeasurementCell* LaserMeasurementGrid::generateGrid(const std::vector>>(polar_surface, d_measurements, polar_width, polar_height, params.resolution); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); polar_texture.endCudaAccess(polar_surface); // render cartesian image to texture using polar texture @@ -60,11 +60,11 @@ dogm::MeasurementCell* LaserMeasurementGrid::generateGrid(const std::vector>>(meas_grid, cartesian_surface, grid_size); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); framebuffer->endCudaAccess(cartesian_surface); - CHECK_ERROR(cudaFree(d_measurements)); - CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaFree(d_measurements)); + CUDA_CALL(cudaDeviceSynchronize()); return meas_grid; } diff --git a/dogm/demo/simulator/mapping/opengl/framebuffer.cpp b/dogm/demo/simulator/mapping/opengl/framebuffer.cpp index 8c67d33..4c7144e 100644 --- a/dogm/demo/simulator/mapping/opengl/framebuffer.cpp +++ b/dogm/demo/simulator/mapping/opengl/framebuffer.cpp @@ -23,7 +23,7 @@ Framebuffer::Framebuffer(int width, int height) glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0); - CHECK_ERROR(cudaGraphicsGLRegisterImage(&resource, texture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly)); + CUDA_CALL(cudaGraphicsGLRegisterImage(&resource, texture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly)); glBindFramebuffer(GL_FRAMEBUFFER, 0); } @@ -36,23 +36,23 @@ Framebuffer::~Framebuffer() void Framebuffer::beginCudaAccess(cudaSurfaceObject_t* surfaceObject) { - CHECK_ERROR(cudaGraphicsMapResources(1, &resource, nullptr)); + CUDA_CALL(cudaGraphicsMapResources(1, &resource, nullptr)); cudaArray_t cudaArray; - CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0)); + CUDA_CALL(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0)); cudaResourceDesc resourceDesc; memset(&resourceDesc, 0, sizeof(cudaResourceDesc)); resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = cudaArray; - CHECK_ERROR(cudaCreateSurfaceObject(surfaceObject, &resourceDesc)); + CUDA_CALL(cudaCreateSurfaceObject(surfaceObject, &resourceDesc)); } void Framebuffer::endCudaAccess(cudaSurfaceObject_t surfaceObject) { - CHECK_ERROR(cudaGraphicsUnmapResources(1, &resource, nullptr)); - CHECK_ERROR(cudaDestroySurfaceObject(surfaceObject)); + CUDA_CALL(cudaGraphicsUnmapResources(1, &resource, nullptr)); + CUDA_CALL(cudaDestroySurfaceObject(surfaceObject)); } void Framebuffer::bind() diff --git a/dogm/demo/simulator/mapping/opengl/texture.cpp b/dogm/demo/simulator/mapping/opengl/texture.cpp index 682a440..63a06a5 100644 --- a/dogm/demo/simulator/mapping/opengl/texture.cpp +++ b/dogm/demo/simulator/mapping/opengl/texture.cpp @@ -31,7 +31,7 @@ Texture::Texture(int width, int height, float anisotropy_level) float color[] = {0.0f, 0.0f, 1.0f, 1.0f}; glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); - CHECK_ERROR( + CUDA_CALL( cudaGraphicsGLRegisterImage(&resource, texture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsSurfaceLoadStore)); glBindTexture(GL_TEXTURE_2D, 0); @@ -44,24 +44,24 @@ Texture::~Texture() void Texture::beginCudaAccess(cudaSurfaceObject_t* surfaceObject) { - CHECK_ERROR(cudaGraphicsMapResources(1, &resource, nullptr)); + CUDA_CALL(cudaGraphicsMapResources(1, &resource, nullptr)); cudaArray_t cudaArray; - CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0)); + CUDA_CALL(cudaGraphicsSubResourceGetMappedArray(&cudaArray, resource, 0, 0)); cudaResourceDesc resourceDesc; memset(&resourceDesc, 0, sizeof(cudaResourceDesc)); resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = cudaArray; - CHECK_ERROR(cudaCreateSurfaceObject(surfaceObject, &resourceDesc)); + CUDA_CALL(cudaCreateSurfaceObject(surfaceObject, &resourceDesc)); } void Texture::endCudaAccess(cudaSurfaceObject_t surfaceObject) { - CHECK_ERROR(cudaGraphicsUnmapResources(1, &resource, nullptr)); - CHECK_ERROR(cudaGraphicsUnregisterResource(resource)); - CHECK_ERROR(cudaDestroySurfaceObject(surfaceObject)); + CUDA_CALL(cudaGraphicsUnmapResources(1, &resource, nullptr)); + CUDA_CALL(cudaGraphicsUnregisterResource(resource)); + CUDA_CALL(cudaDestroySurfaceObject(surfaceObject)); } void Texture::generateMipMap() diff --git a/dogm/demo/utils/image_creation.cpp b/dogm/demo/utils/image_creation.cpp index cde8244..4b70c3a 100644 --- a/dogm/demo/utils/image_creation.cpp +++ b/dogm/demo/utils/image_creation.cpp @@ -32,29 +32,34 @@ std::vector> computeCellsWithVelocity(const dogm::DOGM& gr { int index = y * grid_map.getGridSize() + x; - const dogm::GridCell& cell = grid_cells[index]; - float occ = pignistic_transformation(cell.free_mass, cell.occ_mass); + float occ = pignistic_transformation(grid_cells.free_mass[index], grid_cells.occ_mass[index]); cv::Mat velocity_mean(2, 1, CV_32FC1); - velocity_mean.at(0) = cell.mean_x_vel; - velocity_mean.at(1) = cell.mean_y_vel; + velocity_mean.at(0) = grid_cells.mean_x_vel[index]; + velocity_mean.at(1) = grid_cells.mean_y_vel[index]; cv::Mat velocity_covar(2, 2, CV_32FC1); - velocity_covar.at(0, 0) = cell.var_x_vel; - velocity_covar.at(1, 0) = cell.covar_xy_vel; - velocity_covar.at(0, 1) = cell.covar_xy_vel; - velocity_covar.at(1, 1) = cell.var_y_vel; + velocity_covar.at(0, 0) = grid_cells.var_x_vel[index]; + velocity_covar.at(1, 0) = grid_cells.covar_xy_vel[index]; + velocity_covar.at(0, 1) = grid_cells.covar_xy_vel[index]; + velocity_covar.at(1, 1) = grid_cells.var_y_vel[index]; cv::Mat velocity_normalized_by_variance = velocity_mean.t() * velocity_covar.inv() * velocity_mean; if (occ >= min_occupancy_threshold && velocity_normalized_by_variance.at(0, 0) >= min_velocity_threshold) { - Point point; + Point point{}; // Storing the point as grid index to be consistent with cell.mean_x_vel and cell.mean_y_vel point.x = static_cast(x); point.y = static_cast(y); - point.data = cell; + point.data.free_mass = grid_cells.free_mass[index]; + point.data.occ_mass = grid_cells.occ_mass[index]; + point.data.mean_x_vel = grid_cells.mean_x_vel[index]; + point.data.mean_y_vel = grid_cells.mean_y_vel[index]; + point.data.var_x_vel = grid_cells.var_x_vel[index]; + point.data.var_y_vel = grid_cells.var_y_vel[index]; + point.data.covar_xy_vel = grid_cells.covar_xy_vel[index]; point.cluster_id = UNCLASSIFIED; cells_with_velocity.push_back(point); @@ -76,8 +81,7 @@ cv::Mat compute_measurement_grid_image(const dogm::DOGM& grid_map) { int index = y * grid_map.getGridSize() + x; - const dogm::MeasurementCell& cell = meas_cells[index]; - float occ = pignistic_transformation(cell.free_mass, cell.occ_mass); + float occ = pignistic_transformation(meas_cells.free_mass[index], meas_cells.occ_mass[index]); auto temp = static_cast(occ * 255.0f); row_ptr[x] = cv::Vec3b(255 - temp, 255 - temp, 255 - temp); @@ -97,9 +101,8 @@ cv::Mat compute_raw_measurement_grid_image(const dogm::DOGM& grid_map) for (int x = 0; x < grid_map.getGridSize(); x++) { int index = y * grid_map.getGridSize() + x; - const dogm::MeasurementCell& cell = meas_cells[index]; - auto red = static_cast(cell.occ_mass * 255.0f); - auto green = static_cast(cell.free_mass * 255.0f); + auto red = static_cast(meas_cells.occ_mass[index] * 255.0f); + auto green = static_cast(meas_cells.free_mass[index] * 255.0f); int blue = 255 - red - green; row_ptr[x] = cv::Vec3b(blue, green, red); @@ -120,8 +123,7 @@ cv::Mat compute_dogm_image(const dogm::DOGM& grid_map, const std::vector(floor(occ * 255)); row_ptr[x] = cv::Vec3b(grayscale_value, grayscale_value, grayscale_value); diff --git a/dogm/include/dogm/cuda_utils.h b/dogm/include/dogm/cuda_utils.h index 3efdbeb..7143117 100644 --- a/dogm/include/dogm/cuda_utils.h +++ b/dogm/include/dogm/cuda_utils.h @@ -10,18 +10,15 @@ #define GPU_LAMBDA [=] __host__ __device__ -#define CHECK_ERROR(ans) \ +#ifndef CUDA_CALL +#define CUDA_CALL(call) \ { \ - checkError((ans), __FILE__, __LINE__); \ + auto status = static_cast(call); \ + if (status != cudaSuccess) \ + fprintf(stderr, "ERROR: CUDA RT call \"%s\" in line %d of file %s failed with %s (%d).\n", #call, \ + __LINE__, __FILE__, cudaGetErrorString(status), status); \ } - -inline void checkError(cudaError_t code, const char* file, int line) -{ - if (code != cudaSuccess) - { - printf("GPU Kernel Error: %s %s %d\n", cudaGetErrorString(code), file, line); - } -} +#endif inline int divUp(int total, int grain) { diff --git a/dogm/include/dogm/dogm.h b/dogm/include/dogm/dogm.h index 9dc064b..2110889 100644 --- a/dogm/include/dogm/dogm.h +++ b/dogm/include/dogm/dogm.h @@ -76,7 +76,7 @@ class DOGM * @param dt delta time since the last update. * @param device whether the measurement grid resides in GPU memory (default: true). */ - void updateGrid(MeasurementCell* measurement_grid, float new_x, float new_y, float new_yaw, float dt, + void updateGrid(MeasurementCellsSoA measurement_grid, float new_x, float new_y, float new_yaw, float dt, bool device = true); /** @@ -84,14 +84,14 @@ class DOGM * * @return grid map. */ - std::vector getGridCells() const; + GridCellsSoA getGridCells() const; /** * Returns the measurement grid map in the host memory. * * @return measurement grid map. */ - std::vector getMeasurementCells() const; + MeasurementCellsSoA getMeasurementCells() const; /** * Returns the persistent particles of the particle filter. @@ -139,7 +139,7 @@ class DOGM void initialize(); void updatePose(float new_x, float new_y, float new_yaw); - void updateMeasurementGrid(MeasurementCell* measurement_grid, bool device); + void updateMeasurementGrid(MeasurementCellsSoA measurement_grid, bool device); public: void initializeParticles(); @@ -155,11 +155,11 @@ class DOGM public: Params params; - GridCell* grid_cell_array; + GridCellsSoA grid_cell_array; ParticlesSoA particle_array; ParticlesSoA particle_array_next; ParticlesSoA birth_particle_array; - MeasurementCell* meas_cell_array; + MeasurementCellsSoA meas_cell_array; float* weight_array; float* birth_weight_array; diff --git a/dogm/include/dogm/dogm_types.h b/dogm/include/dogm/dogm_types.h index 3c250ba..e856232 100644 --- a/dogm/include/dogm/dogm_types.h +++ b/dogm/include/dogm/dogm_types.h @@ -48,6 +48,262 @@ struct Particle glm::vec4 state; }; +struct GridCellsSoA +{ + int* start_idx; + int* end_idx; + float* new_born_occ_mass; + float* pers_occ_mass; + float* free_mass; + float* occ_mass; + float* pred_occ_mass; + float* mu_A; + float* mu_UA; + + float* w_A; + float* w_UA; + + float* mean_x_vel; + float* mean_y_vel; + float* var_x_vel; + float* var_y_vel; + float* covar_xy_vel; + + int size; + bool device; + + GridCellsSoA() : size(0), device(true) {} + + GridCellsSoA(int new_size, bool is_device) { init(new_size, is_device); } + + void init(int new_size, bool is_device) + { + size = new_size; + device = is_device; + if (device) + { + CUDA_CALL(cudaMalloc((void**)&start_idx, size * sizeof(int))); + CUDA_CALL(cudaMalloc((void**)&end_idx, size * sizeof(int))); + CUDA_CALL(cudaMalloc((void**)&new_born_occ_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&pers_occ_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&free_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&occ_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&pred_occ_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&mu_A, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&mu_UA, size * sizeof(float))); + + CUDA_CALL(cudaMalloc((void**)&w_A, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&w_UA, size * sizeof(float))); + + CUDA_CALL(cudaMalloc((void**)&mean_x_vel, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&mean_y_vel, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&var_x_vel, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&var_y_vel, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&covar_xy_vel, size * sizeof(float))); + } + else + { + start_idx = (int*)malloc(size * sizeof(int)); + end_idx = (int*)malloc(size * sizeof(int)); + new_born_occ_mass = (float*)malloc(size * sizeof(float)); + pers_occ_mass = (float*)malloc(size * sizeof(float)); + free_mass = (float*)malloc(size * sizeof(float)); + occ_mass = (float*)malloc(size * sizeof(float)); + pred_occ_mass = (float*)malloc(size * sizeof(float)); + mu_A = (float*)malloc(size * sizeof(float)); + mu_UA = (float*)malloc(size * sizeof(float)); + + w_A = (float*)malloc(size * sizeof(float)); + w_UA = (float*)malloc(size * sizeof(float)); + + mean_x_vel = (float*)malloc(size * sizeof(float)); + mean_y_vel = (float*)malloc(size * sizeof(float)); + var_x_vel = (float*)malloc(size * sizeof(float)); + var_y_vel = (float*)malloc(size * sizeof(float)); + covar_xy_vel = (float*)malloc(size * sizeof(float)); + } + } + + void free() + { + if (device) + { + CUDA_CALL(cudaFree(start_idx)); + CUDA_CALL(cudaFree(end_idx)); + CUDA_CALL(cudaFree(new_born_occ_mass)); + CUDA_CALL(cudaFree(pers_occ_mass)); + CUDA_CALL(cudaFree(free_mass)); + CUDA_CALL(cudaFree(occ_mass)); + CUDA_CALL(cudaFree(pred_occ_mass)); + CUDA_CALL(cudaFree(mu_A)); + CUDA_CALL(cudaFree(mu_UA)); + + CUDA_CALL(cudaFree(w_A)); + CUDA_CALL(cudaFree(w_UA)); + + CUDA_CALL(cudaFree(mean_x_vel)); + CUDA_CALL(cudaFree(mean_y_vel)); + CUDA_CALL(cudaFree(var_x_vel)); + CUDA_CALL(cudaFree(var_y_vel)); + CUDA_CALL(cudaFree(covar_xy_vel)); + } + else + { + ::free(start_idx); + ::free(end_idx); + ::free(new_born_occ_mass); + ::free(pers_occ_mass); + ::free(free_mass); + ::free(occ_mass); + ::free(pred_occ_mass); + ::free(mu_A); + ::free(mu_UA); + + ::free(w_A); + ::free(w_UA); + + ::free(mean_x_vel); + ::free(mean_y_vel); + ::free(var_x_vel); + ::free(var_y_vel); + ::free(covar_xy_vel); + } + } + + void copy(const GridCellsSoA& other, cudaMemcpyKind kind) + { + CUDA_CALL(cudaMemcpy(start_idx, other.start_idx, size * sizeof(int), kind)); + CUDA_CALL(cudaMemcpy(end_idx, other.end_idx, size * sizeof(int), kind)); + CUDA_CALL(cudaMemcpy(new_born_occ_mass, other.new_born_occ_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(pers_occ_mass, other.pers_occ_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(free_mass, other.free_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(occ_mass, other.occ_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(pred_occ_mass, other.pred_occ_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(mu_A, other.mu_A, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(mu_UA, other.mu_UA, size * sizeof(float), kind)); + + CUDA_CALL(cudaMemcpy(w_A, other.w_A, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(w_UA, other.w_UA, size * sizeof(float), kind)); + + CUDA_CALL(cudaMemcpy(mean_x_vel, other.mean_x_vel, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(mean_y_vel, other.mean_y_vel, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(var_x_vel, other.var_x_vel, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(var_y_vel, other.var_y_vel, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(covar_xy_vel, other.covar_xy_vel, size * sizeof(float), kind)); + } + + GridCellsSoA& operator=(const GridCellsSoA& other) + { + if (this != &other) + { + copy(other, cudaMemcpyDeviceToDevice); + } + + return *this; + } + + __device__ void copy(const GridCellsSoA& other, int index, int other_index) + { + start_idx[index] = other.start_idx[other_index]; + end_idx[index] = other.end_idx[other_index]; + new_born_occ_mass[index] = other.new_born_occ_mass[other_index]; + pers_occ_mass[index] = other.pers_occ_mass[other_index]; + free_mass[index] = other.free_mass[other_index]; + occ_mass[index] = other.occ_mass[other_index]; + pred_occ_mass[index] = other.pred_occ_mass[other_index]; + mu_A[index] = other.mu_A[other_index]; + mu_UA[index] = other.mu_UA[other_index]; + + w_A[index] = other.w_A[other_index]; + w_UA[index] = other.w_UA[other_index]; + + mean_x_vel[index] = other.mean_x_vel[other_index]; + mean_y_vel[index] = other.mean_y_vel[other_index]; + var_x_vel[index] = other.var_x_vel[other_index]; + var_y_vel[index] = other.var_y_vel[other_index]; + covar_xy_vel[index] = other.covar_xy_vel[other_index]; + } +}; + +struct MeasurementCellsSoA +{ + float* free_mass; + float* occ_mass; + float* likelihood; + float* p_A; + + int size; + bool device; + + MeasurementCellsSoA() : size(0), device(true) {} + + MeasurementCellsSoA(int new_size, bool is_device) { init(new_size, is_device); } + + void init(int new_size, bool is_device) + { + size = new_size; + device = is_device; + if (device) + { + CUDA_CALL(cudaMalloc((void**)&free_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&occ_mass, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&likelihood, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&p_A, size * sizeof(float))); + } + else + { + free_mass = (float*)malloc(size * sizeof(float)); + occ_mass = (float*)malloc(size * sizeof(float)); + likelihood = (float*)malloc(size * sizeof(float)); + p_A = (float*)malloc(size * sizeof(float)); + } + } + + void free() + { + if (device) + { + CUDA_CALL(cudaFree(free_mass)); + CUDA_CALL(cudaFree(occ_mass)); + CUDA_CALL(cudaFree(likelihood)); + CUDA_CALL(cudaFree(p_A)); + } + else + { + ::free(free_mass); + ::free(occ_mass); + ::free(likelihood); + ::free(p_A); + } + } + + void copy(const MeasurementCellsSoA& other, cudaMemcpyKind kind) + { + CUDA_CALL(cudaMemcpy(free_mass, other.free_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(occ_mass, other.occ_mass, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(likelihood, other.likelihood, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(p_A, other.p_A, size * sizeof(float), kind)); + } + + MeasurementCellsSoA& operator=(const MeasurementCellsSoA& other) + { + if (this != &other) + { + copy(other, cudaMemcpyDeviceToDevice); + } + + return *this; + } + + __device__ void copy(const MeasurementCellsSoA& other, int index, int other_index) + { + free_mass[index] = other.free_mass[other_index]; + occ_mass[index] = other.occ_mass[other_index]; + likelihood[index] = other.likelihood[other_index]; + p_A[index] = other.p_A[other_index]; + } +}; + struct ParticlesSoA { glm::vec4* state; @@ -68,10 +324,10 @@ struct ParticlesSoA device = is_device; if (device) { - CHECK_ERROR(cudaMalloc((void**)&state, size * sizeof(glm::vec4))); - CHECK_ERROR(cudaMalloc((void**)&grid_cell_idx, size * sizeof(int))); - CHECK_ERROR(cudaMalloc((void**)&weight, size * sizeof(float))); - CHECK_ERROR(cudaMalloc((void**)&associated, size * sizeof(bool))); + CUDA_CALL(cudaMalloc((void**)&state, size * sizeof(glm::vec4))); + CUDA_CALL(cudaMalloc((void**)&grid_cell_idx, size * sizeof(int))); + CUDA_CALL(cudaMalloc((void**)&weight, size * sizeof(float))); + CUDA_CALL(cudaMalloc((void**)&associated, size * sizeof(bool))); } else { @@ -86,10 +342,10 @@ struct ParticlesSoA { if (device) { - CHECK_ERROR(cudaFree(state)); - CHECK_ERROR(cudaFree(grid_cell_idx)); - CHECK_ERROR(cudaFree(weight)); - CHECK_ERROR(cudaFree(associated)); + CUDA_CALL(cudaFree(state)); + CUDA_CALL(cudaFree(grid_cell_idx)); + CUDA_CALL(cudaFree(weight)); + CUDA_CALL(cudaFree(associated)); } else { @@ -102,10 +358,10 @@ struct ParticlesSoA void copy(const ParticlesSoA& other, cudaMemcpyKind kind) { - CHECK_ERROR(cudaMemcpy(grid_cell_idx, other.grid_cell_idx, size * sizeof(int), kind)); - CHECK_ERROR(cudaMemcpy(weight, other.weight, size * sizeof(float), kind)); - CHECK_ERROR(cudaMemcpy(associated, other.associated, size * sizeof(bool), kind)); - CHECK_ERROR(cudaMemcpy(state, other.state, size * sizeof(glm::vec4), kind)); + CUDA_CALL(cudaMemcpy(grid_cell_idx, other.grid_cell_idx, size * sizeof(int), kind)); + CUDA_CALL(cudaMemcpy(weight, other.weight, size * sizeof(float), kind)); + CUDA_CALL(cudaMemcpy(associated, other.associated, size * sizeof(bool), kind)); + CUDA_CALL(cudaMemcpy(state, other.state, size * sizeof(glm::vec4), kind)); } ParticlesSoA& operator=(const ParticlesSoA& other) diff --git a/dogm/include/dogm/kernel/ego_motion_compensation.h b/dogm/include/dogm/kernel/ego_motion_compensation.h index 144b56c..e6999aa 100644 --- a/dogm/include/dogm/kernel/ego_motion_compensation.h +++ b/dogm/include/dogm/kernel/ego_motion_compensation.h @@ -10,12 +10,10 @@ namespace dogm { -struct GridCell; -struct ParticlesSoA; - __global__ void moveParticlesKernel(ParticlesSoA particle_array, int x_move, int y_move, int particle_count); -__global__ void moveMapKernel(GridCell* __restrict__ grid_cell_array, const GridCell* __restrict__ old_grid_cell_array, - int x_move, int y_move, int grid_size); +__global__ void moveMapKernel(GridCellsSoA grid_cell_array, GridCellsSoA old_grid_cell_array, + MeasurementCellsSoA meas_cell_array, ParticlesSoA particle_array, int x_move, int y_move, + int grid_size); } /* namespace dogm */ diff --git a/dogm/include/dogm/kernel/init.h b/dogm/include/dogm/kernel/init.h index b143215..a90558c 100644 --- a/dogm/include/dogm/kernel/init.h +++ b/dogm/include/dogm/kernel/init.h @@ -22,9 +22,9 @@ __global__ void initParticlesKernel(ParticlesSoA particle_array, curandState* __ __global__ void initBirthParticlesKernel(ParticlesSoA birth_particle_array, curandState* __restrict__ global_state, float velocity, int grid_size, int particle_count); -__global__ void initGridCellsKernel(GridCell* __restrict__ grid_cell_array, - MeasurementCell* __restrict__ meas_cell_array, int grid_size, int cell_count); +__global__ void initGridCellsKernel(GridCellsSoA grid_cell_array, MeasurementCellsSoA meas_cell_array, int grid_size, + int cell_count); -__global__ void reinitGridParticleIndices(GridCell* __restrict__ grid_cell_array, int cell_count); +__global__ void reinitGridParticleIndices(GridCellsSoA grid_cell_array, int cell_count); } /* namespace dogm */ diff --git a/dogm/include/dogm/kernel/init_new_particles.h b/dogm/include/dogm/kernel/init_new_particles.h index cecfafc..cfa049c 100644 --- a/dogm/include/dogm/kernel/init_new_particles.h +++ b/dogm/include/dogm/kernel/init_new_particles.h @@ -16,24 +16,20 @@ struct Particle; void normalize_particle_orders(float* particle_orders_array_accum, int particle_orders_count, int v_B); -__global__ void copyMassesKernel(const MeasurementCell* __restrict__ meas_cell_array, float* __restrict__ masses, - int cell_count); +__global__ void copyMassesKernel(const MeasurementCellsSoA meas_cell_array, float* __restrict__ masses, int cell_count); -__global__ void initParticlesKernel1(GridCell* __restrict__ grid_cell_array, - const MeasurementCell* __restrict__ meas_cell_array, ParticlesSoA particle_array, - const float* __restrict__ particle_orders_array_accum, int cell_count); +__global__ void initParticlesKernel1(ParticlesSoA particle_array, const float* __restrict__ particle_orders_array_accum, + int cell_count); -__global__ void initParticlesKernel2(ParticlesSoA particle_array, const GridCell* __restrict__ grid_cell_array, - curandState* __restrict__ global_state, float velocity, int grid_size, - float new_weight, int particle_count); +__global__ void initParticlesKernel2(ParticlesSoA particle_array, curandState* __restrict__ global_state, + float velocity, int grid_size, float new_weight, int particle_count); -__global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array, - const MeasurementCell* __restrict__ meas_cell_array, +__global__ void initNewParticlesKernel1(GridCellsSoA grid_cell_array, const MeasurementCellsSoA meas_cell_array, const float* __restrict__ weight_array, const float* __restrict__ born_masses_array, ParticlesSoA birth_particle_array, const float* __restrict__ particle_orders_array_accum, int cell_count); -__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCell* __restrict__ grid_cell_array, +__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCellsSoA grid_cell_array, curandState* __restrict__ global_state, float stddev_velocity, float max_velocity, int grid_size, int particle_count); diff --git a/dogm/include/dogm/kernel/mass_update.h b/dogm/include/dogm/kernel/mass_update.h index 67f16f9..d45546e 100644 --- a/dogm/include/dogm/kernel/mass_update.h +++ b/dogm/include/dogm/kernel/mass_update.h @@ -9,14 +9,10 @@ namespace dogm { -struct GridCell; -struct MeasurementCell; -struct Particle; - -__global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_array, ParticlesSoA particle_array, +__global__ void gridCellPredictionUpdateKernel(GridCellsSoA grid_cell_array, ParticlesSoA particle_array, float* __restrict__ weight_array, const float* __restrict__ weight_array_accum, - const MeasurementCell* __restrict__ meas_cell_array, + const MeasurementCellsSoA meas_cell_array, float* __restrict__ born_masses_array, float p_B, int cell_count); } /* namespace dogm */ diff --git a/dogm/include/dogm/kernel/particle_to_grid.h b/dogm/include/dogm/kernel/particle_to_grid.h index ef03861..26ccdc5 100644 --- a/dogm/include/dogm/kernel/particle_to_grid.h +++ b/dogm/include/dogm/kernel/particle_to_grid.h @@ -9,10 +9,7 @@ namespace dogm { -struct GridCell; -struct Particle; - -__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCell* __restrict__ grid_cell_array, +__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCellsSoA grid_cell_array, float* __restrict__ weight_array, int particle_count); } /* namespace dogm */ diff --git a/dogm/include/dogm/kernel/statistical_moments.h b/dogm/include/dogm/kernel/statistical_moments.h index 8f715a1..31b62a3 100644 --- a/dogm/include/dogm/kernel/statistical_moments.h +++ b/dogm/include/dogm/kernel/statistical_moments.h @@ -9,17 +9,13 @@ namespace dogm { -struct GridCell; -struct Particle; - __global__ void statisticalMomentsKernel1(const ParticlesSoA particle_array, const float* __restrict__ weight_array, float* __restrict__ vel_x_array, float* __restrict__ vel_y_array, float* __restrict__ vel_x_squared_array, float* __restrict__ vel_y_squared_array, float* __restrict__ vel_xy_array, int particle_count); -__global__ void statisticalMomentsKernel2(GridCell* __restrict__ grid_cell_array, - const float* __restrict__ vel_x_array_accum, +__global__ void statisticalMomentsKernel2(GridCellsSoA grid_cell_array, const float* __restrict__ vel_x_array_accum, const float* __restrict__ vel_y_array_accum, const float* __restrict__ vel_x_squared_array_accum, const float* __restrict__ vel_y_squared_array_accum, diff --git a/dogm/include/dogm/kernel/update_persistent_particles.h b/dogm/include/dogm/kernel/update_persistent_particles.h index 9b309fa..44f5c09 100644 --- a/dogm/include/dogm/kernel/update_persistent_particles.h +++ b/dogm/include/dogm/kernel/update_persistent_particles.h @@ -9,20 +9,16 @@ namespace dogm { -struct GridCell; -struct MeasurementCell; -struct Particle; - __global__ void updatePersistentParticlesKernel1(const ParticlesSoA particle_array, - const MeasurementCell* __restrict__ meas_cell_array, + const MeasurementCellsSoA meas_cell_array, float* __restrict__ weight_array, int particle_count); -__global__ void updatePersistentParticlesKernel2(GridCell* __restrict__ grid_cell_array, +__global__ void updatePersistentParticlesKernel2(GridCellsSoA grid_cell_array, const float* __restrict__ weight_array_accum, int cell_count); __global__ void updatePersistentParticlesKernel3(const ParticlesSoA particle_array, - const MeasurementCell* __restrict__ meas_cell_array, - const GridCell* __restrict__ grid_cell_array, - float* __restrict__ weight_array, int particle_count); + const MeasurementCellsSoA meas_cell_array, + const GridCellsSoA grid_cell_array, float* __restrict__ weight_array, + int particle_count); } /* namespace dogm */ diff --git a/dogm/src/dogm.cu b/dogm/src/dogm.cu index fd63d40..e399a8f 100644 --- a/dogm/src/dogm.cu +++ b/dogm/src/dogm.cu @@ -36,10 +36,10 @@ DOGM::DOGM(const Params& params) first_measurement_received(false), position_x(0.0f), position_y(0.0f) { int device; - CHECK_ERROR(cudaGetDevice(&device)); + CUDA_CALL(cudaGetDevice(&device)); cudaDeviceProp device_prop; - CHECK_ERROR(cudaGetDeviceProperties(&device_prop, device)); + CUDA_CALL(cudaGetDeviceProperties(&device_prop, device)); int blocks_per_sm = device_prop.maxThreadsPerMultiProcessor / block_dim.x; dim3 dim(device_prop.multiProcessorCount * blocks_per_sm); @@ -49,22 +49,22 @@ DOGM::DOGM(const Params& params) particle_array_next.init(particle_count, true); birth_particle_array.init(new_born_particle_count, true); - CHECK_ERROR(cudaMalloc(&grid_cell_array, grid_cell_count * sizeof(GridCell))); - CHECK_ERROR(cudaMalloc(&meas_cell_array, grid_cell_count * sizeof(MeasurementCell))); + grid_cell_array.init(grid_cell_count, true); + meas_cell_array.init(grid_cell_count, true); - CHECK_ERROR(cudaMalloc(&weight_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&birth_weight_array, new_born_particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&born_masses_array, grid_cell_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&weight_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&birth_weight_array, new_born_particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&born_masses_array, grid_cell_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&vel_x_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&vel_y_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&vel_x_squared_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&vel_y_squared_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&vel_xy_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&vel_x_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&vel_y_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&vel_x_squared_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&vel_y_squared_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&vel_xy_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&rand_array, particle_count * sizeof(float))); + CUDA_CALL(cudaMalloc(&rand_array, particle_count * sizeof(float))); - CHECK_ERROR(cudaMalloc(&rng_states, particles_grid.x * block_dim.x * sizeof(curandState))); + CUDA_CALL(cudaMalloc(&rng_states, particles_grid.x * block_dim.x * sizeof(curandState))); initialize(); } @@ -75,43 +75,44 @@ DOGM::~DOGM() particle_array_next.free(); birth_particle_array.free(); - CHECK_ERROR(cudaFree(grid_cell_array)); - CHECK_ERROR(cudaFree(meas_cell_array)); + grid_cell_array.free(); + meas_cell_array.free(); - CHECK_ERROR(cudaFree(weight_array)); - CHECK_ERROR(cudaFree(birth_weight_array)); - CHECK_ERROR(cudaFree(born_masses_array)); + CUDA_CALL(cudaFree(weight_array)); + CUDA_CALL(cudaFree(birth_weight_array)); + CUDA_CALL(cudaFree(born_masses_array)); - CHECK_ERROR(cudaFree(vel_x_array)); - CHECK_ERROR(cudaFree(vel_y_array)); - CHECK_ERROR(cudaFree(vel_x_squared_array)); - CHECK_ERROR(cudaFree(vel_y_squared_array)); - CHECK_ERROR(cudaFree(vel_xy_array)); + CUDA_CALL(cudaFree(vel_x_array)); + CUDA_CALL(cudaFree(vel_y_array)); + CUDA_CALL(cudaFree(vel_x_squared_array)); + CUDA_CALL(cudaFree(vel_y_squared_array)); + CUDA_CALL(cudaFree(vel_xy_array)); - CHECK_ERROR(cudaFree(rng_states)); + CUDA_CALL(cudaFree(rand_array)); + + CUDA_CALL(cudaFree(rng_states)); } void DOGM::initialize() { cudaStream_t particles_stream, grid_stream; - CHECK_ERROR(cudaStreamCreate(&particles_stream)); - CHECK_ERROR(cudaStreamCreate(&grid_stream)); + CUDA_CALL(cudaStreamCreate(&particles_stream)); + CUDA_CALL(cudaStreamCreate(&grid_stream)); setupRandomStatesKernel<<>>(rng_states, 123456, particles_grid.x * block_dim.x); - CHECK_ERROR(cudaGetLastError()); - CHECK_ERROR(cudaDeviceSynchronize()); - + CUDA_CALL(cudaGetLastError()); + CUDA_CALL(cudaDeviceSynchronize()); initGridCellsKernel<<>>(grid_cell_array, meas_cell_array, grid_size, grid_cell_count); + CUDA_CALL(cudaGetLastError()); - CHECK_ERROR(cudaGetLastError()); - - CHECK_ERROR(cudaStreamDestroy(particles_stream)); - CHECK_ERROR(cudaStreamDestroy(grid_stream)); + CUDA_CALL(cudaStreamDestroy(particles_stream)); + CUDA_CALL(cudaStreamDestroy(grid_stream)); } -void DOGM::updateGrid(MeasurementCell* measurement_grid, float new_x, float new_y, float new_yaw, float dt, bool device) +void DOGM::updateGrid(MeasurementCellsSoA measurement_grid, float new_x, float new_y, float new_yaw, float dt, + bool device) { updateMeasurementGrid(measurement_grid, device); updatePose(new_x, new_y, new_yaw); @@ -126,25 +127,21 @@ void DOGM::updateGrid(MeasurementCell* measurement_grid, float new_x, float new_ particle_array = particle_array_next; - CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaDeviceSynchronize()); } -std::vector DOGM::getGridCells() const +GridCellsSoA DOGM::getGridCells() const { - std::vector grid_cells(static_cast::size_type>(grid_cell_count)); - - CHECK_ERROR( - cudaMemcpy(grid_cells.data(), grid_cell_array, grid_cell_count * sizeof(GridCell), cudaMemcpyDeviceToHost)); + GridCellsSoA grid_cells(grid_cell_count, false); + grid_cells.copy(grid_cell_array, cudaMemcpyDeviceToHost); return grid_cells; } -std::vector DOGM::getMeasurementCells() const +MeasurementCellsSoA DOGM::getMeasurementCells() const { - std::vector meas_cells(static_cast::size_type>(grid_cell_count)); - - CHECK_ERROR(cudaMemcpy(meas_cells.data(), meas_cell_array, grid_cell_count * sizeof(MeasurementCell), - cudaMemcpyDeviceToHost)); + MeasurementCellsSoA meas_cells(grid_cell_count, false); + meas_cells.copy(meas_cell_array, cudaMemcpyDeviceToHost); return meas_cells; } @@ -176,23 +173,18 @@ void DOGM::updatePose(float new_x, float new_y, float new_yaw) const int x_move = -static_cast(x_diff / params.resolution); const int y_move = -static_cast(y_diff / params.resolution); - GridCell* old_grid_cell_array; - CHECK_ERROR(cudaMalloc(&old_grid_cell_array, grid_cell_count * sizeof(GridCell))); - - CHECK_ERROR(cudaMemcpy(old_grid_cell_array, grid_cell_array, grid_cell_count * sizeof(GridCell), - cudaMemcpyDeviceToDevice)); - CHECK_ERROR(cudaMemset(grid_cell_array, 0, grid_cell_count * sizeof(GridCell))); + moveParticlesKernel<<>>(particle_array, x_move, y_move, particle_count); dim3 dim_block(32, 32); dim3 grid_dim(divUp(grid_size, dim_block.x), divUp(grid_size, dim_block.y)); - moveParticlesKernel<<>>(particle_array, x_move, y_move, particle_count); - CHECK_ERROR(cudaGetLastError()); + GridCellsSoA old_grid_cell_array(grid_cell_count, true); + old_grid_cell_array.copy(grid_cell_array, cudaMemcpyDeviceToDevice); - moveMapKernel<<>>(grid_cell_array, old_grid_cell_array, x_move, y_move, grid_size); - CHECK_ERROR(cudaGetLastError()); + moveMapKernel<<>>(grid_cell_array, old_grid_cell_array, meas_cell_array, + particle_array, x_move, y_move, grid_size); - CHECK_ERROR(cudaFree(old_grid_cell_array)); + old_grid_cell_array.free(); position_x = new_x; position_y = new_y; @@ -201,10 +193,10 @@ void DOGM::updatePose(float new_x, float new_y, float new_yaw) } } -void DOGM::updateMeasurementGrid(MeasurementCell* measurement_grid, bool device) +void DOGM::updateMeasurementGrid(MeasurementCellsSoA measurement_grid, bool device) { cudaMemcpyKind kind = device ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice; - CHECK_ERROR(cudaMemcpy(meas_cell_array, measurement_grid, grid_cell_count * sizeof(MeasurementCell), kind)); + meas_cell_array.copy(measurement_grid, kind); if (!first_measurement_received) { @@ -217,8 +209,8 @@ void DOGM::initializeParticles() { copyMassesKernel<<>>(meas_cell_array, born_masses_array, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); - CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaGetLastError()); + CUDA_CALL(cudaDeviceSynchronize()); thrust::device_vector particle_orders_accum(grid_cell_count); accumulate(born_masses_array, particle_orders_accum); @@ -228,15 +220,14 @@ void DOGM::initializeParticles() normalize_particle_orders(particle_orders_array_accum, grid_cell_count, particle_count); - initParticlesKernel1<<>>(grid_cell_array, meas_cell_array, particle_array, - particle_orders_array_accum, grid_cell_count); + initParticlesKernel1<<>>(particle_array, particle_orders_array_accum, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); - initParticlesKernel2<<>>( - particle_array, grid_cell_array, rng_states, params.init_max_velocity, grid_size, new_weight, particle_count); + initParticlesKernel2<<>>(particle_array, rng_states, params.init_max_velocity, grid_size, + new_weight, particle_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::particlePrediction(float dt) @@ -255,15 +246,15 @@ void DOGM::particlePrediction(float dt) particle_array, rng_states, params.stddev_velocity, grid_size, params.persistence_prob, transition_matrix, params.stddev_process_noise_position, params.stddev_process_noise_velocity, particle_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::particleAssignment() { reinitGridParticleIndices<<>>(grid_cell_array, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); - // CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaGetLastError()); + // CUDA_CALL(cudaDeviceSynchronize()); // sort particles thrust::device_ptr grid_index_ptr(particle_array.grid_cell_idx); @@ -276,12 +267,12 @@ void DOGM::particleAssignment() particleToGridKernel<<>>(particle_array, grid_cell_array, weight_array, particle_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::gridCellOccupancyUpdate() { - // CHECK_ERROR(cudaDeviceSynchronize()); + // CUDA_CALL(cudaDeviceSynchronize()); thrust::device_vector weights_accum(particle_count); accumulate(weight_array, weights_accum); @@ -291,7 +282,7 @@ void DOGM::gridCellOccupancyUpdate() weight_array_accum, meas_cell_array, born_masses_array, params.birth_prob, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::updatePersistentParticles() @@ -299,8 +290,8 @@ void DOGM::updatePersistentParticles() updatePersistentParticlesKernel1<<>>(particle_array, meas_cell_array, weight_array, particle_count); - CHECK_ERROR(cudaGetLastError()); - // CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaGetLastError()); + // CUDA_CALL(cudaDeviceSynchronize()); thrust::device_vector weights_accum(particle_count); accumulate(weight_array, weights_accum); @@ -309,12 +300,12 @@ void DOGM::updatePersistentParticles() updatePersistentParticlesKernel2<<>>( grid_cell_array, weight_array_accum, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); updatePersistentParticlesKernel3<<>>(particle_array, meas_cell_array, grid_cell_array, weight_array, particle_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::initializeNewParticles() @@ -322,8 +313,8 @@ void DOGM::initializeNewParticles() initBirthParticlesKernel<<>>( birth_particle_array, rng_states, params.stddev_velocity, grid_size, new_born_particle_count); - CHECK_ERROR(cudaGetLastError()); - // CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaGetLastError()); + // CUDA_CALL(cudaDeviceSynchronize()); thrust::device_vector particle_orders_accum(grid_cell_count); accumulate(born_masses_array, particle_orders_accum); @@ -335,13 +326,13 @@ void DOGM::initializeNewParticles() born_masses_array, birth_particle_array, particle_orders_array_accum, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); initNewParticlesKernel2<<>>(birth_particle_array, grid_cell_array, rng_states, params.stddev_velocity, params.init_max_velocity, grid_size, new_born_particle_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::statisticalMoments() @@ -350,8 +341,8 @@ void DOGM::statisticalMoments() vel_x_squared_array, vel_y_squared_array, vel_xy_array, particle_count); - CHECK_ERROR(cudaGetLastError()); - // CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaGetLastError()); + // CUDA_CALL(cudaDeviceSynchronize()); thrust::device_vector vel_x_accum(particle_count); accumulate(vel_x_array, vel_x_accum); @@ -377,12 +368,12 @@ void DOGM::statisticalMoments() vel_x_squared_array_accum, vel_y_squared_array_accum, vel_xy_array_accum, grid_cell_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } void DOGM::resampling() { - // CHECK_ERROR(cudaDeviceSynchronize()); + // CUDA_CALL(cudaDeviceSynchronize()); thrust::device_ptr persistent_weights(weight_array); thrust::device_ptr new_born_weights(birth_particle_array.weight); @@ -399,8 +390,8 @@ void DOGM::resampling() resamplingGenerateRandomNumbersKernel<<>>(rand_array, rng_states, joint_max, particle_count); - CHECK_ERROR(cudaGetLastError()); - // CHECK_ERROR(cudaDeviceSynchronize()); + CUDA_CALL(cudaGetLastError()); + // CUDA_CALL(cudaDeviceSynchronize()); thrust::device_ptr rand_ptr(rand_array); thrust::device_vector rand_vector(rand_ptr, rand_ptr + particle_count); @@ -416,7 +407,7 @@ void DOGM::resampling() resamplingKernel<<>>(particle_array, particle_array_next, birth_particle_array, idx_array_resampled, new_weight, particle_count); - CHECK_ERROR(cudaGetLastError()); + CUDA_CALL(cudaGetLastError()); } } /* namespace dogm */ diff --git a/dogm/src/kernel/ego_motion_compensation.cu b/dogm/src/kernel/ego_motion_compensation.cu index 5f833fe..847680f 100644 --- a/dogm/src/kernel/ego_motion_compensation.cu +++ b/dogm/src/kernel/ego_motion_compensation.cu @@ -22,8 +22,9 @@ __global__ void moveParticlesKernel(ParticlesSoA particle_array, int x_move, int } } -__global__ void moveMapKernel(GridCell* __restrict__ grid_cell_array, const GridCell* __restrict__ old_grid_cell_array, - int x_move, int y_move, int grid_size) +__global__ void moveMapKernel(GridCellsSoA grid_cell_array, GridCellsSoA old_grid_cell_array, + MeasurementCellsSoA meas_cell_array, ParticlesSoA particle_array, int x_move, int y_move, + int grid_size) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -37,7 +38,7 @@ __global__ void moveMapKernel(GridCell* __restrict__ grid_cell_array, const Grid if (new_x > 0 && new_x < grid_size && new_y > 0 && new_y < grid_size) { - grid_cell_array[index] = old_grid_cell_array[new_index]; + grid_cell_array.copy(old_grid_cell_array, index, new_index); } } } diff --git a/dogm/src/kernel/init.cu b/dogm/src/kernel/init.cu index df4bdec..7b8db97 100644 --- a/dogm/src/kernel/init.cu +++ b/dogm/src/kernel/init.cu @@ -66,29 +66,29 @@ __global__ void initBirthParticlesKernel(ParticlesSoA birth_particle_array, cura // global_state[thread_id] = local_state; } -__global__ void initGridCellsKernel(GridCell* __restrict__ grid_cell_array, - MeasurementCell* __restrict__ meas_cell_array, int grid_size, int cell_count) +__global__ void initGridCellsKernel(GridCellsSoA grid_cell_array, MeasurementCellsSoA meas_cell_array, int grid_size, + int cell_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x) { - grid_cell_array[i].free_mass = 0.0f; - grid_cell_array[i].occ_mass = 0.0f; - grid_cell_array[i].start_idx = -1; - grid_cell_array[i].end_idx = -1; - - meas_cell_array[i].occ_mass = 0.0f; - meas_cell_array[i].free_mass = 0.0f; - meas_cell_array[i].likelihood = 1.0f; - meas_cell_array[i].p_A = 1.0f; + grid_cell_array.free_mass[i] = 0.0f; + grid_cell_array.occ_mass[i] = 0.0f; + grid_cell_array.start_idx[i] = -1; + grid_cell_array.end_idx[i] = -1; + + meas_cell_array.occ_mass[i] = 0.0f; + meas_cell_array.free_mass[i] = 0.0f; + meas_cell_array.likelihood[i] = 1.0f; + meas_cell_array.p_A[i] = 1.0f; } } -__global__ void reinitGridParticleIndices(GridCell* __restrict__ grid_cell_array, int cell_count) +__global__ void reinitGridParticleIndices(GridCellsSoA grid_cell_array, int cell_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x) { - grid_cell_array[i].start_idx = -1; - grid_cell_array[i].end_idx = -1; + grid_cell_array.start_idx[i] = -1; + grid_cell_array.end_idx[i] = -1; } } diff --git a/dogm/src/kernel/init_new_particles.cu b/dogm/src/kernel/init_new_particles.cu index f5328b8..ea0b13b 100644 --- a/dogm/src/kernel/init_new_particles.cu +++ b/dogm/src/kernel/init_new_particles.cu @@ -57,10 +57,10 @@ __device__ float calc_weight_unassoc(int nu_UA, float p_A, float born_mass) return nu_UA > 0 ? ((1.0f - p_A) * born_mass) / nu_UA : 0.0f; } -__device__ void store_weights(float w_A, float w_UA, GridCell* __restrict__ grid_cell_array, int j) +__device__ void store_weights(float w_A, float w_UA, GridCellsSoA grid_cell_array, int j) { - grid_cell_array[j].w_A = w_A; - grid_cell_array[j].w_UA = w_UA; + grid_cell_array.w_A[j] = w_A; + grid_cell_array.w_UA[j] = w_UA; } void normalize_particle_orders(float* particle_orders_array_accum, int particle_orders_count, int v_B) @@ -73,18 +73,16 @@ void normalize_particle_orders(float* particle_orders_array_accum, int particle_ GPU_LAMBDA(float x) { return x * (v_B / max); }); } -__global__ void copyMassesKernel(const MeasurementCell* __restrict__ meas_cell_array, float* __restrict__ masses, - int cell_count) +__global__ void copyMassesKernel(const MeasurementCellsSoA meas_cell_array, float* __restrict__ masses, int cell_count) { for (int j = blockIdx.x * blockDim.x + threadIdx.x; j < cell_count; j += blockDim.x * gridDim.x) { - masses[j] = meas_cell_array[j].occ_mass; + masses[j] = meas_cell_array.occ_mass[j]; } } -__global__ void initParticlesKernel1(GridCell* __restrict__ grid_cell_array, - const MeasurementCell* __restrict__ meas_cell_array, ParticlesSoA particle_array, - const float* __restrict__ particle_orders_array_accum, int cell_count) +__global__ void initParticlesKernel1(ParticlesSoA particle_array, const float* __restrict__ particle_orders_array_accum, + int cell_count) { for (int j = blockIdx.x * blockDim.x + threadIdx.x; j < cell_count; j += blockDim.x * gridDim.x) { @@ -98,9 +96,8 @@ __global__ void initParticlesKernel1(GridCell* __restrict__ grid_cell_array, } } -__global__ void initParticlesKernel2(ParticlesSoA particle_array, const GridCell* __restrict__ grid_cell_array, - curandState* __restrict__ global_state, float velocity, int grid_size, - float new_weight, int particle_count) +__global__ void initParticlesKernel2(ParticlesSoA particle_array, curandState* __restrict__ global_state, + float velocity, int grid_size, float new_weight, int particle_count) { int thread_id = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -123,8 +120,7 @@ __global__ void initParticlesKernel2(ParticlesSoA particle_array, const GridCell global_state[thread_id] = local_state; } -__global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array, - const MeasurementCell* __restrict__ meas_cell_array, +__global__ void initNewParticlesKernel1(GridCellsSoA grid_cell_array, const MeasurementCellsSoA meas_cell_array, const float* __restrict__ weight_array, const float* __restrict__ born_masses_array, ParticlesSoA birth_particle_array, const float* __restrict__ particle_orders_array_accum, int cell_count) @@ -135,7 +131,7 @@ __global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array, int end_idx = calc_end_idx(particle_orders_array_accum, j); int num_new_particles = start_idx <= end_idx ? end_idx - start_idx + 1 : 0; - float p_A = meas_cell_array[j].p_A; + float p_A = meas_cell_array.p_A[j]; int nu_A = calc_num_assoc(num_new_particles, p_A); int nu_UA = num_new_particles - nu_A; float w_A = calc_weight_assoc(nu_A, p_A, born_masses_array[j]); @@ -154,7 +150,7 @@ __global__ void initNewParticlesKernel1(GridCell* __restrict__ grid_cell_array, } } -__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCell* __restrict__ grid_cell_array, +__global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const GridCellsSoA grid_cell_array, curandState* __restrict__ global_state, float stddev_velocity, float max_velocity, int grid_size, int particle_count) { @@ -166,7 +162,6 @@ __global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const for (int i = thread_id; i < particle_count; i += stride) { int cell_idx = birth_particle_array.grid_cell_idx[i]; - const GridCell& grid_cell = grid_cell_array[cell_idx]; bool associated = birth_particle_array.associated[i]; float x = cell_idx % grid_size + 0.5f; @@ -178,7 +173,7 @@ __global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const float vel_x = curand_normal(&local_state, 0.0f, stddev_velocity); float vel_y = curand_normal(&local_state, 0.0f, stddev_velocity); - birth_particle_array.weight[i] = grid_cell.w_A; + birth_particle_array.weight[i] = grid_cell_array.w_A[cell_idx]; birth_particle_array.state[i] = glm::vec4(x, y, vel_x, vel_y); } else @@ -186,7 +181,7 @@ __global__ void initNewParticlesKernel2(ParticlesSoA birth_particle_array, const float vel_x = curand_normal(&local_state, 0.0f, stddev_velocity); float vel_y = curand_normal(&local_state, 0.0f, stddev_velocity); - birth_particle_array.weight[i] = grid_cell.w_UA; + birth_particle_array.weight[i] = grid_cell_array.w_UA[cell_idx]; birth_particle_array.state[i] = glm::vec4(x, y, vel_x, vel_y); } } diff --git a/dogm/src/kernel/mass_update.cu b/dogm/src/kernel/mass_update.cu index 57c46a5..3af1b54 100644 --- a/dogm/src/kernel/mass_update.cu +++ b/dogm/src/kernel/mass_update.cu @@ -13,22 +13,23 @@ namespace dogm { -__device__ float predict_free_mass(const GridCell& grid_cell, float m_occ_pred, float alpha = 0.9) +__device__ float predict_free_mass(float grid_cell_free_mass, float m_occ_pred, float alpha = 0.9) { - return min(alpha * grid_cell.free_mass, 1.0f - m_occ_pred); + return min(alpha * grid_cell_free_mass, 1.0f - m_occ_pred); } -__device__ float2 update_masses(float m_occ_pred, float m_free_pred, const MeasurementCell& meas_cell) +__device__ float2 update_masses(float m_occ_pred, float m_free_pred, const MeasurementCellsSoA meas_cells, int meas_idx) { float unknown_pred = 1.0f - m_occ_pred - m_free_pred; - float meas_unknown = 1.0f - meas_cell.free_mass - meas_cell.occ_mass; - float K = m_free_pred * meas_cell.occ_mass + m_occ_pred * meas_cell.free_mass; + float meas_unknown = 1.0f - meas_cells.free_mass[meas_idx] - meas_cells.occ_mass[meas_idx]; + float K = m_free_pred * meas_cells.occ_mass[meas_idx] + m_occ_pred * meas_cells.free_mass[meas_idx]; - float occ_mass = - (m_occ_pred * meas_unknown + unknown_pred * meas_cell.occ_mass + m_occ_pred * meas_cell.occ_mass) / (1.0f - K); - float free_mass = - (m_free_pred * meas_unknown + unknown_pred * meas_cell.free_mass + m_free_pred * meas_cell.free_mass) / - (1.0f - K); + float occ_mass = (m_occ_pred * meas_unknown + unknown_pred * meas_cells.occ_mass[meas_idx] + + m_occ_pred * meas_cells.occ_mass[meas_idx]) / + (1.0f - K); + float free_mass = (m_free_pred * meas_unknown + unknown_pred * meas_cells.free_mass[meas_idx] + + m_free_pred * meas_cells.free_mass[meas_idx]) / + (1.0f - K); return make_float2(occ_mass, free_mass); } @@ -39,13 +40,13 @@ __device__ float separate_newborn_part(float m_occ_pred, float m_occ_up, float p } __device__ void store_values(float rho_b, float rho_p, float m_free_up, float m_occ_up, float m_occ_pred, - GridCell* __restrict__ grid_cell_array, int i) + GridCellsSoA grid_cell_array, int i) { - grid_cell_array[i].pers_occ_mass = rho_p; - grid_cell_array[i].new_born_occ_mass = rho_b; - grid_cell_array[i].free_mass = m_free_up; - grid_cell_array[i].occ_mass = m_occ_up; - grid_cell_array[i].pred_occ_mass = m_occ_pred; + grid_cell_array.pers_occ_mass[i] = rho_p; + grid_cell_array.new_born_occ_mass[i] = rho_b; + grid_cell_array.free_mass[i] = m_free_up; + grid_cell_array.occ_mass[i] = m_occ_up; + grid_cell_array.pred_occ_mass[i] = m_occ_pred; } __device__ void normalize_weights(const ParticlesSoA& particle_array, float* __restrict__ weight_array, int start_idx, @@ -58,16 +59,16 @@ __device__ void normalize_weights(const ParticlesSoA& particle_array, float* __r } } -__global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_array, ParticlesSoA particle_array, +__global__ void gridCellPredictionUpdateKernel(GridCellsSoA grid_cell_array, ParticlesSoA particle_array, float* __restrict__ weight_array, const float* __restrict__ weight_array_accum, - const MeasurementCell* __restrict__ meas_cell_array, + const MeasurementCellsSoA meas_cell_array, float* __restrict__ born_masses_array, float p_B, int cell_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x) { - int start_idx = grid_cell_array[i].start_idx; - int end_idx = grid_cell_array[i].end_idx; + int start_idx = grid_cell_array.start_idx[i]; + int end_idx = grid_cell_array.end_idx[i]; if (start_idx != -1) { @@ -79,8 +80,8 @@ __global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_ m_occ_pred = 1.0f; } - float m_free_pred = predict_free_mass(grid_cell_array[i], m_occ_pred); - float2 masses_up = update_masses(m_occ_pred, m_free_pred, meas_cell_array[i]); + float m_free_pred = predict_free_mass(grid_cell_array.free_mass[i], m_occ_pred); + float2 masses_up = update_masses(m_occ_pred, m_free_pred, meas_cell_array, i); float rho_b = separate_newborn_part(m_occ_pred, masses_up.x, p_B); float rho_p = masses_up.x - rho_b; born_masses_array[i] = rho_b; @@ -89,9 +90,9 @@ __global__ void gridCellPredictionUpdateKernel(GridCell* __restrict__ grid_cell_ } else { - float m_occ = grid_cell_array[i].occ_mass; - float m_free = predict_free_mass(grid_cell_array[i], m_occ); - float2 masses_up = update_masses(m_occ, m_free, meas_cell_array[i]); + float m_occ = grid_cell_array.occ_mass[i]; + float m_free = predict_free_mass(grid_cell_array.free_mass[i], m_occ); + float2 masses_up = update_masses(m_occ, m_free, meas_cell_array, i); born_masses_array[i] = 0.0f; store_values(0.0f, masses_up.x, masses_up.y, masses_up.x, 0.0f, grid_cell_array, i); } diff --git a/dogm/src/kernel/particle_to_grid.cu b/dogm/src/kernel/particle_to_grid.cu index ef5d426..3bca9c0 100644 --- a/dogm/src/kernel/particle_to_grid.cu +++ b/dogm/src/kernel/particle_to_grid.cu @@ -23,7 +23,7 @@ __device__ bool is_last_particle(const ParticlesSoA& particle_array, int particl return i == particle_count - 1 || particle_array.grid_cell_idx[i] != particle_array.grid_cell_idx[i + 1]; } -__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCell* __restrict__ grid_cell_array, +__global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCellsSoA grid_cell_array, float* __restrict__ weight_array, int particle_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particle_count; i += blockDim.x * gridDim.x) @@ -32,11 +32,11 @@ __global__ void particleToGridKernel(const ParticlesSoA particle_array, GridCell if (is_first_particle(particle_array, i)) { - grid_cell_array[cell_idx].start_idx = i; + grid_cell_array.start_idx[cell_idx] = i; } if (is_last_particle(particle_array, particle_count, i)) { - grid_cell_array[cell_idx].end_idx = i; + grid_cell_array.end_idx[cell_idx] = i; } weight_array[i] = particle_array.weight[i]; diff --git a/dogm/src/kernel/statistical_moments.cu b/dogm/src/kernel/statistical_moments.cu index 74b4db0..80c0181 100644 --- a/dogm/src/kernel/statistical_moments.cu +++ b/dogm/src/kernel/statistical_moments.cu @@ -45,14 +45,14 @@ __device__ float calc_covariance(const float* __restrict__ vel_xy_array_accum, i return 0.0f; } -__device__ void store(GridCell* __restrict__ grid_cell_array, int cell_idx, float mean_x_vel, float mean_y_vel, - float var_x_vel, float var_y_vel, float covar_xy_vel) +__device__ void store(GridCellsSoA grid_cell_array, int cell_idx, float mean_x_vel, float mean_y_vel, float var_x_vel, + float var_y_vel, float covar_xy_vel) { - grid_cell_array[cell_idx].mean_x_vel = mean_x_vel; - grid_cell_array[cell_idx].mean_y_vel = mean_y_vel; - grid_cell_array[cell_idx].var_x_vel = var_x_vel; - grid_cell_array[cell_idx].var_y_vel = var_y_vel; - grid_cell_array[cell_idx].covar_xy_vel = covar_xy_vel; + grid_cell_array.mean_x_vel[cell_idx] = mean_x_vel; + grid_cell_array.mean_y_vel[cell_idx] = mean_y_vel; + grid_cell_array.var_x_vel[cell_idx] = var_x_vel; + grid_cell_array.var_y_vel[cell_idx] = var_y_vel; + grid_cell_array.covar_xy_vel[cell_idx] = covar_xy_vel; } __global__ void statisticalMomentsKernel1(const ParticlesSoA particle_array, const float* __restrict__ weight_array, @@ -75,8 +75,7 @@ __global__ void statisticalMomentsKernel1(const ParticlesSoA particle_array, con } } -__global__ void statisticalMomentsKernel2(GridCell* __restrict__ grid_cell_array, - const float* __restrict__ vel_x_array_accum, +__global__ void statisticalMomentsKernel2(GridCellsSoA grid_cell_array, const float* __restrict__ vel_x_array_accum, const float* __restrict__ vel_y_array_accum, const float* __restrict__ vel_x_squared_array_accum, const float* __restrict__ vel_y_squared_array_accum, @@ -84,9 +83,9 @@ __global__ void statisticalMomentsKernel2(GridCell* __restrict__ grid_cell_array { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x) { - int start_idx = grid_cell_array[i].start_idx; - int end_idx = grid_cell_array[i].end_idx; - float rho_p = grid_cell_array[i].pers_occ_mass; + int start_idx = grid_cell_array.start_idx[i]; + int end_idx = grid_cell_array.end_idx[i]; + float rho_p = grid_cell_array.pers_occ_mass[i]; if (start_idx != -1) { diff --git a/dogm/src/kernel/update_persistent_particles.cu b/dogm/src/kernel/update_persistent_particles.cu index c9ce782..f1f7a64 100644 --- a/dogm/src/kernel/update_persistent_particles.cu +++ b/dogm/src/kernel/update_persistent_particles.cu @@ -18,36 +18,34 @@ __device__ float calc_norm_assoc(float occ_accum, float rho_p) return occ_accum > 0.0f ? rho_p / occ_accum : 0.0f; } -__device__ float calc_norm_unassoc(const GridCell& grid_cell) +__device__ float calc_norm_unassoc(float pred_occ_mass, float pers_occ_mass) { - float pred_occ_mass = grid_cell.pred_occ_mass; - return pred_occ_mass > 0.0f ? grid_cell.pers_occ_mass / pred_occ_mass : 0.0f; + return pred_occ_mass > 0.0f ? pers_occ_mass / pred_occ_mass : 0.0f; } -__device__ void set_normalization_components(GridCell* __restrict__ grid_cell_array, int i, float mu_A, float mu_UA) +__device__ void set_normalization_components(GridCellsSoA grid_cell_array, int i, float mu_A, float mu_UA) { - grid_cell_array[i].mu_A = mu_A; - grid_cell_array[i].mu_UA = mu_UA; + grid_cell_array.mu_A[i] = mu_A; + grid_cell_array.mu_UA[i] = mu_UA; } -__device__ float update_unnorm(const ParticlesSoA& particle_array, int i, - const MeasurementCell* __restrict__ meas_cell_array) +__device__ float update_unnorm(const ParticlesSoA& particle_array, int i, const MeasurementCellsSoA meas_cell_array) { - return meas_cell_array[particle_array.grid_cell_idx[i]].likelihood * particle_array.weight[i]; + return meas_cell_array.likelihood[particle_array.grid_cell_idx[i]] * particle_array.weight[i]; } -__device__ float normalize(const ParticlesSoA& particle, int i, const GridCell* __restrict__ grid_cell_array, - const MeasurementCell* __restrict__ meas_cell_array, float weight) +__device__ float normalize(const ParticlesSoA& particle, int i, const GridCellsSoA grid_cell_array, + const MeasurementCellsSoA meas_cell_array, float weight) { const int cell_idx = particle.grid_cell_idx[i]; - const GridCell& cell = grid_cell_array[cell_idx]; - const MeasurementCell& meas_cell = meas_cell_array[cell_idx]; + const float p_A = meas_cell_array.p_A[cell_idx]; - return meas_cell.p_A * cell.mu_A * weight + (1.0f - meas_cell.p_A) * cell.mu_UA * particle.weight[i]; + return p_A * grid_cell_array.mu_A[cell_idx] * weight + + (1.0f - p_A) * grid_cell_array.mu_UA[cell_idx] * particle.weight[i]; } __global__ void updatePersistentParticlesKernel1(const ParticlesSoA particle_array, - const MeasurementCell* __restrict__ meas_cell_array, + const MeasurementCellsSoA meas_cell_array, float* __restrict__ weight_array, int particle_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particle_count; i += blockDim.x * gridDim.x) @@ -56,29 +54,29 @@ __global__ void updatePersistentParticlesKernel1(const ParticlesSoA particle_arr } } -__global__ void updatePersistentParticlesKernel2(GridCell* __restrict__ grid_cell_array, +__global__ void updatePersistentParticlesKernel2(GridCellsSoA grid_cell_array, const float* __restrict__ weight_array_accum, int cell_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < cell_count; i += blockDim.x * gridDim.x) { - int start_idx = grid_cell_array[i].start_idx; - int end_idx = grid_cell_array[i].end_idx; + int start_idx = grid_cell_array.start_idx[i]; + int end_idx = grid_cell_array.end_idx[i]; if (start_idx != -1) { float m_occ_accum = subtract(weight_array_accum, start_idx, end_idx); - float rho_p = grid_cell_array[i].pers_occ_mass; + float rho_p = grid_cell_array.pers_occ_mass[i]; float mu_A = calc_norm_assoc(m_occ_accum, rho_p); - float mu_UA = calc_norm_unassoc(grid_cell_array[i]); + float mu_UA = calc_norm_unassoc(grid_cell_array.pred_occ_mass[i], grid_cell_array.pers_occ_mass[i]); set_normalization_components(grid_cell_array, i, mu_A, mu_UA); } } } __global__ void updatePersistentParticlesKernel3(const ParticlesSoA particle_array, - const MeasurementCell* __restrict__ meas_cell_array, - const GridCell* __restrict__ grid_cell_array, - float* __restrict__ weight_array, int particle_count) + const MeasurementCellsSoA meas_cell_array, + const GridCellsSoA grid_cell_array, float* __restrict__ weight_array, + int particle_count) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < particle_count; i += blockDim.x * gridDim.x) { diff --git a/dogm/test/dogm_spec.cpp b/dogm/test/dogm_spec.cpp index a0b5322..4ce4583 100644 --- a/dogm/test/dogm_spec.cpp +++ b/dogm/test/dogm_spec.cpp @@ -28,8 +28,10 @@ TEST(DOGM, EgoMotionCompensation) glm::vec4 old_state = particles.state[0]; glm::vec2 pose{10.0f, 10.0f}; + dogm::MeasurementCellsSoA meas_grid(dogm.getGridSize() * dogm.getGridSize(), false); + // Set initial pose (no position update) - dogm.updateGrid(nullptr, pose.x, pose.y, 0.0f, 0.0f); + dogm.updateGrid(meas_grid, pose.x, pose.y, 0.0f, 0.0f); cudaDeviceSynchronize(); EXPECT_EQ(pose.x, dogm.getPositionX()); EXPECT_EQ(pose.y, dogm.getPositionY()); @@ -37,7 +39,7 @@ TEST(DOGM, EgoMotionCompensation) EXPECT_EQ(old_state, new_particles.state[0]); // Change lower than resolution doesn't lead to update after initial position is set - dogm.updateGrid(nullptr, pose.x + 0.5f, pose.y + 0.5f, 0.0f, 0.0f); + dogm.updateGrid(meas_grid, pose.x + 0.5f, pose.y + 0.5f, 0.0f, 0.0f); cudaDeviceSynchronize(); EXPECT_EQ(pose.x, dogm.getPositionX()); EXPECT_EQ(pose.y, dogm.getPositionY()); @@ -47,7 +49,7 @@ TEST(DOGM, EgoMotionCompensation) // Update pose -> position update const float x_change = 3.0f; pose.x += x_change; - dogm.updateGrid(nullptr, pose.x, pose.y, 0.0f, 0.0f); + dogm.updateGrid(meas_grid, pose.x, pose.y, 0.0f, 0.0f); cudaDeviceSynchronize(); EXPECT_EQ(pose.x, dogm.getPositionX()); EXPECT_EQ(pose.y, dogm.getPositionY());