diff --git a/src/data_structures/APR/APR.hpp b/src/data_structures/APR/APR.hpp index 83ae95b3..b2b34fd5 100644 --- a/src/data_structures/APR/APR.hpp +++ b/src/data_structures/APR/APR.hpp @@ -23,7 +23,7 @@ class APR { friend class APRConverterBatch; friend class APRBenchHelper; -protected: +public: // initialize tree RandomAccess void initialize_tree_random_sparse(); @@ -60,8 +60,6 @@ class APR { APRParameters parameters; // this is here to keep a record of what parameters were used, to then be written if needed. -public: - #ifdef APR_USE_CUDA @@ -86,6 +84,10 @@ class APR { * @param with_tree include the tree access */ void init_cuda(bool with_tree=true) { + gpuAccess.genInfo = &aprInfo; + gpuTreeAccess.genInfo = &treeInfo; + linearAccess.genInfo = &aprInfo; + linearAccessTree.genInfo = &treeInfo; auto apr_helper = gpuAPRHelper(); if(with_tree) { auto tree_helper = gpuTreeHelper(); @@ -190,6 +192,7 @@ class APR { tree_initialized = apr2copy.tree_initialized; apr_initialized = apr2copy.apr_initialized; name = apr2copy.name; + parameters = apr2copy.parameters; //old data structures apr_access = apr2copy.apr_access; diff --git a/src/data_structures/APR/access/GPUAccess.hpp b/src/data_structures/APR/access/GPUAccess.hpp index 9d57b09d..4d5f676a 100644 --- a/src/data_structures/APR/access/GPUAccess.hpp +++ b/src/data_structures/APR/access/GPUAccess.hpp @@ -67,7 +67,6 @@ class GPUAccessHelper { gpuAccess->init_y_vec(linearAccess->y_vec); gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec); gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec); - gpuAccess->genInfo = linearAccess->genInfo; gpuAccess->copy2Device(); gpuAccess->initialized = true; } @@ -78,7 +77,6 @@ class GPUAccessHelper { gpuAccess->init_y_vec(linearAccess->y_vec); gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec); gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec); - gpuAccess->genInfo = linearAccess->genInfo; gpuAccess->copy2Device(total_number_particles(tree_access.level_max()), tree_access.gpuAccess); gpuAccess->initialized = true; } @@ -88,7 +86,7 @@ class GPUAccessHelper { gpuAccess->copy2Host(); } - uint64_t total_number_particles() { return gpuAccess->total_number_particles(); } + uint64_t total_number_particles() { return gpuAccess->genInfo->total_number_particles; } uint64_t total_number_particles(const int level) { uint64_t index = linearAccess->level_xz_vec[level] + linearAccess->x_num(level) - 1 + (linearAccess->z_num(level)-1)*linearAccess->x_num(level); diff --git a/src/io/APRWriter.hpp b/src/io/APRWriter.hpp index 4ad6231a..2fbe4dee 100644 --- a/src/io/APRWriter.hpp +++ b/src/io/APRWriter.hpp @@ -702,7 +702,8 @@ class APRWriter { break; case Operation::WRITE: - fileId = hdf5_create_file_blosc(aFileName); +// fileId = hdf5_create_file_blosc(aFileName); + fileId = H5Fcreate(aFileName.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (fileId == -1) { std::cerr << "Could not create file [" << aFileName << "]" << std::endl; diff --git a/src/numerics/APRDownsampleGPU.cu b/src/numerics/APRDownsampleGPU.cu index 909728d4..9da3410e 100644 --- a/src/numerics/APRDownsampleGPU.cu +++ b/src/numerics/APRDownsampleGPU.cu @@ -900,6 +900,7 @@ template void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData& ne_count, ScopedCudaMemHandler& ne_rows_gpu) { ne_count.resize(tree_access.level_max() + 3); + ne_count[0] = 0; int z_blocks_max = (tree_access.z_num(tree_access.level_max()) + blockSize_z - 1) / blockSize_z; int num_levels = tree_access.level_max() - tree_access.level_min() + 1; @@ -973,12 +974,13 @@ void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData& ne ne_rows_gpu.get()); } - error_check(cudaFree(block_sums_device) ) + error_check(cudaFree(block_sums_device)) } void compute_ne_rows_tree(GPUAccessHelper& tree_access, VectorData& ne_counter, VectorData& ne_rows) { ne_counter.resize(tree_access.level_max() + 3); + ne_counter[0] = 0; int z = 0; int x = 0; diff --git a/src/numerics/miscCuda.cu b/src/numerics/miscCuda.cu index 4dc043b6..93b5b94e 100644 --- a/src/numerics/miscCuda.cu +++ b/src/numerics/miscCuda.cu @@ -237,6 +237,7 @@ __global__ void fill_ne_rows_cuda(const uint64_t* level_xz_vec, } + template void compute_ne_rows_cuda(GPUAccessHelper& access, VectorData& ne_count, ScopedCudaMemHandler& ne_rows_gpu, int blockSize) { @@ -264,12 +265,12 @@ void compute_ne_rows_cuda(GPUAccessHelper& access, VectorData& ne_count, Sc count_ne_rows_cuda << < grid_dim, block_dim >> > (access.get_level_xz_vec_ptr(), - access.get_xz_end_vec_ptr(), - access.z_num(level), - access.x_num(level), - level, - blockSize, - block_sums_device + offset); + access.get_xz_end_vec_ptr(), + access.z_num(level), + access.x_num(level), + level, + blockSize, + block_sums_device + offset); offset += z_blocks_max; } @@ -305,14 +306,14 @@ void compute_ne_rows_cuda(GPUAccessHelper& access, VectorData& ne_count, Sc fill_ne_rows_cuda<<< grid_dim, block_dim >>> (access.get_level_xz_vec_ptr(), - access.get_xz_end_vec_ptr(), - access.z_num(level), - access.x_num(level), - level, - blockSize, - ne_sz, - ne_count[level], - ne_rows_gpu.get()); + access.get_xz_end_vec_ptr(), + access.z_num(level), + access.x_num(level), + level, + blockSize, + ne_sz, + ne_count[level], + ne_rows_gpu.get()); } error_check( cudaFree(block_sums_device) ) diff --git a/vcpkg b/vcpkg index a3252282..486a4640 160000 --- a/vcpkg +++ b/vcpkg @@ -1 +1 @@ -Subproject commit a325228200d7f229f3337e612e0077f2a5307090 +Subproject commit 486a4640db740f5994e492eb60748111dfc48de7 diff --git a/vcpkg.json b/vcpkg.json index f7b8b7f0..5a553b4a 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -2,10 +2,20 @@ "name": "libapr", "version-string": "0.0.1", "dependencies": [ - "blosc", - "hdf5", - "szip", - "gtest", - "tiff" - ] -} \ No newline at end of file + { + "name": "blosc" + }, + { + "name": "hdf5", + "version>=": "1.8.20" + }, + { + "name": "szip" + }, + { + "name": "tiff", + "version>=": "4.0" + } + ], + "builtin-baseline": "486a4640db740f5994e492eb60748111dfc48de7" +}