diff --git a/ompi/mca/common/ompio/common_ompio.h b/ompi/mca/common/ompio/common_ompio.h
index f6261dff078..8b12f7b8cc5 100644
--- a/ompi/mca/common/ompio/common_ompio.h
+++ b/ompi/mca/common/ompio/common_ompio.h
@@ -262,6 +262,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_write_at (ompio_file_t *fh, OMPI_MPI_OFF
 OMPI_DECLSPEC int mca_common_ompio_file_iwrite (ompio_file_t *fh, const void *buf,
                                                 size_t count, struct ompi_datatype_t *datatype,
                                                 ompi_request_t **request);
+OMPI_DECLSPEC int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh, ompi_request_t *request);
+
 OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
                                                    const void *buf, size_t count,
                                                    struct ompi_datatype_t *datatype, ompi_request_t **request);
@@ -297,6 +299,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_read_at (ompio_file_t *fh, OMPI_MPI_OFFS
 OMPI_DECLSPEC int mca_common_ompio_file_iread (ompio_file_t *fh, void *buf,
                                                size_t count, struct ompi_datatype_t *datatype,
                                                ompi_request_t **request);
+OMPI_DECLSPEC int mca_common_ompio_file_iread_pregen (ompio_file_t *fh, ompi_request_t *request);
+
 OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
                                                   void *buf, size_t count,
                                                   struct ompi_datatype_t *datatype, ompi_request_t **request);
diff --git a/ompi/mca/common/ompio/common_ompio_file_read.c b/ompi/mca/common/ompio/common_ompio_file_read.c
index e49cfb42b59..15a62f455c1 100644
--- a/ompi/mca/common/ompio/common_ompio_file_read.c
+++ b/ompi/mca/common/ompio/common_ompio_file_read.c
@@ -12,7 +12,7 @@
  * Copyright (c) 2008-2019 University of Houston. All rights reserved.
  * Copyright (c) 2018      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
- * Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
  * Copyright (c) 2024      Triad National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
@@ -530,6 +530,68 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
     return ret;
 }
 
+/*
+** This routine is invoked from file_read_all.
+** It is only used if the temporary buffer is a gpu buffer,
+** and the fbtl supports the ipreadv operation.
+**
+** The io-array has already been generated in file_read_all,
+** and we use the pre-computed offsets to create a pseudo fview.
+** The position of the file pointer is updated in the file_read_all
+** operation, not here.
+*/
+
+int mca_common_ompio_file_iread_pregen (ompio_file_t *fh,
+                                        ompi_request_t *request)
+{
+    uint32_t i;
+    size_t max_data;
+    size_t pipeline_buf_size;
+    mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;
+
+    max_data = fh->f_io_array[0].length;
+    pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);
+
+    mca_common_ompio_register_progress ();
+
+    OMPIO_PREPARE_READ_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
+                            ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
+                            pipeline_buf_size, NULL, i);
+
+    ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
+    ompio_req->req_size = pipeline_buf_size;
+    ompio_req->req_max_data = max_data;
+    ompio_req->req_post_next_subreq = mca_common_ompio_post_next_read_subreq;
+    ompio_req->req_fh = fh;
+    ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;
+
+    ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
+    if (NULL == ompio_req->req_fview) {
+        opal_output(1, "common_ompio: error allocating memory\n");
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc (fh->f_num_of_io_entries *
+                                                                  sizeof(struct iovec));
+    if (NULL == ompio_req->req_fview->f_decoded_iov) {
+        opal_output(1, "common_ompio_file_iread_pregen: could not allocate memory\n");
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
+    for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
+        ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
+        ompio_req->req_fview->f_decoded_iov[i].iov_len = fh->f_io_array[i].length;
+    }
+
+    fh->f_num_of_io_entries = 0;
+    free (fh->f_io_array);
+    fh->f_io_array = NULL;
+
+    mca_common_ompio_post_next_read_subreq(ompio_req, 0);
+    return OMPI_SUCCESS;
+}
+
 int mca_common_ompio_file_iread_at (ompio_file_t *fh,
                                     OMPI_MPI_OFFSET_TYPE offset,
                                     void *buf,
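The pregen read path above reduces one pre-aggregated io_array entry to a pipelined series of subrequests. As a quick illustration, here is a standalone sketch (not Open MPI code; the sizes are made-up examples) of the split that req_num_subreqs, req_size, and req_max_data describe:

```c
/* Standalone sketch of the subrequest split set up above: max_data bytes
 * are staged through a bounce buffer of pipeline_buf_size bytes, with a
 * shorter final chunk. Compile with -lm; values are hypothetical. */
#include <math.h>
#include <stdio.h>

int main(void)
{
    size_t max_data = 10u * 1024 * 1024 + 4096;  /* hypothetical total size */
    size_t pipeline_buf_size = 4u * 1024 * 1024; /* hypothetical MCA value  */
    int num_subreqs = (int) ceil((double) max_data / pipeline_buf_size);

    for (int i = 0; i < num_subreqs; i++) {
        size_t off = (size_t) i * pipeline_buf_size;
        size_t len = (max_data - off < pipeline_buf_size) ? max_data - off
                                                          : pipeline_buf_size;
        printf("subreq %d: offset %zu, length %zu\n", i, off, len);
    }
    return 0;
}
```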
diff --git a/ompi/mca/common/ompio/common_ompio_file_read_all.c b/ompi/mca/common/ompio/common_ompio_file_read_all.c
index 6cfd5127033..1b2f8d6c474 100644
--- a/ompi/mca/common/ompio/common_ompio_file_read_all.c
+++ b/ompi/mca/common/ompio/common_ompio_file_read_all.c
@@ -15,6 +15,7 @@
  * Copyright (c) 2023      Jeffrey M. Squyres. All rights reserved.
  * Copyright (c) 2024      Triad National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2024      Advanced Micro Devices, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -30,9 +31,12 @@
 #include "ompi/mca/fcoll/base/fcoll_base_coll_array.h"
 #include "ompi/mca/fcoll/base/base.h"
 #include "ompi/mca/common/ompio/common_ompio.h"
+#include "ompi/mca/common/ompio/common_ompio_request.h"
+#include "ompi/mca/common/ompio/common_ompio_buffer.h"
 #include "ompi/mca/io/io.h"
 #include "math.h"
 #include "ompi/mca/pml/pml.h"
+#include "opal/mca/accelerator/accelerator.h"
 #include <unistd.h>
 
 #define DEBUG_ON 0
@@ -106,6 +110,9 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
     int* blocklength_proc = NULL;
     ptrdiff_t* displs_proc = NULL;
 
+    int is_gpu, is_managed;
+    bool use_accelerator_buffer = false;
+
 #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
     double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
     double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
@@ -138,6 +145,12 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
         goto exit;
     }
 
+    mca_common_ompio_check_gpu_buf (fh, buf, &is_gpu, &is_managed);
+    if (is_gpu && !is_managed && NULL != fh->f_fbtl->fbtl_ipreadv &&
+        fh->f_get_mca_parameter_value ("use_accelerator_buffers", strlen("use_accelerator_buffers"))) {
+        use_accelerator_buffer = true;
+    }
+
     ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh,
                                                  base_num_io_procs,
                                                  max_data);
@@ -364,11 +377,23 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
             goto exit;
         }
 
-        global_buf = (char *) malloc (bytes_per_cycle);
-        if (NULL == global_buf){
-            opal_output(1, "OUT OF MEMORY\n");
-            ret = OMPI_ERR_OUT_OF_RESOURCE;
-            goto exit;
+        if (use_accelerator_buffer) {
+            opal_output_verbose(10, ompi_fcoll_base_framework.framework_output,
+                                "Allocating GPU device buffer for aggregation\n");
+            ret = opal_accelerator.mem_alloc(MCA_ACCELERATOR_NO_DEVICE_ID, (void**)&global_buf,
+                                             bytes_per_cycle);
+            if (OPAL_SUCCESS != ret) {
+                opal_output(1, "Could not allocate accelerator memory");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto exit;
+            }
+        } else {
+            global_buf = (char *) malloc (bytes_per_cycle);
+            if (NULL == global_buf){
+                opal_output(1, "OUT OF MEMORY\n");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto exit;
+            }
         }
 
         sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *));
@@ -686,10 +711,26 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
 #endif
 
         if (fh->f_num_of_io_entries) {
-            if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
-                opal_output (1, "READ FAILED\n");
-                ret = OMPI_ERROR;
-                goto exit;
+            if (use_accelerator_buffer) {
+                mca_ompio_request_t *ompio_req = NULL;
+                mca_common_ompio_request_alloc (&ompio_req, MCA_OMPIO_REQUEST_READ);
+
+                ret = mca_common_ompio_file_iread_pregen(fh, (ompi_request_t *) ompio_req);
+                if (0 > ret) {
+                    opal_output (1, "common_ompio_file_read_all: mca_common_ompio_file_iread_pregen failed\n");
+                    ompio_req->req_ompi.req_status.MPI_ERROR = ret;
+                    ompio_req->req_ompi.req_status._ucount = 0;
+                }
+                ret = ompi_request_wait ((ompi_request_t**)&ompio_req, MPI_STATUS_IGNORE);
+                if (OMPI_SUCCESS != ret){
+                    goto exit;
+                }
+            } else {
+                if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
+                    opal_output (1, "READ FAILED\n");
+                    ret = OMPI_ERROR;
+                    goto exit;
+                }
             }
         }
 
@@ -881,7 +922,11 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
 
 exit:
     if (NULL != global_buf) {
-        free (global_buf);
+        if (use_accelerator_buffer) {
+            opal_accelerator.mem_release(MCA_ACCELERATOR_NO_DEVICE_ID, global_buf);
+        } else {
+            free (global_buf);
+        }
         global_buf = NULL;
    }
    if (NULL != sorted) {
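For reference, the gating condition that read_all applies before taking the GPU staging path can be summarized as follows. This is an illustrative stand-in, not the Open MPI API; the real code checks fh->f_fbtl->fbtl_ipreadv and queries the parameter through f_get_mca_parameter_value:

```c
/* Sketch of the gating logic added above, with made-up stand-in types.
 * All four conditions must hold before GPU aggregation buffers are used. */
#include <stdbool.h>
#include <stddef.h>

struct fbtl_sketch {
    void *ipreadv; /* non-NULL if the fbtl offers a non-blocking readv */
};

static bool want_accelerator_aggregation(bool is_gpu, bool is_managed,
                                         const struct fbtl_sketch *fbtl,
                                         int use_accelerator_buffers_param)
{
    return is_gpu                              /* user buffer is device memory   */
        && !is_managed                         /* managed memory is host-visible */
        && NULL != fbtl->ipreadv               /* async fbtl path is available   */
        && 0 != use_accelerator_buffers_param; /* user opted in via MCA param    */
}
```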
diff --git a/ompi/mca/common/ompio/common_ompio_file_write.c b/ompi/mca/common/ompio/common_ompio_file_write.c
index b970346f7a2..b5d3900e7a3 100644
--- a/ompi/mca/common/ompio/common_ompio_file_write.c
+++ b/ompi/mca/common/ompio/common_ompio_file_write.c
@@ -12,7 +12,7 @@
  * Copyright (c) 2008-2019 University of Houston. All rights reserved.
  * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
- * Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
  * Copyright (c) 2024      Triad National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
@@ -329,6 +329,7 @@ static void mca_common_ompio_post_next_write_subreq(struct mca_ompio_request_t *
     decoded_iov.iov_base = req->req_tbuf;
     decoded_iov.iov_len  = req->req_size;
     opal_convertor_pack (&req->req_convertor, &decoded_iov, &iov_count, &pos);
+
     mca_common_ompio_build_io_array (req->req_fview, index, req->req_num_subreqs,
                                      bytes_per_cycle, pos, iov_count, &decoded_iov,
@@ -472,6 +473,72 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
     return ret;
 }
 
+/*
+** This routine is invoked from the fcoll component.
+** It is only used if the temporary buffer is a gpu buffer,
+** and the fbtl supports the ipwritev operation.
+**
+** The io-array has already been generated in fcoll/xxx/file_write_all,
+** and we use the pre-computed offsets to create a pseudo fview.
+** The position of the file pointer is updated in the fcoll
+** component, not here.
+*/
+
+int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh,
+                                         ompi_request_t *request)
+{
+    uint32_t i;
+    size_t max_data;
+    size_t pipeline_buf_size;
+    mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;
+
+    if (NULL == fh->f_fbtl->fbtl_ipwritev) {
+        return MPI_ERR_INTERN;
+    }
+
+    max_data = fh->f_io_array[0].length;
+    pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);
+
+    mca_common_ompio_register_progress ();
+
+    OMPIO_PREPARE_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
+                       ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
+                       pipeline_buf_size, NULL, i);
+
+    ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
+    ompio_req->req_size = pipeline_buf_size;
+    ompio_req->req_max_data = max_data;
+    ompio_req->req_post_next_subreq = mca_common_ompio_post_next_write_subreq;
+    ompio_req->req_fh = fh;
+    ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;
+
+    ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
+    if (NULL == ompio_req->req_fview) {
+        opal_output(1, "common_ompio: error allocating memory\n");
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc (fh->f_num_of_io_entries *
+                                                                  sizeof(struct iovec));
+    if (NULL == ompio_req->req_fview->f_decoded_iov) {
+        opal_output(1, "common_ompio_file_iwrite_pregen: could not allocate memory\n");
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
+    ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
+    for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
+        ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
+        ompio_req->req_fview->f_decoded_iov[i].iov_len = fh->f_io_array[i].length;
+    }
+
+    fh->f_num_of_io_entries = 0;
+    free (fh->f_io_array);
+    fh->f_io_array = NULL;
+
+    mca_common_ompio_post_next_write_subreq(ompio_req, 0);
+    return OMPI_SUCCESS;
+}
+
 int mca_common_ompio_file_iwrite_at (ompio_file_t *fh,
                                      OMPI_MPI_OFFSET_TYPE offset,
                                      const void *buf,
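Both pregen routines build the same pseudo fview from the pre-generated io_array. A minimal sketch of that conversion, with a simplified stand-in for ompio's io_array element type:

```c
/* Sketch of the pseudo-fview construction performed by both pregen
 * routines: each pre-generated io_array entry (offset, length) becomes
 * one iovec. struct io_entry is a simplified stand-in, not ompio's type. */
#include <stdlib.h>
#include <sys/uio.h>

struct io_entry {
    void   *offset; /* file offset, stored as a pointer in ompio */
    size_t  length;
};

static struct iovec *entries_to_iovecs(const struct io_entry *entries, size_t n)
{
    struct iovec *iov = malloc(n * sizeof(*iov));
    if (NULL == iov) {
        return NULL;
    }
    for (size_t i = 0; i < n; i++) {
        iov[i].iov_base = entries[i].offset;
        iov[i].iov_len  = entries[i].length;
    }
    return iov; /* caller owns the array, mirroring req_fview->f_decoded_iov */
}
```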
diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan.h b/ompi/mca/fcoll/vulcan/fcoll_vulcan.h
index ebddf429a5c..a2fd6ca82bc 100644
--- a/ompi/mca/fcoll/vulcan/fcoll_vulcan.h
+++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan.h
@@ -43,6 +43,7 @@ extern int mca_fcoll_vulcan_priority;
 extern int mca_fcoll_vulcan_num_groups;
 extern int mca_fcoll_vulcan_write_chunksize;
 extern int mca_fcoll_vulcan_async_io;
+extern int mca_fcoll_vulcan_use_accelerator_buffers;
 
 OMPI_DECLSPEC extern mca_fcoll_base_component_3_0_0_t mca_fcoll_vulcan_component;
diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c
index 8318b0ef374..b928f795bfe 100644
--- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c
+++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c
@@ -15,6 +15,7 @@
  * Copyright (c) 2023      Jeffrey M. Squyres. All rights reserved.
  * Copyright (c) 2024      Triad National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2024      Advanced Micro Devices, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -30,10 +31,12 @@
 #include "ompi/mca/fcoll/fcoll.h"
 #include "ompi/mca/fcoll/base/fcoll_base_coll_array.h"
 #include "ompi/mca/common/ompio/common_ompio.h"
+#include "ompi/mca/common/ompio/common_ompio_buffer.h"
 #include "ompi/mca/io/io.h"
 #include "ompi/mca/common/ompio/common_ompio_request.h"
 #include "math.h"
 #include "ompi/mca/pml/pml.h"
+#include "opal/mca/accelerator/accelerator.h"
 #include <unistd.h>
 
 #define DEBUG_ON 0
@@ -88,13 +91,12 @@ typedef struct mca_io_ompio_aggregator_data {
     _aggr[_i]->prev_recvtype=(ompi_datatype_t **)_t; }      \
 }
 
-
-
 static int shuffle_init ( int index, int cycles, int aggregator, int rank,
                           mca_io_ompio_aggregator_data *data,
                           ompi_request_t **reqs );
 static int write_init (ompio_file_t *fh, int aggregator,
                        mca_io_ompio_aggregator_data *aggr_data,
-                       int write_chunksize, int write_synchType, ompi_request_t **request);
+                       int write_chunksize, int write_synchType, ompi_request_t **request,
+                       bool is_accelerator_buffer);
 
 int mca_fcoll_vulcan_break_file_view ( struct iovec *decoded_iov, int iov_count,
                                        struct iovec *local_iov_array, int local_count,
                                        struct iovec ***broken_decoded_iovs, int **broken_iov_counts,
@@ -155,6 +157,8 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh,
     ompi_count_array_t fview_count_desc;
     ompi_disp_array_t displs_desc;
 
+    int is_gpu, is_managed;
+    bool use_accelerator_buffer = false;
 
 #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
     double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0;
@@ -180,6 +184,11 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh,
         goto exit;
     }
 
+    mca_common_ompio_check_gpu_buf (fh, buf, &is_gpu, &is_managed);
+    if (is_gpu && !is_managed &&
+        fh->f_get_mca_parameter_value ("use_accelerator_buffers", strlen("use_accelerator_buffers"))) {
+        use_accelerator_buffer = true;
+    }
     /* since we want to overlap 2 iterations, define the bytes_per_cycle to be half of what
        the user requested */
     bytes_per_cycle =bytes_per_cycle/2;
@@ -529,13 +538,31 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh,
             goto exit;
         }
 
-
-        aggr_data[i]->global_buf      = (char *) malloc (bytes_per_cycle);
-        aggr_data[i]->prev_global_buf = (char *) malloc (bytes_per_cycle);
-        if (NULL == aggr_data[i]->global_buf || NULL == aggr_data[i]->prev_global_buf){
-            opal_output(1, "OUT OF MEMORY");
-            ret = OMPI_ERR_OUT_OF_RESOURCE;
-            goto exit;
+        if (use_accelerator_buffer) {
+            opal_output_verbose(10, ompi_fcoll_base_framework.framework_output,
+                                "Allocating GPU device buffer for aggregation\n");
+            ret = opal_accelerator.mem_alloc(MCA_ACCELERATOR_NO_DEVICE_ID, (void**)&aggr_data[i]->global_buf,
+                                             bytes_per_cycle);
+            if (OPAL_SUCCESS != ret) {
+                opal_output(1, "Could not allocate accelerator memory");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto exit;
+            }
+            ret = opal_accelerator.mem_alloc(MCA_ACCELERATOR_NO_DEVICE_ID, (void**)&aggr_data[i]->prev_global_buf,
+                                             bytes_per_cycle);
+            if (OPAL_SUCCESS != ret) {
+                opal_output(1, "Could not allocate accelerator memory");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto exit;
+            }
+        } else {
+            aggr_data[i]->global_buf      = (char *) malloc (bytes_per_cycle);
+            aggr_data[i]->prev_global_buf = (char *) malloc (bytes_per_cycle);
+            if (NULL == aggr_data[i]->global_buf || NULL == aggr_data[i]->prev_global_buf){
+                opal_output(1, "OUT OF MEMORY");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto exit;
+            }
         }
 
         aggr_data[i]->recvtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group *
@@ -605,7 +632,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh,
         start_write_time = MPI_Wtime();
 #endif
         ret = write_init (fh, fh->f_aggr_list[aggr_index], aggr_data[aggr_index],
-                          write_chunksize, write_synch_type, &req_iwrite);
+                          write_chunksize, write_synch_type, &req_iwrite, use_accelerator_buffer);
         if (OMPI_SUCCESS != ret){
             goto exit;
         }
@@ -645,7 +672,7 @@ int mca_fcoll_vulcan_file_write_all (struct ompio_file_t *fh,
         start_write_time = MPI_Wtime();
 #endif
         ret = write_init (fh, fh->f_aggr_list[aggr_index], aggr_data[aggr_index],
-                          write_chunksize, write_synch_type, &req_iwrite);
+                          write_chunksize, write_synch_type, &req_iwrite, use_accelerator_buffer);
         if (OMPI_SUCCESS != ret){
             goto exit;
         }
@@ -704,8 +731,13 @@ exit :
             free (aggr_data[i]->disp_index);
             free (aggr_data[i]->max_disp_index);
 
-            free (aggr_data[i]->global_buf);
-            free (aggr_data[i]->prev_global_buf);
+            if (use_accelerator_buffer) {
+                opal_accelerator.mem_release(MCA_ACCELERATOR_NO_DEVICE_ID, aggr_data[i]->global_buf);
+                opal_accelerator.mem_release(MCA_ACCELERATOR_NO_DEVICE_ID, aggr_data[i]->prev_global_buf);
+            } else {
+                free (aggr_data[i]->global_buf);
+                free (aggr_data[i]->prev_global_buf);
+            }
             for(l=0;l<aggr_data[i]->procs_per_group;l++){
                 free (aggr_data[i]->blocklen_per_process[l]);
                 free (aggr_data[i]->displs_per_process[l]);
@@ -749,7 +781,8 @@ static int write_init (ompio_file_t *fh,
                        int aggregator,
                        mca_io_ompio_aggregator_data *aggr_data,
                        int write_chunksize, int write_synchType,
-                       ompi_request_t **request )
+                       ompi_request_t **request,
+                       bool is_accelerator_buffer)
 {
     int ret = OMPI_SUCCESS;
     ssize_t ret_temp = 0;
@@ -770,11 +803,20 @@ static int write_init (ompio_file_t *fh,
                                     write_chunksize);
 
     if (1 == write_synchType) {
-        ret = fh->f_fbtl->fbtl_ipwritev(fh, (ompi_request_t *) ompio_req);
-        if(0 > ret) {
-            opal_output (1, "vulcan_write_all: fbtl_ipwritev failed\n");
-            ompio_req->req_ompi.req_status.MPI_ERROR = ret;
-            ompio_req->req_ompi.req_status._ucount = 0;
+        if (is_accelerator_buffer) {
+            ret = mca_common_ompio_file_iwrite_pregen(fh, (ompi_request_t *) ompio_req);
+            if (0 > ret) {
+                opal_output (1, "vulcan_write_all: mca_common_ompio_file_iwrite_pregen failed\n");
+                ompio_req->req_ompi.req_status.MPI_ERROR = ret;
+                ompio_req->req_ompi.req_status._ucount = 0;
+            }
+        } else {
+            ret = fh->f_fbtl->fbtl_ipwritev(fh, (ompi_request_t *) ompio_req);
+            if (0 > ret) {
+                opal_output (1, "vulcan_write_all: fbtl_ipwritev failed\n");
+                ompio_req->req_ompi.req_status.MPI_ERROR = ret;
+                ompio_req->req_ompi.req_status._ucount = 0;
+            }
         }
     }
     else {
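Vulcan halves bytes_per_cycle because it overlaps two cycles: one aggregation buffer is drained to the file while the other is filled, which is why both global_buf and prev_global_buf must now come from device memory. A sketch of the general double-buffer idea, with hypothetical names; the actual bookkeeping in vulcan is more involved:

```c
/* Illustrative double-buffer (ping-pong) scheme: while the fbtl drains
 * prev_global_buf for cycle N, cycle N+1's data is gathered into
 * global_buf, then the roles swap. Stand-in struct and helper only. */
struct aggr_bufs {
    char *global_buf;      /* being filled for the next cycle */
    char *prev_global_buf; /* being drained by the file write */
};

static void swap_cycle_buffers(struct aggr_bufs *a)
{
    char *tmp          = a->prev_global_buf;
    a->prev_global_buf = a->global_buf;
    a->global_buf      = tmp;
}
```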
diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c
index 2cc27e23ddc..506b6897e46 100644
--- a/ompi/mca/io/ompio/io_ompio.c
+++ b/ompi/mca/io/ompio/io_ompio.c
@@ -15,7 +15,7 @@
  * Copyright (c) 2012-2013 Inria. All rights reserved.
  * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
- * Copyright (c) 2022      Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -556,6 +556,9 @@ int mca_io_ompio_get_mca_parameter_value ( char *mca_parameter_name, int name_le
     else if ( !strncmp ( mca_parameter_name, "coll_timing_info", name_length )) {
         return mca_io_ompio_coll_timing_info;
     }
+    else if ( !strncmp (mca_parameter_name, "use_accelerator_buffers", name_length)) {
+        return mca_io_ompio_use_accelerator_buffers;
+    }
     else {
         opal_output (1, "Error in mca_io_ompio_get_mca_parameter_value: unknown parameter name");
     }
diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h
index 272c4e4a0c6..d38bfb1acb3 100644
--- a/ompi/mca/io/ompio/io_ompio.h
+++ b/ompi/mca/io/ompio/io_ompio.h
@@ -14,7 +14,7 @@
  * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
- * Copyright (c) 2022      Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
  * Copyright (c) 2024      Triad National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
@@ -57,7 +57,7 @@ extern int mca_io_ompio_max_aggregators_ratio;
 extern int mca_io_ompio_aggregators_cutoff_threshold;
 extern int mca_io_ompio_overwrite_amode;
 extern int mca_io_ompio_verbose_info_parsing;
-
+extern int mca_io_ompio_use_accelerator_buffers;
 OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info;
 
 #define QUEUESIZE 2048
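The new parameter is exposed through ompio's generic string-keyed accessor, which is how the common and fcoll code above reaches it without a direct symbol dependency. A standalone sketch of that dispatch pattern (the value and fallback here are illustrative):

```c
/* Standalone sketch of the string-keyed lookup extended above: callers
 * pass a parameter name plus its length and get the MCA value back. */
#include <stdio.h>
#include <string.h>

static int use_accelerator_buffers = 1; /* stand-in for the MCA variable */

static int get_mca_parameter_value(const char *name, int name_length)
{
    if (0 == strncmp(name, "use_accelerator_buffers", name_length)) {
        return use_accelerator_buffers;
    }
    fprintf(stderr, "unknown parameter name\n");
    return -1;
}

int main(void)
{
    const char *name = "use_accelerator_buffers";
    printf("%s = %d\n", name, get_mca_parameter_value(name, (int) strlen(name)));
    return 0;
}
```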
diff --git a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c
index a8151315f40..0e6fcd01f6c 100644
--- a/ompi/mca/io/ompio/io_ompio_component.c
+++ b/ompi/mca/io/ompio/io_ompio_component.c
@@ -17,7 +17,7 @@
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
  * Copyright (c) 2018      DataDirect Networks. All rights reserved.
- * Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
  * Copyright (c) 2024      Triad National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
@@ -49,7 +49,7 @@ int mca_io_ompio_max_aggregators_ratio=8;
 int mca_io_ompio_aggregators_cutoff_threshold=3;
 int mca_io_ompio_overwrite_amode = 1;
 int mca_io_ompio_verbose_info_parsing = 0;
-
+int mca_io_ompio_use_accelerator_buffers = 0;
 int mca_io_ompio_grouping_option=5;
 
 /*
@@ -263,6 +263,14 @@ static int register_component(void)
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &mca_io_ompio_verbose_info_parsing);
 
+    mca_io_ompio_use_accelerator_buffers = 0;
+    (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
+                                           "use_accelerator_buffers", "Allow using accelerator buffers "
+                                           "for data aggregation in collective I/O if input buffer is device memory",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_9,
+                                           MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_use_accelerator_buffers);
+
     return OMPI_SUCCESS;
 }
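Finally, a usage sketch under stated assumptions: a CUDA-aware Open MPI build whose fbtl provides ipwritev, run with the new path enabled, e.g. `mpirun --mca io_ompio_use_accelerator_buffers 1 ./a.out`. The collective write below should then stage through device-side aggregation buffers instead of host memory; the file name and sizes are arbitrary:

```c
/* Hypothetical end-to-end example: each rank writes one block of device
 * memory with a collective write. Requires a CUDA-aware Open MPI build. */
#include <mpi.h>
#include <cuda_runtime.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const size_t count = 1 << 20; /* ints per rank */
    int *dbuf = NULL;
    cudaMalloc((void **) &dbuf, count * sizeof(int));
    cudaMemset(dbuf, 0, count * sizeof(int));

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "out.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    /* Collective write straight from device memory; with the MCA parameter
     * set, vulcan's aggregators use GPU buffers instead of host ones. */
    MPI_Offset off = (MPI_Offset) rank * (MPI_Offset) (count * sizeof(int));
    MPI_File_write_at_all(fh, off, dbuf, (int) count, MPI_INT, MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
    cudaFree(dbuf);
    MPI_Finalize();
    return 0;
}
```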