@@ -931,11 +931,12 @@ uct_test::entity::entity(const resource& resource, uct_md_config_t *md_config,
931
931
932
932
void uct_test::entity::mem_alloc (size_t length, unsigned mem_flags,
933
933
uct_allocated_memory_t *mem,
934
- ucs_memory_type_t mem_type) const
934
+ ucs_memory_type_t mem_type,
935
+ unsigned num_retries) const
935
936
{
936
- void *address = NULL ;
937
- uct_md_h uct_md = md ();
938
- ucs_status_t status;
937
+ void *address = NULL ;
938
+ uct_md_h uct_md = md ();
939
+ ucs_status_t status = UCS_OK ;
939
940
uct_mem_alloc_params_t params;
940
941
941
942
params.field_mask = UCT_MEM_ALLOC_PARAM_FIELD_FLAGS |
@@ -947,22 +948,38 @@ void uct_test::entity::mem_alloc(size_t length, unsigned mem_flags,
947
948
params.mem_type = mem_type;
948
949
params.address = address;
949
950
950
- if ((md_attr ().flags & (UCT_MD_FLAG_ALLOC | UCT_MD_FLAG_REG)) &&
951
- (mem_type == UCS_MEMORY_TYPE_HOST)) {
952
- status = uct_iface_mem_alloc (m_iface, length, mem_flags, " uct_test" ,
953
- mem);
954
- ASSERT_UCS_OK (status);
955
- } else {
956
- uct_alloc_method_t alloc_methods[] = {UCT_ALLOC_METHOD_MMAP,
957
- UCT_ALLOC_METHOD_MD};
958
- params.field_mask |= UCT_MEM_ALLOC_PARAM_FIELD_MDS;
959
- params.mds .mds = &uct_md;
960
- params.mds .count = 1 ;
961
- status = uct_mem_alloc (length, alloc_methods,
962
- ucs_static_array_size (alloc_methods), &params,
963
- mem);
964
- ASSERT_UCS_OK (status);
951
+ for (unsigned i = 0 ; i <= num_retries; ++i) {
952
+ scoped_log_handler slh (wrap_errors_logger);
953
+ if ((md_attr ().flags & (UCT_MD_FLAG_ALLOC | UCT_MD_FLAG_REG)) &&
954
+ (mem_type == UCS_MEMORY_TYPE_HOST)) {
955
+ status = uct_iface_mem_alloc (m_iface, length, mem_flags, " uct_test" ,
956
+ mem);
957
+ } else {
958
+ uct_alloc_method_t alloc_methods[] = {UCT_ALLOC_METHOD_MMAP,
959
+ UCT_ALLOC_METHOD_MD};
960
+ params.field_mask |= UCT_MEM_ALLOC_PARAM_FIELD_MDS;
961
+ params.mds .mds = &uct_md;
962
+ params.mds .count = 1 ;
963
+ status = uct_mem_alloc (length, alloc_methods,
964
+ ucs_static_array_size (alloc_methods),
965
+ &params, mem);
966
+ }
967
+
968
+ if (status != UCS_ERR_NO_MEMORY) {
969
+ break ;
970
+ }
971
+
972
+ if (i < num_retries) {
973
+ UCS_TEST_MESSAGE << " Retry " << (i + 1 ) << " /" << num_retries
974
+ << " : Allocation failed - "
975
+ << ucs_status_string (status);
976
+ /* Sleep only if there are more retries remaining */
977
+ usleep (ucs::rand () % 10000 );
978
+ }
965
979
}
980
+
981
+ ASSERT_UCS_OK (status);
982
+
966
983
ucs_assert (mem->mem_type == mem_type);
967
984
}
968
985
@@ -1414,16 +1431,16 @@ void uct_test::mapped_buffer::reset()
1414
1431
uct_test::mapped_buffer::mapped_buffer (size_t size, uint64_t seed,
1415
1432
const entity &entity, size_t offset,
1416
1433
ucs_memory_type_t mem_type,
1417
- unsigned mem_flags) :
1418
- mapped_buffer(size, entity, offset, mem_type, mem_flags)
1434
+ unsigned mem_flags, unsigned num_retries ) :
1435
+ mapped_buffer(size, entity, offset, mem_type, mem_flags, num_retries )
1419
1436
{
1420
1437
pattern_fill (seed);
1421
1438
}
1422
1439
1423
- uct_test::mapped_buffer::mapped_buffer (size_t size,
1440
+ uct_test::mapped_buffer::mapped_buffer (size_t size,
1424
1441
const entity &entity, size_t offset,
1425
1442
ucs_memory_type_t mem_type,
1426
- unsigned mem_flags) :
1443
+ unsigned mem_flags, unsigned num_retries ) :
1427
1444
m_entity(entity)
1428
1445
{
1429
1446
if (size == 0 ) {
@@ -1433,7 +1450,7 @@ uct_test::mapped_buffer::mapped_buffer(size_t size,
1433
1450
1434
1451
size_t alloc_size = size + offset;
1435
1452
if ((mem_type == UCS_MEMORY_TYPE_HOST) || (mem_type == UCS_MEMORY_TYPE_RDMA)) {
1436
- m_entity.mem_alloc (alloc_size, mem_flags, &m_mem, mem_type);
1453
+ m_entity.mem_alloc (alloc_size, mem_flags, &m_mem, mem_type, num_retries );
1437
1454
} else {
1438
1455
m_mem.method = UCT_ALLOC_METHOD_LAST;
1439
1456
m_mem.address = mem_buffer::allocate (alloc_size, mem_type);
0 commit comments