From b744e8e34faacb8a014a7cab8536ffe50263f3d2 Mon Sep 17 00:00:00 2001 From: Evgeny Leksikov Date: Tue, 7 May 2024 18:59:38 +0300 Subject: [PATCH] UCT/GGA: enable testing for md, iface, ep_connect, CR2 --- src/uct/ib/base/ib_iface.c | 61 ++++++---- src/uct/ib/base/ib_iface.h | 4 + src/uct/ib/dc/dc_mlx5.c | 1 + src/uct/ib/mlx5/dv/ib_mlx5_dv.c | 8 +- src/uct/ib/mlx5/ib_mlx5.c | 4 + src/uct/ib/mlx5/ib_mlx5.h | 1 + src/uct/ib/rc/accel/gga_mlx5.c | 173 +++++++++++++++------------ src/uct/ib/rc/accel/rc_mlx5.h | 11 +- src/uct/ib/rc/accel/rc_mlx5.inl | 63 ++++++++++ src/uct/ib/rc/accel/rc_mlx5_common.c | 5 +- src/uct/ib/rc/accel/rc_mlx5_ep.c | 27 +++-- src/uct/ib/rc/accel/rc_mlx5_iface.c | 70 +---------- src/uct/ib/rc/base/rc_ep.c | 16 ++- src/uct/ib/rc/base/rc_ep.h | 2 +- src/uct/ib/rc/base/rc_iface.c | 31 +++-- src/uct/ib/rc/base/rc_iface.h | 2 + src/uct/ib/rc/verbs/rc_verbs_ep.c | 2 +- 17 files changed, 270 insertions(+), 211 deletions(-) diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 2226d6327b4..f37f1e4ec77 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -701,29 +701,39 @@ uct_ib_iface_roce_is_reachable(const uct_ib_device_gid_info_t *local_gid_info, return ret; } -int uct_ib_iface_is_same_device(const uct_ib_address_t *ib_addr, uint16_t dlid, - const union ibv_gid *dgid) +static int +uct_ib_iface_is_same_device_unpacked( + const uct_ib_address_pack_params_t *unpacked_addr, uint16_t dlid, + const union ibv_gid *dgid) { - uct_ib_address_pack_params_t params; - - uct_ib_address_unpack(ib_addr, &params); - - if (!(params.flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) && - (dlid != params.lid)) { + if (!(unpacked_addr->flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) && + (dlid != unpacked_addr->lid)) { return 0; } if (dgid == NULL) { - return !(params.flags & (UCT_IB_ADDRESS_PACK_FLAG_ETH | - UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID)); + return !(unpacked_addr->flags & + (UCT_IB_ADDRESS_PACK_FLAG_ETH | + UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID)); } - if (params.flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) { - return !memcmp(dgid->raw, params.gid.raw, sizeof(params.gid.raw)); + if (unpacked_addr->flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) { + return !memcmp(dgid->raw, unpacked_addr->gid.raw, + sizeof(unpacked_addr->gid.raw)); } - return !(params.flags & UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID) || - (params.gid.global.interface_id == dgid->global.interface_id); + return !(unpacked_addr->flags & UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID) || + (unpacked_addr->gid.global.interface_id == + dgid->global.interface_id); +} + +int uct_ib_iface_is_same_device(const uct_ib_address_t *ib_addr, uint16_t dlid, + const union ibv_gid *dgid) +{ + uct_ib_address_pack_params_t params; + + uct_ib_address_unpack(ib_addr, &params); + return uct_ib_iface_is_same_device_unpacked(&params, dlid, dgid); } static int uct_ib_iface_gid_extract_flid(const union ibv_gid *gid) @@ -742,14 +752,22 @@ static int uct_ib_iface_is_flid_enabled(uct_ib_iface_t *iface) (uct_ib_iface_gid_extract_flid(&iface->gid_info.gid) != 0); } -static int uct_ib_iface_dev_addr_is_reachable(uct_ib_iface_t *iface, - const uct_ib_address_t *ib_addr) +int uct_ib_iface_dev_addr_is_reachable(uct_ib_iface_t *iface, + const uct_ib_address_t *ib_addr, + uct_iface_reachability_scope_t scope) { - int is_local_eth = uct_ib_iface_is_roce(iface); + int is_local_eth = uct_ib_iface_is_roce(iface); uct_ib_address_pack_params_t params; uct_ib_address_unpack(ib_addr, &params); + if ((scope == UCT_IFACE_REACHABILITY_SCOPE_DEVICE) && 
!uct_ib_iface_is_same_device_unpacked(&params, + uct_ib_iface_port_attr(iface)->lid, + &iface->gid_info.gid)) { + return 0; + } + if (/* at least one PKEY has to be with full membership */ !((params.pkey | iface->pkey) & UCT_IB_PKEY_MEMBERSHIP_MASK) || /* PKEY values have to be equal */ ((params.pkey ^ iface->pkey) & UCT_IB_PKEY_PARTITION_MASK)) { return 0; } @@ -797,16 +815,9 @@ int uct_ib_iface_is_reachable_v2(const uct_iface_h tl_iface, return 0; } - if (!uct_ib_iface_dev_addr_is_reachable(iface, device_addr)) { - return 0; - } - scope = UCS_PARAM_VALUE(UCT_IFACE_IS_REACHABLE_FIELD, params, scope, SCOPE, UCT_IFACE_REACHABILITY_SCOPE_NETWORK); - return (scope == UCT_IFACE_REACHABILITY_SCOPE_NETWORK) || - uct_ib_iface_is_same_device(device_addr, - uct_ib_iface_port_attr(iface)->lid, - &iface->gid_info.gid); + return uct_ib_iface_dev_addr_is_reachable(iface, device_addr, scope); } ucs_status_t uct_ib_iface_create_ah(uct_ib_iface_t *iface, diff --git a/src/uct/ib/base/ib_iface.h b/src/uct/ib/base/ib_iface.h index e840d72b694..73574e64740 100644 --- a/src/uct/ib/base/ib_iface.h +++ b/src/uct/ib/base/ib_iface.h @@ -718,6 +718,10 @@ uct_ib_iface_roce_dscp(uct_ib_iface_t *iface) return iface->config.traffic_class >> 2; } +int uct_ib_iface_dev_addr_is_reachable(uct_ib_iface_t *iface, + const uct_ib_address_t *ib_addr, + uct_iface_reachability_scope_t scope); + #if HAVE_DECL_IBV_CREATE_CQ_EX static UCS_F_ALWAYS_INLINE void uct_ib_fill_cq_attr(struct ibv_cq_init_attr_ex *cq_attr, diff --git a/src/uct/ib/dc/dc_mlx5.c b/src/uct/ib/dc/dc_mlx5.c index 50b5f2ca02c..d4e49b62a27 100644 --- a/src/uct/ib/dc/dc_mlx5.c +++ b/src/uct/ib/dc/dc_mlx5.c @@ -646,6 +646,7 @@ void uct_dc_mlx5_destroy_dct(uct_dc_mlx5_iface_t *iface) uct_ib_mlx5_devx_obj_destroy(iface->rx.dct.devx.obj, "DCT"); #endif break; + case UCT_IB_MLX5_OBJ_TYPE_NULL: case UCT_IB_MLX5_OBJ_TYPE_LAST: break; } diff --git a/src/uct/ib/mlx5/dv/ib_mlx5_dv.c b/src/uct/ib/mlx5/dv/ib_mlx5_dv.c index e8850cd7c41..5c8873eaa37 100644 --- a/src/uct/ib/mlx5/dv/ib_mlx5_dv.c +++ b/src/uct/ib/mlx5/dv/ib_mlx5_dv.c @@ -202,7 +202,9 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface, UCT_IB_MLX5DV_SET(qpc, qpc, pd, uct_ib_mlx5_devx_md_get_pdn(md)); UCT_IB_MLX5DV_SET(qpc, qpc, uar_page, uar->uar->page_id); ucs_assert((attr->super.srq == NULL) || (attr->super.srq_num != 0)); - UCT_IB_MLX5DV_SET(qpc, qpc, rq_type, !!attr->super.srq_num); + UCT_IB_MLX5DV_SET(qpc, qpc, rq_type, + attr->super.srq_num ? 1 /* SRQ */ : + max_rx ? 
0 /* RQ */ : 3 /* no RQ */); UCT_IB_MLX5DV_SET(qpc, qpc, srqn_rmpn_xrqn, attr->super.srq_num); UCT_IB_MLX5DV_SET(qpc, qpc, cqn_snd, send_cq->cq_num); UCT_IB_MLX5DV_SET(qpc, qpc, cqn_rcv, recv_cq->cq_num); @@ -319,6 +321,8 @@ ucs_status_t uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp, case UCT_IB_MLX5_OBJ_TYPE_DEVX: return uct_ib_mlx5_devx_obj_modify(qp->devx.obj, in, inlen, out, outlen, opcode_str); + case UCT_IB_MLX5_OBJ_TYPE_NULL: + return UCS_ERR_INVALID_PARAM; case UCT_IB_MLX5_OBJ_TYPE_LAST: return UCS_ERR_UNSUPPORTED; } @@ -352,6 +356,8 @@ uct_ib_mlx5_devx_query_qp(uct_ib_mlx5_qp_t *qp, void *in, size_t inlen, return UCS_ERR_IO_ERROR; } break; + case UCT_IB_MLX5_OBJ_TYPE_NULL: + return UCS_ERR_INVALID_PARAM; case UCT_IB_MLX5_OBJ_TYPE_LAST: return UCS_ERR_UNSUPPORTED; } diff --git a/src/uct/ib/mlx5/ib_mlx5.c b/src/uct/ib/mlx5/ib_mlx5.c index f77c928fc62..f4251f341cc 100644 --- a/src/uct/ib/mlx5/ib_mlx5.c +++ b/src/uct/ib/mlx5/ib_mlx5.c @@ -786,6 +786,8 @@ void uct_ib_mlx5_qp_mmio_cleanup(uct_ib_mlx5_qp_t *qp, uct_ib_mlx5_iface_put_res_domain(qp); uct_worker_tl_data_put(reg, uct_ib_mlx5_mmio_cleanup); break; + case UCT_IB_MLX5_OBJ_TYPE_NULL: + ucs_fatal("qp %p: TYPE_NULL", qp); case UCT_IB_MLX5_OBJ_TYPE_LAST: if (reg != NULL) { uct_worker_tl_data_put(reg, uct_ib_mlx5_mmio_cleanup); @@ -941,6 +943,8 @@ void uct_ib_mlx5_destroy_qp(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp) case UCT_IB_MLX5_OBJ_TYPE_DEVX: uct_ib_mlx5_devx_destroy_qp(md, qp); break; + case UCT_IB_MLX5_OBJ_TYPE_NULL: + ucs_fatal("md %p: qp %p: TYPE_NULL", md, qp); case UCT_IB_MLX5_OBJ_TYPE_LAST: break; } diff --git a/src/uct/ib/mlx5/ib_mlx5.h b/src/uct/ib/mlx5/ib_mlx5.h index f6acf832a73..2e5fadac9f0 100644 --- a/src/uct/ib/mlx5/ib_mlx5.h +++ b/src/uct/ib/mlx5/ib_mlx5.h @@ -437,6 +437,7 @@ typedef struct uct_ib_mlx5_dbrec { typedef enum { UCT_IB_MLX5_OBJ_TYPE_VERBS, UCT_IB_MLX5_OBJ_TYPE_DEVX, + UCT_IB_MLX5_OBJ_TYPE_NULL, UCT_IB_MLX5_OBJ_TYPE_LAST } uct_ib_mlx5_obj_type_t; diff --git a/src/uct/ib/rc/accel/gga_mlx5.c b/src/uct/ib/rc/accel/gga_mlx5.c index b407dd8725c..c0f99c9f7cf 100644 --- a/src/uct/ib/rc/accel/gga_mlx5.c +++ b/src/uct/ib/rc/accel/gga_mlx5.c @@ -12,6 +12,9 @@ #include #include +#include + +#define UCT_GGA_MLX5_OPAQUE_BUF_LEN 64 typedef struct { uct_ib_md_packed_mkey_t packed_mkey; @@ -24,39 +27,31 @@ typedef struct { uct_rc_mlx5_iface_common_t super; } uct_gga_mlx5_iface_t; -typedef struct uct_gga_mlx5_iface_config { +typedef struct { uct_rc_iface_config_t super; uct_rc_mlx5_iface_common_config_t rc_mlx5_common; } uct_gga_mlx5_iface_config_t; -typedef struct uct_gga_mlx5_dma_opaque { - uint32_t syndrom; - uint32_t reserved; - uint32_t scattered_length; - uint32_t gathered_length; - uint8_t reserved2[48]; -} UCS_S_PACKED uct_gga_mlx5_dma_opaque_t; - -typedef struct uct_gga_mlx5_dma_opaque_pair { - struct mlx5_dma_opaque *buf; - struct ibv_mr *mr; -} uct_gga_mlx5_dma_opaque_pair_t; - -typedef struct uct_gga_mlx5_iface_qp_cleanup_ctx { - uct_rc_mlx5_iface_common_qp_cleanup_ctx_t super; - uct_gga_mlx5_dma_opaque_pair_t dma_opaque; +typedef struct { + uint8_t buf[UCT_GGA_MLX5_OPAQUE_BUF_LEN]; + struct ibv_mr *mr; +} UCS_V_ALIGNED(UCS_SYS_CACHE_LINE_SIZE) uct_gga_mlx5_dma_opaque_buf_t; + +typedef struct { + uct_rc_mlx5_iface_common_qp_cleanup_ctx_t super; + uct_gga_mlx5_dma_opaque_buf_t dma_opaque; } uct_gga_mlx5_iface_qp_cleanup_ctx_t; -typedef struct uct_gga_mlx5_ep { - uct_rc_mlx5_base_ep_t super; - uct_gga_mlx5_dma_opaque_pair_t dma_opaque; +typedef struct { + uct_rc_mlx5_base_ep_t super; + 
uct_gga_mlx5_dma_opaque_buf_t dma_opaque; } uct_gga_mlx5_ep_t; enum { UCT_GGA_MLX5_EP_ADDRESS_FLAG_FLUSH_RKEY = UCS_BIT(0) }; -typedef struct uct_gga_mlx5_ep_address { +typedef struct { uint8_t flags; uct_ib_uint24_t qp_num; uint16_t flush_rkey; @@ -64,7 +59,7 @@ typedef struct uct_gga_mlx5_ep_address { * should be filled by 0 */ } UCS_S_PACKED uct_gga_mlx5_ep_address_t; -typedef struct uct_gga_mlx5_dev_addr { +typedef struct { uint64_t be_sys_image_guid; /* ID of xGVMI */ uct_ib_address_t ib_addr; /* common IB address */ } UCS_S_PACKED uct_gga_mlx5_dev_addr_t; @@ -227,6 +222,14 @@ uct_gga_mlx5_rkey_resolve(uct_ib_mlx5_md_t *md, uct_rkey_t rkey) static UCS_CLASS_DECLARE_DELETE_FUNC(uct_gga_mlx5_iface_t, uct_iface_t); +static unsigned uct_gga_mlx5_iface_progress(uct_iface_h iface) +{ + uct_rc_mlx5_iface_common_t *rc_iface = + ucs_derived_of(iface, uct_rc_mlx5_iface_common_t); + + return uct_rc_mlx5_iface_poll_tx(rc_iface, UCT_IB_MLX5_POLL_FLAG_HAS_EP); +} + static ucs_status_t uct_gga_mlx5_iface_event_fd_get(uct_iface_h tl_iface, int *fd_p) { @@ -281,33 +284,9 @@ uct_gga_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) static ucs_status_t uct_gga_mlx5_ep_enable_mmo(uct_gga_mlx5_ep_t *ep) { - uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ep->super.super.super.super.iface, - uct_rc_mlx5_iface_common_t); - uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.md, - uct_ib_mlx5_md_t); - char in[UCT_IB_MLX5DV_ST_SZ_BYTES(init2init_qp_in)] = {}; char out[UCT_IB_MLX5DV_ST_SZ_BYTES(init2init_qp_out)] = {}; void *qpce = UCT_IB_MLX5DV_ADDR_OF(init2init_qp_in, in, qpc_data_extension); - int rc; - - /* TODO: built-in to EP */ - rc = ucs_posix_memalign((void**)&ep->dma_opaque.buf, - sizeof(uct_gga_mlx5_dma_opaque_t), - sizeof(uct_gga_mlx5_dma_opaque_t), "gga_opaque_buf"); - if (rc != 0) { - ucs_error("cannot allocate MMO opaque buffer: %m"); - return UCS_ERR_NO_MEMORY; - } - - ep->dma_opaque.mr = ibv_reg_mr(md->super.pd, ep->dma_opaque.buf, - sizeof(uct_gga_mlx5_dma_opaque_t), - IBV_ACCESS_LOCAL_WRITE); - if (ep->dma_opaque.mr == NULL) { - ucs_error("cannot register MMO opaque buffer: %m"); - ucs_free(ep->dma_opaque.buf); - return UCS_ERR_IO_ERROR; - } UCT_IB_MLX5DV_SET(init2init_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_INIT2INIT_QP); @@ -315,7 +294,6 @@ uct_gga_mlx5_ep_enable_mmo(uct_gga_mlx5_ep_t *ep) UCT_IB_MLX5DV_SET(init2init_qp_in, in, qpn, ep->super.tx.wq.super.qp_num); UCT_IB_MLX5DV_SET64(init2init_qp_in, in, opt_param_mask_95_32, UCT_IB_MLX5_QPC_OPT_MASK_32_INIT2INIT_MMO); - UCT_IB_MLX5DV_SET(qpc_ext, qpce, mmo, 1); return uct_ib_mlx5_devx_obj_modify(ep->super.tx.wq.super.devx.obj, in, @@ -325,21 +303,47 @@ uct_gga_mlx5_ep_enable_mmo(uct_gga_mlx5_ep_t *ep) static UCS_CLASS_INIT_FUNC(uct_gga_mlx5_ep_t, const uct_ep_params_t *params) { + uct_iface_t *tl_iface = UCT_EP_PARAM_VALUE(params, iface, IFACE, NULL); + uct_base_iface_t *iface; + uct_ib_mlx5_md_t *md; + ucs_status_t status; + UCS_CLASS_CALL_SUPER_INIT(uct_rc_mlx5_base_ep_t, params); - return uct_gga_mlx5_ep_enable_mmo(self); + + iface = ucs_derived_of(tl_iface, uct_base_iface_t); + md = ucs_derived_of(iface->md, uct_ib_mlx5_md_t); + + self->dma_opaque.mr = ibv_reg_mr(md->super.pd, self->dma_opaque.buf, + UCT_GGA_MLX5_OPAQUE_BUF_LEN, + IBV_ACCESS_LOCAL_WRITE); + if (self->dma_opaque.mr == NULL) { + ucs_error("cannot register MMO opaque buffer: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + status = uct_gga_mlx5_ep_enable_mmo(self); + if (status != UCS_OK) { + goto err_dereg_buf; + } + + return UCS_OK; 
+ +err_dereg_buf: + ibv_dereg_mr(self->dma_opaque.mr); +err: + return status; } static UCS_CLASS_CLEANUP_FUNC(uct_gga_mlx5_ep_t) { - uct_gga_mlx5_iface_qp_cleanup_ctx_t *cleanup_ctx; - - cleanup_ctx = ucs_malloc(sizeof(*cleanup_ctx), "mlx5_qp_cleanup_ctx"); - ucs_assert_always(cleanup_ctx != NULL); - cleanup_ctx->super.qp = self->super.tx.wq.super; - cleanup_ctx->super.reg = self->super.tx.wq.reg; - cleanup_ctx->dma_opaque = self->dma_opaque; + uct_gga_mlx5_iface_qp_cleanup_ctx_t cleanup_ctx = { + .super.qp = self->super.tx.wq.super, + .super.reg = self->super.tx.wq.reg, + .dma_opaque = self->dma_opaque + }; - uct_rc_mlx5_base_ep_cleanup(&self->super, &cleanup_ctx->super); + uct_rc_mlx5_base_ep_cleanup(&self->super, &cleanup_ctx.super, 0); } UCS_CLASS_DEFINE(uct_gga_mlx5_ep_t, uct_rc_mlx5_base_ep_t); @@ -375,11 +379,10 @@ uct_gga_mlx5_ep_connect_to_ep_v2(uct_ep_h tl_ep, const uct_ep_addr_t *ep_addr, const uct_ep_connect_to_ep_params_t *params) { - uct_gga_mlx5_ep_t *ep = ucs_derived_of( - tl_ep, uct_gga_mlx5_ep_t); - uct_rc_mlx5_iface_common_t *iface = ucs_derived_of( - tl_ep->iface, uct_rc_mlx5_iface_common_t); - + uct_gga_mlx5_ep_t *ep = + ucs_derived_of(tl_ep, uct_gga_mlx5_ep_t); + uct_rc_mlx5_iface_common_t *iface = + ucs_derived_of(tl_ep->iface, uct_rc_mlx5_iface_common_t); const uct_gga_mlx5_dev_addr_t *gga_dev_addr = (uct_gga_mlx5_dev_addr_t*)device_addr; const uct_ib_address_t *ib_addr = &gga_dev_addr->ib_addr; @@ -452,9 +455,9 @@ static uct_iface_ops_t uct_gga_mlx5_iface_tl_ops = { .ep_connect_to_ep = ucs_empty_function_return_unsupported, .iface_flush = uct_rc_iface_flush, .iface_fence = uct_rc_iface_fence, - .iface_progress_enable = uct_rc_mlx5_iface_progress_enable, + .iface_progress_enable = uct_base_iface_progress_enable, .iface_progress_disable = uct_base_iface_progress_disable, - .iface_progress = (uct_iface_progress_func_t)ucs_empty_function_do_assert, + .iface_progress = uct_gga_mlx5_iface_progress, .iface_event_fd_get = uct_gga_mlx5_iface_event_fd_get, .iface_event_arm = uct_gga_mlx5_iface_arm, .iface_close = uct_gga_mlx5_iface_t_delete, @@ -466,25 +469,40 @@ static uct_iface_ops_t uct_gga_mlx5_iface_tl_ops = { static int uct_gga_mlx5_iface_is_reachable_v2( - const uct_iface_h tl_iface, const uct_iface_is_reachable_params_t *params) + const uct_iface_h tl_iface, + const uct_iface_is_reachable_params_t *params) { uct_ib_iface_t *iface = ucs_derived_of(tl_iface, uct_ib_iface_t); uct_ib_device_t *device = uct_ib_iface_device(iface); - const uct_gga_mlx5_dev_addr_t *dev_addr = (const uct_gga_mlx5_dev_addr_t *) - UCS_PARAM_VALUE(UCT_IFACE_IS_REACHABLE_FIELD, params, device_addr, - DEVICE_ADDR, NULL); - uct_iface_is_reachable_params_t ib_params; - - if ((dev_addr == NULL) || - (be64toh(dev_addr->be_sys_image_guid) != - be64toh(device->dev_attr.orig_attr.sys_image_guid))) { + const uct_gga_mlx5_dev_addr_t *dev_addr; + uct_iface_reachability_scope_t scope; + + if (!uct_iface_is_reachable_params_addrs_valid(params)) { + return 0; + } + + dev_addr = (const uct_gga_mlx5_dev_addr_t*)params->device_addr; + if (dev_addr->be_sys_image_guid != + device->dev_attr.orig_attr.sys_image_guid) { return 0; } - ib_params = *params; - ib_params.device_addr = (const uct_device_addr_t*)&dev_addr->ib_addr; - return uct_ib_iface_is_reachable_v2(tl_iface, &ib_params); + scope = UCS_PARAM_VALUE(UCT_IFACE_IS_REACHABLE_FIELD, params, scope, SCOPE, + UCT_IFACE_REACHABILITY_SCOPE_NETWORK); + return uct_ib_iface_dev_addr_is_reachable(iface, &dev_addr->ib_addr, scope); +} + +static ucs_status_t 
+uct_gga_mlx5_iface_init_rx(uct_rc_iface_t *rc_iface, + const uct_rc_iface_common_config_t *rc_config) +{ + uct_rc_mlx5_iface_common_t *iface = + ucs_derived_of(rc_iface, uct_rc_mlx5_iface_common_t); + + iface->rx.srq.type = UCT_IB_MLX5_OBJ_TYPE_NULL; + iface->rx.srq.srq_num = 0; + return UCS_OK; } static void @@ -494,7 +512,6 @@ uct_gga_mlx5_iface_qp_cleanup(uct_rc_iface_qp_cleanup_ctx_t *ctx) ucs_derived_of(ctx, uct_gga_mlx5_iface_qp_cleanup_ctx_t); ibv_dereg_mr(gga_ctx->dma_opaque.mr); - ucs_free(gga_ctx->dma_opaque.buf); uct_rc_mlx5_iface_common_qp_cleanup(&gga_ctx->super); } @@ -514,8 +531,8 @@ static uct_rc_iface_ops_t uct_gga_mlx5_iface_ops = { .event_cq = uct_rc_mlx5_iface_common_event_cq, .handle_failure = (uct_ib_iface_handle_failure_func_t)ucs_empty_function_do_assert_void, }, - .init_rx = uct_rc_mlx5_iface_init_rx, - .cleanup_rx = uct_rc_mlx5_iface_cleanup_rx, + .init_rx = uct_gga_mlx5_iface_init_rx, + .cleanup_rx = ucs_empty_function, .fc_ctrl = ucs_empty_function_return_unsupported, .fc_handler = (uct_rc_iface_fc_handler_func_t)ucs_empty_function_do_assert, .cleanup_qp = uct_gga_mlx5_iface_qp_cleanup, diff --git a/src/uct/ib/rc/accel/rc_mlx5.h b/src/uct/ib/rc/accel/rc_mlx5.h index d071b4873aa..abf942142a7 100644 --- a/src/uct/ib/rc/accel/rc_mlx5.h +++ b/src/uct/ib/rc/accel/rc_mlx5.h @@ -232,15 +232,8 @@ ucs_status_t uct_rc_mlx5_iface_event_fd_get(uct_iface_h tl_iface, int *fd_p); ucs_status_t uct_rc_mlx5_iface_arm(uct_iface_h tl_iface, unsigned events); -ucs_status_t -uct_rc_mlx5_iface_init_rx(uct_rc_iface_t *rc_iface, - const uct_rc_iface_common_config_t *rc_config); - -void uct_rc_mlx5_iface_progress_enable(uct_iface_h tl_iface, unsigned flags); - -void uct_rc_mlx5_iface_cleanup_rx(uct_rc_iface_t *rc_iface); - void uct_rc_mlx5_base_ep_cleanup(uct_rc_mlx5_base_ep_t *ep, - uct_rc_mlx5_iface_common_qp_cleanup_ctx_t *ctx); + uct_rc_mlx5_iface_common_qp_cleanup_ctx_t *ctx, + int async); #endif diff --git a/src/uct/ib/rc/accel/rc_mlx5.inl b/src/uct/ib/rc/accel/rc_mlx5.inl index b5a7a68175d..4e503d212d3 100644 --- a/src/uct/ib/rc/accel/rc_mlx5.inl +++ b/src/uct/ib/rc/accel/rc_mlx5.inl @@ -1807,3 +1807,66 @@ uct_rc_mlx5_iface_common_atomic_data(unsigned opcode, unsigned size, uint64_t va } return UCS_OK; } + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_update_tx_res(uct_rc_iface_t *rc_iface, + uct_rc_mlx5_base_ep_t *rc_mlx5_base_ep, + uint16_t hw_ci) +{ + uct_ib_mlx5_txwq_t *txwq = &rc_mlx5_base_ep->tx.wq; + uct_rc_txqp_t *txqp = &rc_mlx5_base_ep->super.txqp; + uint16_t bb_num; + + bb_num = uct_ib_mlx5_txwq_update_bb(txwq, hw_ci) - + uct_rc_txqp_available(txqp); + + /* Must always have positive number of released resources. The first + * completion will report bb_num=1 (because prev_sw_pi is initialized to -1) + * and all the rest report the amount of BBs the previous WQE has consumed. 
+ */ + ucs_assertv(bb_num > 0, "hw_ci=%d prev_sw_pi=%d available=%d bb_num=%d", + hw_ci, txwq->prev_sw_pi, txqp->available, bb_num); + + uct_rc_txqp_available_add(txqp, bb_num); + ucs_assert(uct_rc_txqp_available(txqp) <= txwq->bb_max); + + uct_rc_iface_update_reads(rc_iface); + uct_rc_iface_add_cq_credits(rc_iface, bb_num); +} + +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_mlx5_iface_poll_tx(uct_rc_mlx5_iface_common_t *iface, int poll_flags) +{ + struct mlx5_cqe64 *cqe; + uct_rc_mlx5_base_ep_t *ep; + unsigned qp_num; + uint16_t hw_ci; + + cqe = uct_ib_mlx5_poll_cq(&iface->super.super, &iface->cq[UCT_IB_DIR_TX], + poll_flags, uct_ib_mlx5_check_completion); + if (cqe == NULL) { + return 0; + } + + UCS_STATS_UPDATE_COUNTER(iface->super.super.stats, + UCT_IB_IFACE_STAT_TX_COMPLETION, 1); + + ucs_memory_cpu_load_fence(); + + qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER); + ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, qp_num), + uct_rc_mlx5_base_ep_t); + ucs_assert(ep != NULL); + + hw_ci = ntohs(cqe->wqe_counter); + ucs_trace_poll("rc_mlx5 iface %p tx_cqe: ep %p qpn 0x%x hw_ci %d", iface, + ep, qp_num, hw_ci); + + uct_rc_mlx5_txqp_process_tx_cqe(&ep->super.txqp, cqe, hw_ci); + ucs_arbiter_group_schedule(&iface->super.tx.arbiter, &ep->super.arb_group); + uct_rc_mlx5_iface_update_tx_res(&iface->super, ep, hw_ci); + uct_rc_iface_arbiter_dispatch(&iface->super); + uct_ib_mlx5_update_db_cq_ci(&iface->cq[UCT_IB_DIR_TX]); + + return 1; +} diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.c b/src/uct/ib/rc/accel/rc_mlx5_common.c index 8c989c8954e..e4962b0faa1 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_common.c +++ b/src/uct/ib/rc/accel/rc_mlx5_common.c @@ -210,7 +210,8 @@ void uct_rc_mlx5_iface_common_prepost_recvs(uct_rc_mlx5_iface_common_t *iface) { /* prepost recvs only if quota available (recvs were not preposted * before) */ - if (iface->super.rx.srq.quota == 0) { + if ((iface->super.rx.srq.quota == 0) || + (iface->rx.srq.type == UCT_IB_MLX5_OBJ_TYPE_NULL)) { return; } @@ -526,6 +527,7 @@ void uct_rc_mlx5_iface_fill_attr(uct_rc_mlx5_iface_common_t *iface, srq->verbs.srq); break; case UCT_IB_MLX5_OBJ_TYPE_DEVX: + case UCT_IB_MLX5_OBJ_TYPE_NULL: uct_rc_iface_fill_attr(&iface->super, &qp_attr->super, max_send_wr, NULL); qp_attr->mmio_mode = iface->tx.mmio_mode; break; @@ -579,6 +581,7 @@ void uct_rc_mlx5_destroy_srq(uct_ib_mlx5_md_t *md, uct_ib_mlx5_srq_t *srq) uct_rc_mlx5_devx_cleanup_srq(md, srq); #endif break; + case UCT_IB_MLX5_OBJ_TYPE_NULL: case UCT_IB_MLX5_OBJ_TYPE_LAST: break; } diff --git a/src/uct/ib/rc/accel/rc_mlx5_ep.c b/src/uct/ib/rc/accel/rc_mlx5_ep.c index 02b171183c0..d3552849c4c 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_ep.c +++ b/src/uct/ib/rc/accel/rc_mlx5_ep.c @@ -1068,11 +1068,13 @@ UCS_CLASS_INIT_FUNC(uct_rc_mlx5_base_ep_t, const uct_ep_params_t *params) } } - status = uct_ib_device_async_event_register(&md->super.dev, - IBV_EVENT_QP_LAST_WQE_REACHED, - self->tx.wq.super.qp_num); - if (status != UCS_OK) { - goto err_destroy_txwq_qp; + if (iface->rx.srq.type != UCT_IB_MLX5_OBJ_TYPE_NULL) { + status = uct_ib_device_async_event_register(&md->super.dev, + IBV_EVENT_QP_LAST_WQE_REACHED, + self->tx.wq.super.qp_num); + if (status != UCS_OK) { + goto err_destroy_txwq_qp; + } } status = uct_rc_iface_add_qp(&iface->super, &self->super, @@ -1087,16 +1089,19 @@ UCS_CLASS_INIT_FUNC(uct_rc_mlx5_base_ep_t, const uct_ep_params_t *params) return UCS_OK; err_event_unreg: - uct_ib_device_async_event_unregister(&md->super.dev, - IBV_EVENT_QP_LAST_WQE_REACHED, - 
self->tx.wq.super.qp_num); + if (iface->rx.srq.type != UCT_IB_MLX5_OBJ_TYPE_NULL) { + uct_ib_device_async_event_unregister(&md->super.dev, + IBV_EVENT_QP_LAST_WQE_REACHED, + self->tx.wq.super.qp_num); + } err_destroy_txwq_qp: uct_ib_mlx5_destroy_qp(md, &self->tx.wq.super); return status; } void uct_rc_mlx5_base_ep_cleanup(uct_rc_mlx5_base_ep_t *ep, - uct_rc_mlx5_iface_common_qp_cleanup_ctx_t *ctx) + uct_rc_mlx5_iface_common_qp_cleanup_ctx_t *ctx, + int async) { uct_rc_mlx5_iface_common_t *iface = ucs_derived_of( ep->super.super.super.iface, uct_rc_mlx5_iface_common_t); @@ -1114,7 +1119,7 @@ void uct_rc_mlx5_base_ep_cleanup(uct_rc_mlx5_base_ep_t *ep, wqe_count = uct_ib_mlx5_txwq_num_posted_wqes(&ep->tx.wq, outstanding); ucs_assert(outstanding >= wqe_count); uct_rc_ep_cleanup_qp(&ep->super, &ctx->super, ep->tx.wq.super.qp_num, - outstanding - wqe_count); + outstanding - wqe_count, async); } UCS_CLASS_CLEANUP_FUNC(uct_rc_mlx5_base_ep_t) @@ -1183,7 +1188,7 @@ UCS_CLASS_CLEANUP_FUNC(uct_rc_mlx5_ep_t) cleanup_ctx->super.reg = self->super.tx.wq.reg; cleanup_ctx->tm_qp = self->tm_qp; - uct_rc_mlx5_base_ep_cleanup(&self->super, &cleanup_ctx->super); + uct_rc_mlx5_base_ep_cleanup(&self->super, &cleanup_ctx->super, 1); } UCS_CLASS_DEFINE(uct_rc_mlx5_ep_t, uct_rc_mlx5_base_ep_t); diff --git a/src/uct/ib/rc/accel/rc_mlx5_iface.c b/src/uct/ib/rc/accel/rc_mlx5_iface.c index a559ebedac3..807b553c2f7 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_iface.c +++ b/src/uct/ib/rc/accel/rc_mlx5_iface.c @@ -113,69 +113,6 @@ uct_rc_mlx5_iface_check_rx_completion(uct_ib_iface_t *ib_iface, return NULL; } -static UCS_F_ALWAYS_INLINE void -uct_rc_mlx5_iface_update_tx_res(uct_rc_iface_t *rc_iface, - uct_rc_mlx5_base_ep_t *rc_mlx5_base_ep, - uint16_t hw_ci) -{ - uct_ib_mlx5_txwq_t *txwq = &rc_mlx5_base_ep->tx.wq; - uct_rc_txqp_t *txqp = &rc_mlx5_base_ep->super.txqp; - uint16_t bb_num; - - bb_num = uct_ib_mlx5_txwq_update_bb(txwq, hw_ci) - - uct_rc_txqp_available(txqp); - - /* Must always have positive number of released resources. The first - * completion will report bb_num=1 (because prev_sw_pi is initialized to -1) - * and all the rest report the amount of BBs the previous WQE has consumed. 
- */ - ucs_assertv(bb_num > 0, "hw_ci=%d prev_sw_pi=%d available=%d bb_num=%d", - hw_ci, txwq->prev_sw_pi, txqp->available, bb_num); - - uct_rc_txqp_available_add(txqp, bb_num); - ucs_assert(uct_rc_txqp_available(txqp) <= txwq->bb_max); - - uct_rc_iface_update_reads(rc_iface); - uct_rc_iface_add_cq_credits(rc_iface, bb_num); -} - -static UCS_F_ALWAYS_INLINE unsigned -uct_rc_mlx5_iface_poll_tx(uct_rc_mlx5_iface_common_t *iface, int poll_flags) -{ - struct mlx5_cqe64 *cqe; - uct_rc_mlx5_base_ep_t *ep; - unsigned qp_num; - uint16_t hw_ci; - - cqe = uct_ib_mlx5_poll_cq(&iface->super.super, &iface->cq[UCT_IB_DIR_TX], - poll_flags, uct_ib_mlx5_check_completion); - if (cqe == NULL) { - return 0; - } - - UCS_STATS_UPDATE_COUNTER(iface->super.super.stats, - UCT_IB_IFACE_STAT_TX_COMPLETION, 1); - - ucs_memory_cpu_load_fence(); - - qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER); - ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, qp_num), - uct_rc_mlx5_base_ep_t); - ucs_assert(ep != NULL); - - hw_ci = ntohs(cqe->wqe_counter); - ucs_trace_poll("rc_mlx5 iface %p tx_cqe: ep %p qpn 0x%x hw_ci %d", iface, - ep, qp_num, hw_ci); - - uct_rc_mlx5_txqp_process_tx_cqe(&ep->super.txqp, cqe, hw_ci); - ucs_arbiter_group_schedule(&iface->super.tx.arbiter, &ep->super.arb_group); - uct_rc_mlx5_iface_update_tx_res(&iface->super, ep, hw_ci); - uct_rc_iface_arbiter_dispatch(&iface->super); - uct_ib_mlx5_update_db_cq_ci(&iface->cq[UCT_IB_DIR_TX]); - - return 1; -} - static UCS_F_ALWAYS_INLINE unsigned uct_rc_mlx5_iface_progress(void *arg, int flags) { @@ -302,7 +239,8 @@ uct_rc_mlx5_iface_handle_failure(uct_ib_iface_t *ib_iface, void *arg, uct_rc_iface_arbiter_dispatch(iface); } -void uct_rc_mlx5_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) +static void +uct_rc_mlx5_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) { uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_common_t); @@ -594,7 +532,7 @@ static ucs_status_t uct_rc_mlx5_iface_preinit(uct_rc_mlx5_iface_common_t *iface, return UCS_OK; } -ucs_status_t +static ucs_status_t uct_rc_mlx5_iface_init_rx(uct_rc_iface_t *rc_iface, const uct_rc_iface_common_config_t *rc_config) { @@ -646,7 +584,7 @@ uct_rc_mlx5_iface_init_rx(uct_rc_iface_t *rc_iface, return UCS_OK; } -void uct_rc_mlx5_iface_cleanup_rx(uct_rc_iface_t *rc_iface) +static void uct_rc_mlx5_iface_cleanup_rx(uct_rc_iface_t *rc_iface) { uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(rc_iface, uct_rc_mlx5_iface_common_t); uct_ib_mlx5_md_t *md = ucs_derived_of(rc_iface->super.super.md, diff --git a/src/uct/ib/rc/base/rc_ep.c b/src/uct/ib/rc/base/rc_ep.c index 2fb375ab0bf..f3f48d174d5 100644 --- a/src/uct/ib/rc/base/rc_ep.c +++ b/src/uct/ib/rc/base/rc_ep.c @@ -109,7 +109,7 @@ void uct_rc_fc_cleanup(uct_rc_fc_t *fc) void uct_rc_ep_cleanup_qp(uct_rc_ep_t *ep, uct_rc_iface_qp_cleanup_ctx_t *cleanup_ctx, - uint32_t qp_num, uint16_t cq_credits) + uint32_t qp_num, uint16_t cq_credits, int async) { uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_rc_iface_t); @@ -126,12 +126,16 @@ void uct_rc_ep_cleanup_qp(uct_rc_ep_t *ep, cleanup_ctx->iface = iface; cleanup_ctx->qp_num = qp_num; cleanup_ctx->cq_credits = cq_credits; - ucs_list_add_tail(&iface->qp_gc_list, &cleanup_ctx->list); + if (async) { + ucs_list_add_tail(&iface->qp_gc_list, &cleanup_ctx->list); - status = uct_ib_device_async_event_wait(&md->dev, - IBV_EVENT_QP_LAST_WQE_REACHED, - qp_num, &cleanup_ctx->super); - ucs_assert_always(status == UCS_OK); + status = 
uct_ib_device_async_event_wait(&md->dev, + IBV_EVENT_QP_LAST_WQE_REACHED, + qp_num, &cleanup_ctx->super); + ucs_assert_always(status == UCS_OK); + } else { + uct_rc_iface_qp_cleanup(cleanup_ctx); + } } UCS_CLASS_INIT_FUNC(uct_rc_ep_t, uct_rc_iface_t *iface, uint32_t qp_num, diff --git a/src/uct/ib/rc/base/rc_ep.h b/src/uct/ib/rc/base/rc_ep.h index ad7ddab6c31..fc48c99a4da 100644 --- a/src/uct/ib/rc/base/rc_ep.h +++ b/src/uct/ib/rc/base/rc_ep.h @@ -288,7 +288,7 @@ int uct_rc_ep_is_connected(struct ibv_ah_attr *ah_attr, void uct_rc_ep_cleanup_qp(uct_rc_ep_t *ep, uct_rc_iface_qp_cleanup_ctx_t *cleanup_ctx, - uint32_t qp_num, uint16_t cq_credits); + uint32_t qp_num, uint16_t cq_credits, int async); void uct_rc_ep_pending_purge_warn_cb(uct_pending_req_t *self, void *arg); diff --git a/src/uct/ib/rc/base/rc_iface.c b/src/uct/ib/rc/base/rc_iface.c index cc91eeaa117..9a4ce6d2fcc 100644 --- a/src/uct/ib/rc/base/rc_iface.c +++ b/src/uct/ib/rc/base/rc_iface.c @@ -754,24 +754,31 @@ UCS_CLASS_INIT_FUNC(uct_rc_iface_t, uct_iface_ops_t *tl_ops, return status; } +void uct_rc_iface_qp_cleanup(uct_rc_iface_qp_cleanup_ctx_t *cleanup_ctx) +{ + uct_rc_iface_t *iface = cleanup_ctx->iface; + uct_rc_iface_ops_t *ops = ucs_derived_of(iface->super.ops, + uct_rc_iface_ops_t); + + ops->cleanup_qp(cleanup_ctx); + if (cleanup_ctx->cq_credits == 0) { + return; + } + + uct_rc_iface_add_cq_credits(iface, cleanup_ctx->cq_credits); + uct_rc_iface_arbiter_dispatch(iface); +} + unsigned uct_rc_iface_qp_cleanup_progress(void *arg) { uct_rc_iface_qp_cleanup_ctx_t *cleanup_ctx = arg; - uct_rc_iface_t *iface = cleanup_ctx->iface; - uct_rc_iface_ops_t *ops; + uct_ib_iface_t *iface = + ucs_derived_of(cleanup_ctx->iface, uct_ib_iface_t); - uct_ib_device_async_event_unregister(uct_ib_iface_device(&iface->super), + uct_ib_device_async_event_unregister(uct_ib_iface_device(iface), IBV_EVENT_QP_LAST_WQE_REACHED, cleanup_ctx->qp_num); - - ops = ucs_derived_of(iface->super.ops, uct_rc_iface_ops_t); - ops->cleanup_qp(cleanup_ctx); - - if (cleanup_ctx->cq_credits > 0) { - uct_rc_iface_add_cq_credits(iface, cleanup_ctx->cq_credits); - uct_rc_iface_arbiter_dispatch(iface); - } - + uct_rc_iface_qp_cleanup(cleanup_ctx); ucs_list_del(&cleanup_ctx->list); ucs_free(cleanup_ctx); return 1; diff --git a/src/uct/ib/rc/base/rc_iface.h b/src/uct/ib/rc/base/rc_iface.h index c7a31d27921..79953fb5573 100644 --- a/src/uct/ib/rc/base/rc_iface.h +++ b/src/uct/ib/rc/base/rc_iface.h @@ -393,6 +393,8 @@ void uct_rc_ep_am_zcopy_handler(uct_rc_iface_send_op_t *op, const void *resp); void uct_rc_iface_cleanup_qps(uct_rc_iface_t *iface); +void uct_rc_iface_qp_cleanup(uct_rc_iface_qp_cleanup_ctx_t *cleanup_ctx); + unsigned uct_rc_iface_qp_cleanup_progress(void *arg); /** diff --git a/src/uct/ib/rc/verbs/rc_verbs_ep.c b/src/uct/ib/rc/verbs/rc_verbs_ep.c index 681bb99db45..5da4f0b7449 100644 --- a/src/uct/ib/rc/verbs/rc_verbs_ep.c +++ b/src/uct/ib/rc/verbs/rc_verbs_ep.c @@ -734,7 +734,7 @@ UCS_CLASS_CLEANUP_FUNC(uct_rc_verbs_ep_t) cleanup_ctx->qp = self->qp; ucs_assert(UCS_CIRCULAR_COMPARE16(self->txcnt.pi, >=, self->txcnt.ci)); uct_rc_ep_cleanup_qp(&self->super, &cleanup_ctx->super, self->qp->qp_num, - self->txcnt.pi - self->txcnt.ci); + self->txcnt.pi - self->txcnt.ci, 1); } UCS_CLASS_DEFINE(uct_rc_verbs_ep_t, uct_rc_ep_t);