diff --git a/src/mpid/ch4/netmod/ofi/ofi_events.c b/src/mpid/ch4/netmod/ofi/ofi_events.c index 7dc2023cf38..c3cb5a71d7e 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_events.c +++ b/src/mpid/ch4/netmod/ofi/ofi_events.c @@ -568,9 +568,8 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret) MPIR_STATUS_SET_CANCEL_BIT(req->status, TRUE); MPIR_STATUS_SET_COUNT(req->status, 0); MPIR_Datatype_release_if_not_builtin(MPIDI_OFI_REQUEST(req, datatype)); - if ((event_id == MPIDI_OFI_EVENT_RECV_PACK) && - MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer)) { - MPL_free(MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer)); + if (event_id == MPIDI_OFI_EVENT_RECV_PACK) { + MPIDI_OFI_free_pack_buffer(req); } else if (event_id == MPIDI_OFI_EVENT_RECV_NOPACK) { MPL_free(MPIDI_OFI_REQUEST(req, noncontig.nopack.iovs)); } diff --git a/src/mpid/ch4/netmod/ofi/ofi_events.h b/src/mpid/ch4/netmod/ofi/ofi_events.h index 7ca8d300b0d..13e44674918 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_events.h +++ b/src/mpid/ch4/netmod/ofi/ofi_events.h @@ -46,9 +46,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send_event(int vci, MPIR_FUNC_ENTER; /* free the packing buffers and datatype */ - if ((event_id == MPIDI_OFI_EVENT_SEND_PACK) && - (MPIDI_OFI_REQUEST(sreq, noncontig.pack.pack_buffer))) { - MPL_free(MPIDI_OFI_REQUEST(sreq, noncontig.pack.pack_buffer)); + if (event_id == MPIDI_OFI_EVENT_SEND_PACK) { + MPIDI_OFI_free_pack_buffer(sreq); } else if (MPIDI_OFI_ENABLE_PT2PT_NOPACK && (event_id == MPIDI_OFI_EVENT_SEND_NOPACK)) { MPL_free(MPIDI_OFI_REQUEST(sreq, noncontig.nopack.iovs)); } @@ -97,7 +96,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_recv_complete(MPIR_Request * rreq, int ev if (mpi_errno) { MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch"); } - MPL_free(MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer)); + MPIDI_OFI_free_pack_buffer(rreq); } else if (event_id == MPIDI_OFI_EVENT_RECV_NOPACK) { #ifdef HAVE_ERROR_CHECKING MPI_Count elements; diff --git 
a/src/mpid/ch4/netmod/ofi/ofi_impl.h b/src/mpid/ch4/netmod/ofi/ofi_impl.h
index 8f441c16ca6..628a8c9e561 100644
--- a/src/mpid/ch4/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch4/netmod/ofi/ofi_impl.h
@@ -833,27 +833,48 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_gpu_rma_register(const void *buffer, siz
 #undef CQ_D_HEAD
 #undef CQ_D_TAIL
 
-MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_malloc_pack_buffer(void **ptr, size_t pack_size)
-{
-    if (MPIDI_OFI_ENABLE_HMEM) {
-        return MPL_gpu_malloc_host(ptr, pack_size);
-    } else {
-#ifdef MPL_DEFINE_ALIGNED_ALLOC
-        *ptr = MPL_aligned_alloc(256, pack_size, MPL_MEM_BUFFER);
-#else
-        *ptr = MPL_malloc(pack_size, MPL_MEM_BUFFER);
-#endif
-        return 0;
+/* Allocate a pack buffer of pack_size bytes and record it in req.
+ *
+ * Sizes up to MPIR_CVAR_CH4_OFI_PIPELINE_CHUNK_SZ first try a cell from the
+ * per-VCI pipeline pool; larger sizes, or a pool attempt that leaves the pointer
+ * NULL, use MPL_aligned_alloc. The request's pack_buffer and is_genq fields are
+ * always written, so pack_buffer is NULL when allocation fails.
+ *
+ * Returns the buffer, or NULL on failure. */
+MPL_STATIC_INLINE_PREFIX void *MPIDI_OFI_malloc_pack_buffer(MPIR_Request * req, MPI_Aint pack_size)
+{
+    /* initialized because the pool branch is skipped for large sizes */
+    void *pack_buf = NULL;
+    bool is_genq = false;
+    if (pack_size <= MPIR_CVAR_CH4_OFI_PIPELINE_CHUNK_SZ) {
+        int vci = MPIR_REQUEST_POOL_FROM_HANDLE(req->handle);
+        MPIDU_genq_private_pool_alloc_cell(MPIDI_OFI_global.per_vci[vci].pipeline_pool, &pack_buf);
+        is_genq = true;
+    }
+    if (!pack_buf) {
+        pack_buf = MPL_aligned_alloc(64, pack_size, MPL_MEM_OTHER);
+        is_genq = false;
     }
+    /* store unconditionally so callers that test the request field see NULL on failure */
+    MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer) = pack_buf;
+    MPIDI_OFI_REQUEST(req, noncontig.pack.is_genq) = is_genq;
+    return pack_buf;
 }
 
-MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_free_pack_buffer(void *ptr)
+/* Release the pack buffer recorded in req, if any: back to the per-VCI pipeline
+ * pool when is_genq is set, via MPL_free otherwise. The pointer is then cleared
+ * so that a second call on the same request does nothing. */
+MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_free_pack_buffer(MPIR_Request * req)
 {
-    if (MPIDI_OFI_ENABLE_HMEM) {
-        return MPL_gpu_free_host(ptr);
-    } else {
-        MPL_free(ptr);
-        return 0;
+    if (MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer)) {
+        if (MPIDI_OFI_REQUEST(req, noncontig.pack.is_genq)) {
+            int vci = MPIR_REQUEST_POOL_FROM_HANDLE(req->handle);
+            MPIDU_genq_private_pool_free_cell(MPIDI_OFI_global.per_vci[vci].pipeline_pool,
+                                              MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer));
+        } else {
+            MPL_free(MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer));
+        }
+        MPIDI_OFI_REQUEST(req, noncontig.pack.pack_buffer) = NULL;
     }
 }
 
diff --git 
a/src/mpid/ch4/netmod/ofi/ofi_pre.h b/src/mpid/ch4/netmod/ofi/ofi_pre.h index 795c8e5c6f5..fa25b58165c 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_pre.h +++ b/src/mpid/ch4/netmod/ofi/ofi_pre.h @@ -302,6 +302,7 @@ typedef struct { enum MPIDI_OFI_req_kind kind; union { struct { + bool is_genq; char *pack_buffer; } pack; struct { diff --git a/src/mpid/ch4/netmod/ofi/ofi_recv.h b/src/mpid/ch4/netmod/ofi/ofi_recv.h index 9a78284e0f2..21401324b54 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_recv.h +++ b/src/mpid/ch4/netmod/ofi/ofi_recv.h @@ -224,11 +224,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_irecv(void *buf, /* Unpack */ MPIDI_OFI_REQUEST(rreq, event_id) = MPIDI_OFI_EVENT_RECV_PACK; - MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer) = - MPL_aligned_alloc(64, data_sz, MPL_MEM_OTHER); + recv_buf = MPIDI_OFI_malloc_pack_buffer(rreq, data_sz); MPIR_ERR_CHKANDJUMP1(MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer) == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "Recv Pack Buffer alloc"); - recv_buf = MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer); } else { MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer) = NULL; } diff --git a/src/mpid/ch4/netmod/ofi/ofi_rndv.c b/src/mpid/ch4/netmod/ofi/ofi_rndv.c index b6fa3b57bd4..0582f27111b 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_rndv.c +++ b/src/mpid/ch4/netmod/ofi/ofi_rndv.c @@ -91,7 +91,7 @@ int MPIDI_OFI_recv_rndv_event(int vci, struct fi_cq_tagged_entry *wc, MPIR_Reque /* if we were expecting an eager send, free the unneeded pack_buffer or iovs array */ switch (MPIDI_OFI_REQUEST(rreq, event_id)) { case MPIDI_OFI_EVENT_RECV_PACK: - MPL_free(MPIDI_OFI_REQUEST(rreq, noncontig.pack.pack_buffer)); + MPIDI_OFI_free_pack_buffer(rreq); break; case MPIDI_OFI_EVENT_RECV_NOPACK: MPL_free(MPIDI_OFI_REQUEST(rreq, noncontig.nopack.iovs)); diff --git a/src/mpid/ch4/netmod/ofi/ofi_send.h b/src/mpid/ch4/netmod/ofi/ofi_send.h index ae636e7a784..b9fea3d912e 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_send.h +++ 
b/src/mpid/ch4/netmod/ofi/ofi_send.h @@ -464,7 +464,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send(const void *buf, MPI_Aint count, MPI void *data = NULL; if (need_pack) { - void *pack_buf = MPL_aligned_alloc(64, data_sz, MPL_MEM_OTHER); + void *pack_buf = MPIDI_OFI_malloc_pack_buffer(sreq, data_sz); MPIR_ERR_CHKANDJUMP1(pack_buf == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "Send Pack buffer alloc"); @@ -475,7 +475,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send(const void *buf, MPI_Aint count, MPI MPIR_ERR_CHECK(mpi_errno); data = pack_buf; - MPIDI_OFI_REQUEST(sreq, noncontig.pack.pack_buffer) = pack_buf; } else { data = MPIR_get_contig_ptr(buf, dt_true_lb); MPIDI_OFI_REQUEST(sreq, noncontig.pack.pack_buffer) = NULL;