diff --git a/prov/efa/src/rdm/efa_rdm_ep_utils.c b/prov/efa/src/rdm/efa_rdm_ep_utils.c index 9c2132710db..aec7af79948 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_utils.c +++ b/prov/efa/src/rdm/efa_rdm_ep_utils.c @@ -510,6 +510,7 @@ void efa_rdm_ep_record_tx_op_completed(struct efa_rdm_ep *ep, struct efa_rdm_pke pkt_entry->peer->efa_outstanding_tx_ops--; if (ope) { + assert(ope->efa_outstanding_tx_ops > 0); ope->efa_outstanding_tx_ops--; switch(efa_rdm_pkt_type_of(pkt_entry)) { case EFA_RDM_RECEIPT_PKT: diff --git a/prov/efa/src/rdm/efa_rdm_ope.c b/prov/efa/src/rdm/efa_rdm_ope.c index 98406f70222..e8239159f42 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.c +++ b/prov/efa/src/rdm/efa_rdm_ope.c @@ -131,7 +131,7 @@ void efa_rdm_txe_release(struct efa_rdm_ope *txe) * (which would have already removed it from the list). */ if (txe->state == EFA_RDM_OPE_SEND && - !(txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED)) + !(txe->internal_flags & EFA_RDM_TXE_RESPONSE_RECEIVED)) dlist_remove(&txe->entry); dlist_foreach_container_safe(&txe->queued_pkts, @@ -732,7 +732,7 @@ void efa_rdm_txe_handle_error(struct efa_rdm_ope *txe, int err, int prov_errno) case EFA_RDM_TXE_REQ: break; case EFA_RDM_OPE_SEND: - if (!(txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED)) + if (!(txe->internal_flags & EFA_RDM_TXE_RESPONSE_RECEIVED)) dlist_remove(&txe->entry); break; case EFA_RDM_OPE_ERR: @@ -982,7 +982,6 @@ void efa_rdm_txe_report_completion(struct efa_rdm_ope *txe) txe->peer->conn->fi_addr, txe->tx_id, txe->msg_id, txe->cq_entry.tag, txe->total_len); - efa_rdm_tracepoint(send_end, txe->msg_id, (size_t) txe->cq_entry.op_context, txe->total_len, txe->cq_entry.tag, txe->peer->conn->fi_addr); @@ -1198,7 +1197,18 @@ void efa_rdm_ope_handle_recv_completed(struct efa_rdm_ope *ope) } if (ope->type == EFA_RDM_TXE) { - efa_rdm_txe_release(ope); + /* + * This can only happen for emulated read protocols + * where we use TX entry to receive data from the read target.
When the recv completed, the RTR + * send completion may not have + * arrived yet. Defer the release until the RTR send + * completion arrives (efa_outstanding_tx_ops == 0) + * to avoid use-after-free of the tx entry. + */ + ope->internal_flags |= EFA_RDM_TXE_RESPONSE_RECEIVED; + if (efa_rdm_txe_with_resp_ready_for_release(ope)) + efa_rdm_txe_release(ope); } else { assert(ope->type == EFA_RDM_RXE); efa_rdm_rxe_release(ope); diff --git a/prov/efa/src/rdm/efa_rdm_ope.h b/prov/efa/src/rdm/efa_rdm_ope.h index 61c2c74883d..04f80c587f0 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.h +++ b/prov/efa/src/rdm/efa_rdm_ope.h @@ -285,13 +285,19 @@ void efa_rdm_rxe_release_internal(struct efa_rdm_ope *rxe); #define EFA_RDM_OPE_INTERNAL BIT_ULL(15) /** - * @brief flag to indicate that a DC txe has received its receipt packet + * @brief flag to indicate that a txe has received its response/ack * - * This flag is used to track when a delivery complete operation has - * received acknowledgment from the receiver, preventing premature - * completion before all TX operations finish. + * This applies to protocols where the txe sends a request packet and + * needs to wait for both a response AND all TX send completions before + * the txe can be released or completed: + * - Delivery complete: REQ sent, RECEIPT received + * - Emulated read: RTR sent, data received via READRSP/CTSDATA + * - Fetch/compare atomics: FETCH_RTA/COMPARE_RTA sent, ATOMRSP received + * + * The txe cannot be released/completed until both the response has been + * received AND all outstanding TX ops have completed.
*/ -#define EFA_RDM_TXE_RECEIPT_RECEIVED BIT_ULL(16) +#define EFA_RDM_TXE_RESPONSE_RECEIVED BIT_ULL(16) #define EFA_RDM_OPE_QUEUED_FLAGS (EFA_RDM_OPE_QUEUED_RNR | EFA_RDM_OPE_QUEUED_CTRL | EFA_RDM_OPE_QUEUED_READ | EFA_RDM_OPE_QUEUED_BEFORE_HANDSHAKE) @@ -317,24 +323,22 @@ void efa_rdm_ope_handle_recv_completed(struct efa_rdm_ope *ope); void efa_rdm_ope_handle_send_completed(struct efa_rdm_ope *ope); /** - * @brief Check if a delivery complete (DC) TXE is ready for release + * @brief Check if a txe that received its response/ack is ready for release * * @details - * For DC packets, this function prevents use-after-free race conditions by - * ensuring the TXE is only released when both conditions are met: - * 1. All TX operations have completed (efa_outstanding_tx_ops == 0) - * 2. Receipt packet has been received (EFA_RDM_TXE_RECEIPT_RECEIVED flag set) - * - * This dual-condition check ensures proper synchronization between send - * completions and receipt acknowledgments in the delivery complete protocol. + * In protocols where the txe sends a request and receives a response + * (delivery complete, emulated read, fetch/compare atomics), the txe can + * only be released or completed when both: + * 1. The response has been received (EFA_RDM_TXE_RESPONSE_RECEIVED flag set) + * 2.
All TX ops have completed (efa_outstanding_tx_ops == 0) * * @param[in] txe TX operation entry to check * @return true if TXE is ready for release, false otherwise */ -static inline bool efa_rdm_txe_dc_ready_for_release(struct efa_rdm_ope *txe) +static inline bool efa_rdm_txe_with_resp_ready_for_release(struct efa_rdm_ope *txe) { return (txe->efa_outstanding_tx_ops == 0) && - (txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED); + (txe->internal_flags & EFA_RDM_TXE_RESPONSE_RECEIVED); } int efa_rdm_ope_prepare_to_post_read(struct efa_rdm_ope *ope); diff --git a/prov/efa/src/rdm/efa_rdm_pke_cmd.c b/prov/efa/src/rdm/efa_rdm_pke_cmd.c index 0f350526be0..02a31db3095 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_cmd.c +++ b/prov/efa/src/rdm/efa_rdm_pke_cmd.c @@ -621,21 +621,25 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) case EFA_RDM_LONGREAD_RTW_PKT: /* nothing to do when long rtw send completes*/ break; - case EFA_RDM_SHORT_RTR_PKT: - case EFA_RDM_LONGCTS_RTR_PKT: - /* Unlike other protocol, for emulated read, txe - * is released in efa_rdm_ope_handle_recv_completed(). - * Therefore there is nothing to be done here. - */ - break; case EFA_RDM_WRITE_RTA_PKT: efa_rdm_pke_handle_write_rta_send_completion(pkt_entry); break; + case EFA_RDM_SHORT_RTR_PKT: + case EFA_RDM_LONGCTS_RTR_PKT: + /* For emulated read, txe is normally released in + * efa_rdm_ope_handle_recv_completed(). However, if recv + * completed before this RTR send completion arrived, the + * release was deferred. Release the txe now. + */ case EFA_RDM_FETCH_RTA_PKT: - /* no action to be taken here */ - break; case EFA_RDM_COMPARE_RTA_PKT: - /* no action to be taken here */ + /* For fetch/compare atomics, txe is normally released in + * efa_rdm_pke_handle_atomrsp_recv(). However, if the ATOMRSP + * arrived before this send completion, the release was deferred. 
+ */ + assert(pkt_entry->ope); + if (efa_rdm_txe_with_resp_ready_for_release(pkt_entry->ope)) + efa_rdm_txe_release(pkt_entry->ope); break; case EFA_RDM_DC_EAGER_MSGRTM_PKT: case EFA_RDM_DC_EAGER_TAGRTM_PKT: @@ -650,11 +654,11 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) * instead of bytes_acked to avoid issues with unset payload_size. * Note: efa_rdm_ep_record_tx_op_completed() above decrements efa_outstanding_tx_ops, * so this check must come after that call. - * Only release TXE when both TX ops complete and receipt is received. + * Only complete the TXE when both TX ops complete and receipt is received. */ assert(pkt_entry->ope); - if (efa_rdm_txe_dc_ready_for_release(pkt_entry->ope)) - efa_rdm_txe_release(pkt_entry->ope); + if (efa_rdm_txe_with_resp_ready_for_release(pkt_entry->ope)) + efa_rdm_ope_handle_send_completed(pkt_entry->ope); break; case EFA_RDM_READ_NACK_PKT: /* no action needed for NACK packet */ diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c index f9cedd58177..9044bf30fbe 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c @@ -289,13 +289,16 @@ void efa_rdm_pke_handle_ctsdata_send_completion(struct efa_rdm_pke *pkt_entry) { struct efa_rdm_ope *ope; - /* if this DATA packet is used by a DC protocol, the completion - * was (or will be) written when the receipt packet was received. - * The txe may have already been released. So nothing - * to do (or can be done) here. + /* if this DATA packet is used by a DC protocol, the tx entry should + * be only completed when both all TX ops are done and the receipt + * has been received. 
*/ - if (pkt_entry->flags & EFA_RDM_PKE_DC_LONGCTS_DATA) + if (pkt_entry->flags & EFA_RDM_PKE_DC_LONGCTS_DATA) { + assert(pkt_entry->ope); + if (efa_rdm_txe_with_resp_ready_for_release(pkt_entry->ope)) + efa_rdm_ope_handle_send_completed(pkt_entry->ope); return; + } ope = pkt_entry->ope; ope->bytes_acked += efa_rdm_pke_get_ctsdata_hdr(pkt_entry)->seg_length; @@ -783,20 +786,17 @@ void efa_rdm_pke_handle_receipt_recv(struct efa_rdm_pke *pkt_entry) return; } - /* Write send completion immediately to preserve DC semantics */ - efa_rdm_txe_report_completion(txe); - - /* Remove from ope_longcts_send_list since operation is complete */ + /* Remove from ope_longcts_send_list since all the data has been delivered */ if (txe->state == EFA_RDM_OPE_SEND) { dlist_remove(&txe->entry); } /* Set receipt received flag for DC operations */ - txe->internal_flags |= EFA_RDM_TXE_RECEIPT_RECEIVED; + txe->internal_flags |= EFA_RDM_TXE_RESPONSE_RECEIVED; - /* Only release txe if both conditions are met */ - if (efa_rdm_txe_dc_ready_for_release(txe)) - efa_rdm_txe_release(txe); + /* Only complete txe if both conditions are met */ + if (efa_rdm_txe_with_resp_ready_for_release(txe)) + efa_rdm_ope_handle_send_completed(txe); efa_rdm_pke_release_rx(pkt_entry); } @@ -866,6 +866,11 @@ void efa_rdm_pke_handle_atomrsp_recv(struct efa_rdm_pke *pkt_entry) else efa_cntr_report_tx_completion(&pkt_entry->ep->base_ep.util_ep, txe->cq_entry.flags); - efa_rdm_txe_release(txe); + /* Defer txe release until the FETCH_RTA/COMPARE_RTA send completion + * arrives to avoid use-after-free if the buffer pool slot is reused. 
+ */ + txe->internal_flags |= EFA_RDM_TXE_RESPONSE_RECEIVED; + if (efa_rdm_txe_with_resp_ready_for_release(txe)) + efa_rdm_txe_release(txe); efa_rdm_pke_release_rx(pkt_entry); } diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index 1972611fc01..d6ebb37508c 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -314,6 +314,7 @@ void test_rdm_cq_handshake_bad_send_status_impl(struct efa_resource **state, int txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); assert_non_null(txe); txe->internal_flags |= EFA_RDM_OPE_INTERNAL; + txe->efa_outstanding_tx_ops = 1; pkt_entry->ope = txe; pkt_entry->peer = peer; diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index 6033778591b..0d18119e8aa 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -2109,6 +2109,7 @@ void test_efa_rdm_ep_outstanding_tx_ops_decremented_with_error_completion(struct txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); assert_non_null(txe); txe->internal_flags |= EFA_RDM_OPE_INTERNAL; + txe->efa_outstanding_tx_ops = 1; pkt_entry->ope = txe; pkt_entry->peer = peer; diff --git a/prov/efa/test/efa_unit_test_ope.c b/prov/efa/test/efa_unit_test_ope.c index 6bca9813103..993000e7bb9 100644 --- a/prov/efa/test/efa_unit_test_ope.c +++ b/prov/efa/test/efa_unit_test_ope.c @@ -1198,91 +1198,132 @@ void test_efa_rdm_atomic_compare_desc_persistence(struct efa_resource **state) * @param[in] send_first If true, send completion happens first; if false, receipt first * @param[in] txe_in_send_state If true, TXE is in EFA_RDM_OPE_SEND state; if false, different state */ -static void test_efa_rdm_txe_dc_release_common(struct efa_resource *resource, bool send_first, bool txe_in_send_state) +/** + * @brief Common test for txe release ordering when response/ack arrives + * + * This tests that a txe is only released when both: + * 1. 
Response/ack received (EFA_RDM_TXE_RESPONSE_RECEIVED set) + * 2. All TX ops completed (efa_outstanding_tx_ops == 0) + * + * @param[in] resource test resource + * @param[in] send_first if true, send completion arrives before response + * @param[in] pkt_type request packet type to test + */ +static void test_efa_rdm_txe_with_resp_release_common(struct efa_resource *resource, + bool send_first, int pkt_type) { struct efa_rdm_ep *efa_rdm_ep; struct efa_rdm_ope *txe; - struct efa_rdm_pke *dc_pkt_entry, *receipt_pkt_entry; - struct efa_rdm_receipt_hdr *receipt_hdr; + struct efa_rdm_pke *req_pkt_entry, *resp_pkt_entry = NULL; + struct fi_cq_err_entry cq_entry = {0}; + struct efa_cq *efa_cq; efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); - efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); - - /* Allocate TXE and set up for DC operation */ - txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); + efa_cq = container_of(resource->cq, struct efa_cq, util_cq.cq_fid); + + /* Allocate TXE based on protocol */ + if (pkt_type == EFA_RDM_SHORT_RTR_PKT || pkt_type == EFA_RDM_LONGCTS_RTR_PKT) { + txe = efa_unit_test_alloc_txe(resource, ofi_op_read_req); + txe->cq_entry.flags = FI_READ; + } else if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) { + txe = efa_unit_test_alloc_txe(resource, ofi_op_atomic); + txe->cq_entry.flags = FI_ATOMIC | FI_READ; + } else { + /* DC protocols */ + txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); + txe->internal_flags |= EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED; + } assert_non_null(txe); - txe->internal_flags |= EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED; txe->efa_outstanding_tx_ops = 1; - if (txe_in_send_state) { - /* Add TXE to ope_longcts_send_list to simulate active longcts send */ + /* Set txe state based on packet type */ + if (pkt_type == EFA_RDM_CTSDATA_PKT) { txe->state = EFA_RDM_OPE_SEND; dlist_insert_tail(&txe->entry,
&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list); - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 1); } else { - /* TXE is not in SEND state (e.g., non-long-cts TXE) */ txe->state = EFA_RDM_TXE_REQ; - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); } - /* Create fake DC packet entry */ - dc_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); - assert_non_null(dc_pkt_entry); - dc_pkt_entry->ope = txe; - dc_pkt_entry->ep = efa_rdm_ep; - dc_pkt_entry->peer = txe->peer; - /* Set DC packet type in wiredata */ - struct efa_rdm_base_hdr *base_hdr = (struct efa_rdm_base_hdr *)dc_pkt_entry->wiredata; - base_hdr->type = EFA_RDM_DC_EAGER_MSGRTM_PKT; - - /* Create fake receipt packet entry */ - receipt_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL); - assert_non_null(receipt_pkt_entry); - receipt_pkt_entry->ope = txe; - receipt_pkt_entry->ep = efa_rdm_ep; - /* Set tx_id so efa_rdm_pke_handle_receipt_recv can look up the txe */ - receipt_hdr = efa_rdm_pke_get_receipt_hdr(receipt_pkt_entry); - receipt_hdr->tx_id = txe->tx_id; + /* Create request packet entry */ + req_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); + assert_non_null(req_pkt_entry); + req_pkt_entry->ope = txe; + req_pkt_entry->ep = efa_rdm_ep; + req_pkt_entry->peer = txe->peer; + struct efa_rdm_base_hdr *req_hdr = (struct efa_rdm_base_hdr *)req_pkt_entry->wiredata; + req_hdr->type = pkt_type; + if (pkt_type == EFA_RDM_CTSDATA_PKT) { + req_pkt_entry->flags |= EFA_RDM_PKE_DC_LONGCTS_DATA; + struct efa_rdm_ctsdata_hdr *ctsdata_hdr = efa_rdm_pke_get_ctsdata_hdr(req_pkt_entry); + ctsdata_hdr->seg_length = 0; + } + + /* Create response packet entry (not needed for RTR which uses efa_rdm_ope_handle_recv_completed) */ + if (pkt_type != 
EFA_RDM_SHORT_RTR_PKT && pkt_type != EFA_RDM_LONGCTS_RTR_PKT) { + resp_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL); + assert_non_null(resp_pkt_entry); + resp_pkt_entry->ope = txe; + resp_pkt_entry->ep = efa_rdm_ep; + if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) { + struct efa_rdm_atomrsp_pkt *atomrsp_pkt = (struct efa_rdm_atomrsp_pkt *)resp_pkt_entry->wiredata; + atomrsp_pkt->hdr.type = EFA_RDM_ATOMRSP_PKT; + atomrsp_pkt->hdr.recv_id = txe->tx_id; + atomrsp_pkt->hdr.seg_length = 0; + txe->atomic_ex.resp_iov_count = 0; + } else { + /* DC protocols use RECEIPT as response */ + struct efa_rdm_receipt_hdr *receipt_hdr = efa_rdm_pke_get_receipt_hdr(resp_pkt_entry); + receipt_hdr->tx_id = txe->tx_id; + } + } /* Verify TXE is not ready for release initially */ - assert_false(efa_rdm_txe_dc_ready_for_release(txe)); + assert_false(efa_rdm_txe_with_resp_ready_for_release(txe)); assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); if (send_first) { /* Send completion first - should not release TXE yet */ - efa_rdm_pke_handle_send_completion(dc_pkt_entry); + efa_rdm_pke_handle_send_completion(req_pkt_entry); assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); - assert_false(efa_rdm_txe_dc_ready_for_release(txe)); - if (txe_in_send_state) { - /* TXE should still be in ope_longcts_send_list */ - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 1); - assert_int_equal(txe->state, EFA_RDM_OPE_SEND); - } else { - /* Non-long-cts TXE should not be in the list */ - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); - assert_int_equal(txe->state, EFA_RDM_TXE_REQ); - } - - /* Receipt handling - should set flag and release TXE */ - efa_rdm_pke_handle_receipt_recv(receipt_pkt_entry); - if (txe_in_send_state) { - /* Should remove from list */ - 
assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); - } + assert_false(efa_rdm_txe_with_resp_ready_for_release(txe)); + + /* For DC packets, CQ should be empty as there is no completion yet */ + if (txe->internal_flags & EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED) + assert_int_equal(ofi_cq_read_entries(&efa_cq->util_cq, &cq_entry, 1, NULL), -FI_EAGAIN); + + /* Response arrives - should release TXE now */ + if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) + efa_rdm_pke_handle_atomrsp_recv(resp_pkt_entry); + else if (pkt_type == EFA_RDM_SHORT_RTR_PKT || pkt_type == EFA_RDM_LONGCTS_RTR_PKT) + efa_rdm_ope_handle_recv_completed(txe); + else + efa_rdm_pke_handle_receipt_recv(resp_pkt_entry); + + /* For DC packets, CQ should now have a completion. The txe has been released at this point, so key off pkt_type instead of reading txe->internal_flags */ + if (pkt_type != EFA_RDM_SHORT_RTR_PKT && pkt_type != EFA_RDM_LONGCTS_RTR_PKT && pkt_type != EFA_RDM_FETCH_RTA_PKT && pkt_type != EFA_RDM_COMPARE_RTA_PKT) + assert_int_equal(ofi_cq_read_entries(&efa_cq->util_cq, &cq_entry, 1, NULL), 1); } else { - /* Receipt handling first - should set flag but not release TXE yet */ - efa_rdm_pke_handle_receipt_recv(receipt_pkt_entry); + /* Response arrives first - should not release TXE yet */ + if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) + efa_rdm_pke_handle_atomrsp_recv(resp_pkt_entry); + else if (pkt_type == EFA_RDM_SHORT_RTR_PKT || pkt_type == EFA_RDM_LONGCTS_RTR_PKT) + efa_rdm_ope_handle_recv_completed(txe); + else + efa_rdm_pke_handle_receipt_recv(resp_pkt_entry); assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); - assert_true(txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED); - assert_false(efa_rdm_txe_dc_ready_for_release(txe)); - if (txe_in_send_state) { - /* TXE should be removed from ope_longcts_send_list immediately */ - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); - } + assert_true(txe->internal_flags & EFA_RDM_TXE_RESPONSE_RECEIVED); + + /* For DC packets, CQ
should be empty as there is no completion yet */ + if (txe->internal_flags & EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED) + assert_int_equal(ofi_cq_read_entries(&efa_cq->util_cq, &cq_entry, 1, NULL), -FI_EAGAIN); - /* Send completion - should now release TXE */ - efa_rdm_pke_handle_send_completion(dc_pkt_entry); + /* Send completion - should release TXE now */ + efa_rdm_pke_handle_send_completion(req_pkt_entry); + + /* For DC packets, CQ should now have a completion. The txe has been released at this point, so key off pkt_type instead of reading txe->internal_flags */ + if (pkt_type != EFA_RDM_SHORT_RTR_PKT && pkt_type != EFA_RDM_LONGCTS_RTR_PKT && pkt_type != EFA_RDM_FETCH_RTA_PKT && pkt_type != EFA_RDM_COMPARE_RTA_PKT) + assert_int_equal(ofi_cq_read_entries(&efa_cq->util_cq, &cq_entry, 1, NULL), 1); } /* Verify TXE is released */ @@ -1297,9 +1338,9 @@ static void test_efa_rdm_txe_dc_release_common(struct efa_resource *resource, bo * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_send_first(struct efa_resource **state) +void test_efa_rdm_txe_dc_ctsdata_send_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, true, true); + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_CTSDATA_PKT); } /** @@ -1311,9 +1352,9 @@ void test_efa_rdm_txe_dc_send_first(struct efa_resource **state) * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_receipt_first(struct efa_resource **state) +void test_efa_rdm_txe_dc_ctsdata_resp_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, false, true); + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_CTSDATA_PKT); } /** @@ -1324,22 +1365,70 @@ void test_efa_rdm_txe_dc_receipt_first(struct efa_resource **state) * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_send_first_non_longcts(struct efa_resource **state) +void test_efa_rdm_txe_dc_eager_rtm_send_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, true, false); + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_DC_EAGER_MSGRTM_PKT); } /** * @brief Test DC packet TXE release with
receipt completion first (TXE not in SEND state) * * This test verifies the bug fix where non-long-cts TXEs get the - * EFA_RDM_TXE_RECEIPT_RECEIVED flag set, allowing proper release. + * EFA_RDM_TXE_RESPONSE_RECEIVED flag set, allowing proper release. * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_receipt_first_non_longcts(struct efa_resource **state) +void test_efa_rdm_txe_dc_eager_rtm_resp_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_DC_EAGER_MSGRTM_PKT); +} + +/** + * @brief Test SHORT_RTR txe release: send completion before recv completed + */ +void test_efa_rdm_txe_short_rtr_send_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_SHORT_RTR_PKT); +} + +/** + * @brief Test SHORT_RTR txe release: recv completed before send completion + */ +void test_efa_rdm_txe_short_rtr_resp_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_SHORT_RTR_PKT); +} + +/** + * @brief Test FETCH_RTA txe release: send completion before ATOMRSP + */ +void test_efa_rdm_txe_fetch_rta_send_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_FETCH_RTA_PKT); +} + +/** + * @brief Test FETCH_RTA txe release: ATOMRSP before send completion + */ +void test_efa_rdm_txe_fetch_rta_resp_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_FETCH_RTA_PKT); +} + +/** + * @brief Test COMPARE_RTA txe release: send completion before ATOMRSP + */ +void test_efa_rdm_txe_compare_rta_send_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_COMPARE_RTA_PKT); +} + +/** + * @brief Test COMPARE_RTA txe release: ATOMRSP before send completion + */ +void test_efa_rdm_txe_compare_rta_resp_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, 
false, false); + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_COMPARE_RTA_PKT); } /* RDM MSG 0-byte tests */ diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 925deb888f4..b2c29191b1e 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -345,10 +345,16 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ope_eor_packet_failed_posting, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ope_eor_packet_tracking_unresponsive_wait_send, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_atomic_compare_desc_persistence, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_receipt_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_send_first_non_longcts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_receipt_first_non_longcts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_ctsdata_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_ctsdata_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_eager_rtm_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_eager_rtm_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_short_rtr_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + 
cmocka_unit_test_setup_teardown(test_efa_rdm_txe_short_rtr_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_fetch_rta_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_fetch_rta_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_compare_rta_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_compare_rta_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), /* end of efa_unit_test_ope.c */ cmocka_unit_test_setup_teardown(test_efa_rdm_msg_send_to_local_peer_with_null_desc, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 845564515a8..ee69358531d 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -297,10 +297,16 @@ void test_efa_rdm_ope_eor_packet_tracking_wait_send(); void test_efa_rdm_ope_eor_packet_failed_posting(); void test_efa_rdm_ope_eor_packet_tracking_unresponsive_wait_send(); void test_efa_rdm_atomic_compare_desc_persistence(); -void test_efa_rdm_txe_dc_send_first(); -void test_efa_rdm_txe_dc_receipt_first(); -void test_efa_rdm_txe_dc_send_first_non_longcts(); -void test_efa_rdm_txe_dc_receipt_first_non_longcts(); +void test_efa_rdm_txe_dc_ctsdata_send_first(); +void test_efa_rdm_txe_dc_ctsdata_resp_first(); +void test_efa_rdm_txe_dc_eager_rtm_send_first(); +void test_efa_rdm_txe_dc_eager_rtm_resp_first(); +void test_efa_rdm_txe_short_rtr_send_first(); +void test_efa_rdm_txe_short_rtr_resp_first(); +void test_efa_rdm_txe_fetch_rta_send_first(); +void test_efa_rdm_txe_fetch_rta_resp_first(); +void test_efa_rdm_txe_compare_rta_send_first(); +void test_efa_rdm_txe_compare_rta_resp_first(); /* end of efa_unit_test_ope.c */