diff --git a/prov/efa/src/rdm/efa_rdm_ep_utils.c b/prov/efa/src/rdm/efa_rdm_ep_utils.c index 834519802bd..e838017b53b 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_utils.c +++ b/prov/efa/src/rdm/efa_rdm_ep_utils.c @@ -413,6 +413,17 @@ void efa_rdm_ep_record_tx_op_completed(struct efa_rdm_ep *ep, struct efa_rdm_pke pkt_entry->peer->efa_outstanding_tx_ops--; if (ope) { + /* + * This assertion can fail if an ope is released while it + * still has outstanding TX ops, and the buffer pool slot + * is reused by a new ope (which resets the counter to 0). + * The stale send completion then + * decrements the new ope's counter, causing underflow. + */ + if (ope->efa_outstanding_tx_ops == 0) { + EFA_WARN(FI_LOG_EP_DATA, "decrementing ope->efa_outstanding_tx_ops from 0, pkt type: %d\n", efa_rdm_pkt_type_of(pkt_entry)); + assert(ope->efa_outstanding_tx_ops > 0); + } ope->efa_outstanding_tx_ops--; switch(efa_rdm_pkt_type_of(pkt_entry)) { case EFA_RDM_RECEIPT_PKT: diff --git a/prov/efa/src/rdm/efa_rdm_ope.c b/prov/efa/src/rdm/efa_rdm_ope.c index feed792c600..c3511b933e2 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.c +++ b/prov/efa/src/rdm/efa_rdm_ope.c @@ -136,7 +136,7 @@ void efa_rdm_txe_release(struct efa_rdm_ope *txe) * (which would have already removed it from the list). 
*/ if (txe->state == EFA_RDM_OPE_SEND && - !(txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED)) + !(txe->internal_flags & EFA_RDM_TXE_REMOTE_ACK_RECEIVED)) dlist_remove(&txe->entry); dlist_foreach_container_safe(&txe->queued_pkts, @@ -734,7 +734,7 @@ void efa_rdm_txe_handle_error(struct efa_rdm_ope *txe, int err, int prov_errno) case EFA_RDM_TXE_REQ: break; case EFA_RDM_OPE_SEND: - if (!(txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED)) + if (!(txe->internal_flags & EFA_RDM_TXE_REMOTE_ACK_RECEIVED)) dlist_remove(&txe->entry); break; case EFA_RDM_OPE_ERR: @@ -1013,7 +1013,6 @@ void efa_rdm_txe_report_completion(struct efa_rdm_ope *txe) txe->peer->conn->fi_addr, txe->tx_id, txe->msg_id, txe->cq_entry.tag, txe->total_len); - efa_rdm_tracepoint(send_end, txe->msg_id, (size_t) txe->cq_entry.op_context, txe->total_len, txe->cq_entry.tag, txe->peer->conn->fi_addr); @@ -1188,6 +1187,15 @@ void efa_rdm_ope_handle_recv_completed(struct efa_rdm_ope *ope) efa_rdm_rxe_report_completion(rxe); } + /* + * Mark recv completed before any release attempts below. + * This flag is checked by send completion handlers + * (efa_rdm_pke_handle_cts_send_completion for CTS, + * efa_rdm_pke_handle_send_completion for SHORT_RTR/LONGCTS_RTR) + * to decide whether a deferred release should proceed. + */ + ope->internal_flags |= EFA_RDM_OPE_RECV_COMPLETED; + /* As can be seen, this function does not release rxe when * efa_rdm_ope_post_send_or_queue() was successful. * @@ -1202,6 +1210,17 @@ void efa_rdm_ope_handle_recv_completed(struct efa_rdm_ope *ope) if (ope->internal_flags & EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED) { assert(ope->type == EFA_RDM_RXE); rxe = ope; /* Intentionally assigned for easier understanding */ + /* + * Set ACK_IN_FLIGHT before posting RECEIPT. This must + * be done before efa_rdm_ope_post_send_or_queue() because + * the RECEIPT may be queued (not immediately posted), in + * which case efa_outstanding_tx_ops is NOT incremented + * yet. 
Without this flag, a pending CTS send completion + * could see ops == 0 and release the rxe while the + * RECEIPT is still queued, causing a hang. + * Cleared in efa_rdm_pke_handle_receipt_send_completion. + */ + rxe->internal_flags |= EFA_RDM_RXE_ACK_IN_FLIGHT; err = efa_rdm_ope_post_send_or_queue(rxe, EFA_RDM_RECEIPT_PKT); if (OFI_UNLIKELY(err)) { EFA_WARN(FI_LOG_CQ, @@ -1219,20 +1238,42 @@ void efa_rdm_ope_handle_recv_completed(struct efa_rdm_ope *ope) * it is possible that when this function is called, EOR is still inflight * (EOR has been sent, and the send completion has NOT been received). * - * If EOR is inflight, the rxe cannot be released because the rxe - * is needed to handle the send completion of the EOR. + * Similarly, a RECEIPT packet may have been posted or queued above + * for DC protocols, setting RXE_ACK_IN_FLIGHT. * - * see #efa_rdm_pke_handle_eor_send_completion + * In either case, the rxe cannot be released here because it is + * needed to handle the send completion of the EOR or RECEIPT. */ - if (ope->internal_flags & EFA_RDM_RXE_EOR_IN_FLIGHT) { + if (ope->internal_flags & EFA_RDM_RXE_ACK_IN_FLIGHT) { + /* + * An EOR or RECEIPT is in flight / queued. The rxe + * cannot be released until its send completion arrives. + * The send completion handler will release the rxe. + * + * see #efa_rdm_pke_handle_eor_send_completion + * see #efa_rdm_pke_handle_receipt_send_completion + */ return; } - if (ope->type == EFA_RDM_TXE) { - efa_rdm_txe_release(ope); - } else { - assert(ope->type == EFA_RDM_RXE); - efa_rdm_rxe_release(ope); + /* + * Release the ope only if all outstanding TX ops (e.g. CTS, + * RTR send completions) have arrived. 
If not, the release is + * deferred to the corresponding send completion handler: + * + * - txe (emulated read): RTR or CTS send completion handler + * see #efa_rdm_pke_handle_cts_send_completion + * + * - rxe (longcts msg/write): CTS send completion handler + * see #efa_rdm_pke_handle_cts_send_completion + */ + if (ope->efa_outstanding_tx_ops == 0) { + if (ope->type == EFA_RDM_TXE) { + efa_rdm_txe_release(ope); + } else { + assert(ope->type == EFA_RDM_RXE); + efa_rdm_rxe_release(ope); + } } } diff --git a/prov/efa/src/rdm/efa_rdm_ope.h b/prov/efa/src/rdm/efa_rdm_ope.h index 25c65b356ce..9a98a285d43 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.h +++ b/prov/efa/src/rdm/efa_rdm_ope.h @@ -217,12 +217,19 @@ void efa_rdm_rxe_release_internal(struct efa_rdm_ope *rxe); #define EFA_RDM_OPE_QUEUED_RNR BIT_ULL(9) /** - * @brief Flag to indicate an rxe has an EOR in flight + * @brief Flag to indicate an rxe has an EOR or RECEIPT in flight. * - * In flag means the EOR has been sent or queued, and has not got send completion. - * hence the rxe cannot be released + * This flag is set when an EOR or RECEIPT packet has been sent or + * queued but its send completion has not yet arrived. While set, + * the rxe cannot be released. + * + * For EOR: set in efa_rdm_pke_handle_rma_read_completion, + * cleared in efa_rdm_pke_handle_eor_send_completion. + * For RECEIPT: set in efa_rdm_ope_handle_recv_completed (before + * posting RECEIPT), cleared in + * efa_rdm_pke_handle_receipt_send_completion. */ -#define EFA_RDM_RXE_EOR_IN_FLIGHT BIT_ULL(10) +#define EFA_RDM_RXE_ACK_IN_FLIGHT BIT_ULL(10) /** * @brief flag to indicate a txe has already written an cq error entry for RNR @@ -272,13 +279,18 @@ void efa_rdm_rxe_release_internal(struct efa_rdm_ope *rxe); #define EFA_RDM_OPE_INTERNAL BIT_ULL(15) /** - * @brief flag to indicate that a DC txe has received its receipt packet + * @brief Flag to indicate that a txe has received a remote + * acknowledgment (RECEIPT or ATOMRSP). 
+ * + * For DC protocols: set in efa_rdm_pke_handle_receipt_recv when + * the RECEIPT packet arrives from the remote. + * For fetch/compare atomics: set in efa_rdm_pke_handle_atomrsp_recv + * when the ATOMRSP packet arrives. * - * This flag is used to track when a delivery complete operation has - * received acknowledgment from the receiver, preventing premature - * completion before all TX operations finish. + * The txe can only be released when both this flag is set AND all + * outstanding TX ops have completed (efa_outstanding_tx_ops == 0). */ -#define EFA_RDM_TXE_RECEIPT_RECEIVED BIT_ULL(16) +#define EFA_RDM_TXE_REMOTE_ACK_RECEIVED BIT_ULL(16) /** * @brief flag to indicate an ope does not need to report completion to user @@ -294,6 +306,16 @@ void efa_rdm_rxe_release_internal(struct efa_rdm_ope *rxe); */ #define EFA_RDM_TXE_NO_COUNTER BIT_ULL(18) +/** + * @brief flag to indicate that efa_rdm_ope_handle_recv_completed was called. + * + * For txe: this means an emulated read protocol received all data from + * the remote. For rxe: this means all data has been received and copied + * to the application buffer. The ope can only be released when both + * this flag is set AND all outstanding TX ops have completed. + */ +#define EFA_RDM_OPE_RECV_COMPLETED BIT_ULL(19) + #define EFA_RDM_OPE_QUEUED_FLAGS (EFA_RDM_OPE_QUEUED_RNR | EFA_RDM_OPE_QUEUED_CTRL | EFA_RDM_OPE_QUEUED_READ | EFA_RDM_OPE_QUEUED_BEFORE_HANDSHAKE) void efa_rdm_ope_try_fill_desc(struct efa_rdm_ope *ope, int mr_iov_start, uint64_t access); @@ -318,24 +340,70 @@ void efa_rdm_ope_handle_recv_completed(struct efa_rdm_ope *ope); void efa_rdm_ope_handle_send_completed(struct efa_rdm_ope *ope); /** - * @brief Check if a delivery complete (DC) TXE is ready for release + * @brief Check if a DC or atomic txe is ready for release. + * + * Used by: DC eager/medium/longcts msg/tag/write, DC CTSDATA, + * FETCH_RTA, COMPARE_RTA. 
+ * + * These protocols require a remote acknowledgment (RECEIPT for DC, + * ATOMRSP for fetch/compare atomics) before the txe can complete. + * The txe is only released when both: + * 1. The remote ack has arrived (EFA_RDM_TXE_REMOTE_ACK_RECEIVED set) + * 2. All packet send completions have arrived (efa_outstanding_tx_ops == 0) + * + * @param[in] txe TX operation entry to check + * @return true if txe is ready for release, false otherwise + */ +static inline bool efa_rdm_txe_with_remote_ack_ready_for_release(struct efa_rdm_ope *txe) +{ + return (txe->efa_outstanding_tx_ops == 0 && + (txe->internal_flags & EFA_RDM_TXE_REMOTE_ACK_RECEIVED)); +} + +/** + * @brief Check if a longcts rxe is ready for release. + * + * Used by: CTS send completion handler for longcts msg/write + * (both DC and non-DC). + * + * The rxe can only be released when all of: + * 1. Recv completed (EFA_RDM_OPE_RECV_COMPLETED set) + * 2. All send completions arrived (efa_outstanding_tx_ops == 0) + * 3. For DC: no ack packet (RECEIPT) is in flight + * (EFA_RDM_RXE_ACK_IN_FLIGHT clear). The ACK_IN_FLIGHT check + * is needed because RECEIPT may be queued (not yet posted), + * in which case efa_outstanding_tx_ops has not been + * incremented yet. + * + * @param[in] rxe RX operation entry to check + * @return true if rxe is ready for release, false otherwise + */ +static inline bool efa_rdm_rxe_cts_ready_for_release(struct efa_rdm_ope *rxe) +{ + return (rxe->efa_outstanding_tx_ops == 0 && + (rxe->internal_flags & EFA_RDM_OPE_RECV_COMPLETED) && + !(rxe->internal_flags & EFA_RDM_RXE_ACK_IN_FLIGHT)); +} + +/** + * @brief Check if an emulated read txe is ready for release. * - * @details - * For DC packets, this function prevents use-after-free race conditions by - * ensuring the TXE is only released when both conditions are met: - * 1. All TX operations have completed (efa_outstanding_tx_ops == 0) - * 2. 
Receipt packet has been received (EFA_RDM_TXE_RECEIPT_RECEIVED flag set) + * Used by: SHORT_RTR and LONGCTS_RTR send completion handlers, + * and CTS send completion handler for emulated longcts read. * - * This dual-condition check ensures proper synchronization between send - * completions and receipt acknowledgments in the delivery complete protocol. + * In emulated read, the txe posts an RTR (or CTS for longcts read) + * and then receives data from the remote. The txe can only be + * released when both: + * 1. Recv completed (EFA_RDM_OPE_RECV_COMPLETED set) + * 2. All send completions arrived (efa_outstanding_tx_ops == 0) * * @param[in] txe TX operation entry to check - * @return true if TXE is ready for release, false otherwise + * @return true if txe is ready for release, false otherwise */ -static inline bool efa_rdm_txe_dc_ready_for_release(struct efa_rdm_ope *txe) +static inline bool efa_rdm_txe_emulated_read_ready_for_release(struct efa_rdm_ope *txe) { - return (txe->efa_outstanding_tx_ops == 0) && - (txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED); + return (txe->efa_outstanding_tx_ops == 0 && + (txe->internal_flags & EFA_RDM_OPE_RECV_COMPLETED)); } int efa_rdm_ope_prepare_to_post_read(struct efa_rdm_ope *ope); diff --git a/prov/efa/src/rdm/efa_rdm_pke_cmd.c b/prov/efa/src/rdm/efa_rdm_pke_cmd.c index 8976df182bc..0bd4eee50a3 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_cmd.c +++ b/prov/efa/src/rdm/efa_rdm_pke_cmd.c @@ -571,6 +571,7 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) efa_rdm_txe_release(pkt_entry->ope); break; case EFA_RDM_CTS_PKT: + efa_rdm_pke_handle_cts_send_completion(pkt_entry); break; case EFA_RDM_CTSDATA_PKT: efa_rdm_pke_handle_ctsdata_send_completion(pkt_entry); @@ -621,20 +622,23 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) break; case EFA_RDM_SHORT_RTR_PKT: case EFA_RDM_LONGCTS_RTR_PKT: - /* Unlike other protocol, for emulated read, txe - * is released in 
efa_rdm_ope_handle_recv_completed(). - * Therefore there is nothing to be done here. + /* For emulated read, the txe is released either here + * or in efa_rdm_ope_handle_recv_completed(), whichever + * happens last. Release here if recv already completed. */ + assert(pkt_entry->ope); + if (efa_rdm_txe_emulated_read_ready_for_release(pkt_entry->ope)) + efa_rdm_txe_release(pkt_entry->ope); break; case EFA_RDM_WRITE_RTA_PKT: efa_rdm_pke_handle_write_rta_send_completion(pkt_entry); break; - case EFA_RDM_FETCH_RTA_PKT: - /* no action to be taken here */ - break; - case EFA_RDM_COMPARE_RTA_PKT: - /* no action to be taken here */ - break; + case EFA_RDM_FETCH_RTA_PKT: /* fall through */ + case EFA_RDM_COMPARE_RTA_PKT: /* fall through */ + /* For fetch/compare atomics, the txe is released either + * here or in efa_rdm_pke_handle_atomrsp_recv(), whichever + * happens last. Release here if ATOMRSP already arrived. + */ case EFA_RDM_DC_EAGER_MSGRTM_PKT: case EFA_RDM_DC_EAGER_TAGRTM_PKT: case EFA_RDM_DC_MEDIUM_MSGRTM_PKT: @@ -651,7 +655,7 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) * Only release TXE when both TX ops complete and receipt is received. */ assert(pkt_entry->ope); - if (efa_rdm_txe_dc_ready_for_release(pkt_entry->ope)) + if (efa_rdm_txe_with_remote_ack_ready_for_release(pkt_entry->ope)) efa_rdm_txe_release(pkt_entry->ope); break; case EFA_RDM_READ_NACK_PKT: diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c index 2ed75f38a00..c3cba1d2a6b 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c @@ -187,6 +187,40 @@ void efa_rdm_pke_handle_cts_sent(struct efa_rdm_pke *pkt_entry) ope->window = efa_rdm_pke_get_cts_hdr(pkt_entry)->recv_length; } +/** + * @brief Handle CTS packet send completion. + * + * CTS can be sent by two different ope types: + * + * 1. 
rxe for longcts msg/write (DC and non-DC): + * Release if efa_rdm_rxe_cts_ready_for_release(), which + * checks recv completed, ops == 0, and no ACK in flight. + * + * 2. txe for emulated longcts read: + * Release if efa_rdm_txe_emulated_read_ready_for_release(), + * which checks recv completed and ops == 0. + * + * In both cases, the ope is released either here or in + * efa_rdm_ope_handle_recv_completed(), whichever happens last. + * + * @param[in] pkt_entry the CTS packet entry whose send completed + */ +void efa_rdm_pke_handle_cts_send_completion(struct efa_rdm_pke *pkt_entry) +{ + struct efa_rdm_ope *ope; + + ope = pkt_entry->ope; + assert(ope); + if (ope->type == EFA_RDM_RXE) { + if (efa_rdm_rxe_cts_ready_for_release(ope)) + efa_rdm_rxe_release(ope); + } else { + assert(ope->type == EFA_RDM_TXE); + if (efa_rdm_txe_emulated_read_ready_for_release(ope)) + efa_rdm_txe_release(ope); + } +} + void efa_rdm_pke_handle_cts_recv(struct efa_rdm_pke *pkt_entry) { struct efa_rdm_ep *ep; @@ -285,13 +319,16 @@ void efa_rdm_pke_handle_ctsdata_send_completion(struct efa_rdm_pke *pkt_entry) { struct efa_rdm_ope *ope; - /* if this DATA packet is used by a DC protocol, the completion - * was (or will be) written when the receipt packet was received. - * The txe may have already been released. So nothing - * to do (or can be done) here. + /* if this DATA packet is used by a DC protocol, the tx entry should + * only be released once all TX ops are done and the receipt + * has been received. 
*/ - if (pkt_entry->flags & EFA_RDM_PKE_DC_LONGCTS_DATA) + if (pkt_entry->flags & EFA_RDM_PKE_DC_LONGCTS_DATA) { + assert(pkt_entry->ope); + if (efa_rdm_txe_with_remote_ack_ready_for_release(pkt_entry->ope)) + efa_rdm_txe_release(pkt_entry->ope); return; + } ope = pkt_entry->ope; ope->bytes_acked += efa_rdm_pke_get_ctsdata_hdr(pkt_entry)->seg_length; @@ -545,7 +582,7 @@ void efa_rdm_pke_handle_rma_read_completion(struct efa_rdm_pke *context_pkt_entr efa_rdm_rxe_release(rxe); } - rxe->internal_flags |= EFA_RDM_RXE_EOR_IN_FLIGHT; + rxe->internal_flags |= EFA_RDM_RXE_ACK_IN_FLIGHT; rxe->bytes_received += rxe->bytes_read_completed; rxe->bytes_copied += rxe->bytes_read_completed; if (rxe->bytes_copied == rxe->total_len) { @@ -640,7 +677,7 @@ void efa_rdm_pke_handle_eor_send_completion(struct efa_rdm_pke *pkt_entry) if (rxe->bytes_copied == rxe->total_len) { efa_rdm_rxe_release(rxe); } else { - rxe->internal_flags &= ~EFA_RDM_RXE_EOR_IN_FLIGHT; + rxe->internal_flags &= ~EFA_RDM_RXE_ACK_IN_FLIGHT; } } @@ -764,7 +801,19 @@ void efa_rdm_pke_handle_receipt_send_completion(struct efa_rdm_pke *pkt_entry) struct efa_rdm_ope *rxe; rxe = pkt_entry->ope; - efa_rdm_rxe_release(rxe); + /* + * Clear ACK_IN_FLIGHT so the CTS send completion handler + * (efa_rdm_pke_handle_cts_send_completion) can see that + * no ack is pending and proceed with release. + */ + rxe->internal_flags &= ~EFA_RDM_RXE_ACK_IN_FLIGHT; + /* + * Release the rxe if the CTS send completion has already + * arrived (efa_outstanding_tx_ops == 0). Otherwise, the + * CTS send completion handler will release it. 
+ */ + if (rxe->efa_outstanding_tx_ops == 0) + efa_rdm_rxe_release(rxe); } void efa_rdm_pke_handle_receipt_recv(struct efa_rdm_pke *pkt_entry) @@ -790,11 +839,15 @@ void efa_rdm_pke_handle_receipt_recv(struct efa_rdm_pke *pkt_entry) dlist_remove(&txe->entry); } - /* Set receipt received flag for DC operations */ - txe->internal_flags |= EFA_RDM_TXE_RECEIPT_RECEIVED; - - /* Only release txe if both conditions are met */ - if (efa_rdm_txe_dc_ready_for_release(txe)) + /* + * Mark that the remote ack (RECEIPT) has arrived. + * The txe is released either here or in + * efa_rdm_pke_handle_send_completion() for the DC + * request/CTSDATA packet, whichever happens last. + * Release here if the send completion already arrived. + */ + txe->internal_flags |= EFA_RDM_TXE_REMOTE_ACK_RECEIVED; + if (efa_rdm_txe_with_remote_ack_ready_for_release(txe)) efa_rdm_txe_release(txe); efa_rdm_pke_release_rx(pkt_entry); @@ -865,6 +918,15 @@ void efa_rdm_pke_handle_atomrsp_recv(struct efa_rdm_pke *pkt_entry) else efa_cntr_report_tx_completion(&pkt_entry->ep->base_ep.util_ep, txe->cq_entry.flags); - efa_rdm_txe_release(txe); + /* + * Mark that the remote response (ATOMRSP) has arrived. + * The txe is released either here or in + * efa_rdm_pke_handle_send_completion() for the + * FETCH_RTA/COMPARE_RTA packet, whichever happens last. + * Release here if the send completion already arrived. 
+ */ + txe->internal_flags |= EFA_RDM_TXE_REMOTE_ACK_RECEIVED; + if (efa_rdm_txe_with_remote_ack_ready_for_release(txe)) + efa_rdm_txe_release(txe); efa_rdm_pke_release_rx(pkt_entry); } diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.h b/prov/efa/src/rdm/efa_rdm_pke_nonreq.h index d34717a6e6b..47e5bc0d123 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.h +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.h @@ -135,6 +135,8 @@ ssize_t efa_rdm_pke_init_cts(struct efa_rdm_pke *pkt_entry, void efa_rdm_pke_handle_cts_sent(struct efa_rdm_pke *pkt_entry); +void efa_rdm_pke_handle_cts_send_completion(struct efa_rdm_pke *pkt_entry); + void efa_rdm_pke_handle_cts_recv(struct efa_rdm_pke *pkt_entry); static inline diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index 1972611fc01..d6ebb37508c 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -314,6 +314,7 @@ void test_rdm_cq_handshake_bad_send_status_impl(struct efa_resource **state, int txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); assert_non_null(txe); txe->internal_flags |= EFA_RDM_OPE_INTERNAL; + txe->efa_outstanding_tx_ops = 1; pkt_entry->ope = txe; pkt_entry->peer = peer; diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index e532c0813a9..eaec6978265 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -1969,6 +1969,7 @@ void test_efa_rdm_ep_outstanding_tx_ops_decremented_with_error_completion(struct txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); assert_non_null(txe); txe->internal_flags |= EFA_RDM_OPE_INTERNAL; + txe->efa_outstanding_tx_ops = 1; pkt_entry->ope = txe; pkt_entry->peer = peer; diff --git a/prov/efa/test/efa_unit_test_ope.c b/prov/efa/test/efa_unit_test_ope.c index 269f5424375..b3b46a72ea7 100644 --- a/prov/efa/test/efa_unit_test_ope.c +++ b/prov/efa/test/efa_unit_test_ope.c @@ -1296,91 +1296,126 @@ void test_efa_rdm_atomic_compare_desc_persistence(struct 
efa_resource **state) * @param[in] send_first If true, send completion happens first; if false, receipt first * @param[in] txe_in_send_state If true, TXE is in EFA_RDM_OPE_SEND state; if false, different state */ -static void test_efa_rdm_txe_dc_release_common(struct efa_resource *resource, bool send_first, bool txe_in_send_state) +/** + * @brief Common test for txe release ordering when response/ack arrives + * + * This tests that a txe is only released when both: + * 1. Response/ack received (EFA_RDM_TXE_REMOTE_ACK_RECEIVED set) + * 2. All TX ops completed (efa_outstanding_tx_ops == 0) + * + * @param[in] resource test resource + * @param[in] send_first if true, send completion arrives before response + * @param[in] pkt_type request packet type to test + */ +static void test_efa_rdm_txe_with_resp_release_common(struct efa_resource *resource, + bool send_first, int pkt_type) { struct efa_rdm_ep *efa_rdm_ep; struct efa_rdm_ope *txe; - struct efa_rdm_pke *dc_pkt_entry, *receipt_pkt_entry; - struct efa_rdm_receipt_hdr *receipt_hdr; + struct efa_rdm_pke *req_pkt_entry, *resp_pkt_entry; efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); - efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); - /* Allocate TXE and set up for DC operation */ - txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); + /* Allocate TXE based on protocol */ + if (pkt_type == EFA_RDM_SHORT_RTR_PKT || pkt_type == EFA_RDM_LONGCTS_RTR_PKT) { + txe = efa_unit_test_alloc_txe(resource, ofi_op_read_req); + txe->cq_entry.flags = FI_READ; + /* Set len >= total_len to avoid truncation error path */ + txe->cq_entry.len = 1000; + /* Non-zero total_len so bytes_copied != total_len initially */ + txe->total_len = 1000; + txe->bytes_copied = 0; + /* Ensure CQ entry is written by efa_rdm_txe_report_completion */ + txe->fi_flags |= FI_COMPLETION; + } else if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) { + txe = 
efa_unit_test_alloc_txe(resource, ofi_op_atomic); + txe->cq_entry.flags = FI_ATOMIC | FI_READ; + } else { + /* DC protocols */ + txe = efa_unit_test_alloc_txe(resource, ofi_op_msg); + txe->internal_flags |= EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED; + } assert_non_null(txe); - txe->internal_flags |= EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED; txe->efa_outstanding_tx_ops = 1; - if (txe_in_send_state) { - /* Add TXE to ope_longcts_send_list to simulate active longcts send */ + /* Set txe state based on packet type */ + if (pkt_type == EFA_RDM_CTSDATA_PKT) { txe->state = EFA_RDM_OPE_SEND; dlist_insert_tail(&txe->entry, &efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list); - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 1); } else { - /* TXE is not in SEND state (e.g., non-long-cts TXE) */ txe->state = EFA_RDM_TXE_REQ; - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); } - /* Create fake DC packet entry */ - dc_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); - assert_non_null(dc_pkt_entry); - dc_pkt_entry->ope = txe; - dc_pkt_entry->ep = efa_rdm_ep; - dc_pkt_entry->peer = txe->peer; - /* Set DC packet type in wiredata */ - struct efa_rdm_base_hdr *base_hdr = (struct efa_rdm_base_hdr *)dc_pkt_entry->wiredata; - base_hdr->type = EFA_RDM_DC_EAGER_MSGRTM_PKT; - - /* Create fake receipt packet entry */ - receipt_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL); - assert_non_null(receipt_pkt_entry); - receipt_pkt_entry->ope = txe; - receipt_pkt_entry->ep = efa_rdm_ep; - /* Set tx_id so efa_rdm_pke_handle_receipt_recv can look up the txe */ - receipt_hdr = efa_rdm_pke_get_receipt_hdr(receipt_pkt_entry); - receipt_hdr->tx_id = txe->tx_id; - - /* Verify TXE is not ready for release initially */ - assert_false(efa_rdm_txe_dc_ready_for_release(txe)); + /* Create 
request packet entry */ + req_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); + assert_non_null(req_pkt_entry); + req_pkt_entry->ope = txe; + req_pkt_entry->ep = efa_rdm_ep; + req_pkt_entry->peer = txe->peer; + struct efa_rdm_base_hdr *req_hdr = (struct efa_rdm_base_hdr *)req_pkt_entry->wiredata; + req_hdr->type = pkt_type; + if (pkt_type == EFA_RDM_CTSDATA_PKT) { + req_pkt_entry->flags |= EFA_RDM_PKE_DC_LONGCTS_DATA; + struct efa_rdm_ctsdata_hdr *ctsdata_hdr = efa_rdm_pke_get_ctsdata_hdr(req_pkt_entry); + ctsdata_hdr->seg_length = 0; + } + + /* Create response packet entry (not needed for RTR which uses efa_rdm_ope_handle_recv_completed) */ + if (pkt_type != EFA_RDM_SHORT_RTR_PKT && pkt_type != EFA_RDM_LONGCTS_RTR_PKT) { + resp_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL); + assert_non_null(resp_pkt_entry); + resp_pkt_entry->ope = txe; + resp_pkt_entry->ep = efa_rdm_ep; + if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) { + struct efa_rdm_atomrsp_pkt *atomrsp_pkt = (struct efa_rdm_atomrsp_pkt *)resp_pkt_entry->wiredata; + atomrsp_pkt->hdr.type = EFA_RDM_ATOMRSP_PKT; + atomrsp_pkt->hdr.recv_id = txe->tx_id; + atomrsp_pkt->hdr.seg_length = 0; + txe->atomic_ex.resp_iov_count = 0; + } else { + /* DC protocols use RECEIPT as response */ + struct efa_rdm_receipt_hdr *receipt_hdr = efa_rdm_pke_get_receipt_hdr(resp_pkt_entry); + receipt_hdr->tx_id = txe->tx_id; + } + } + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); if (send_first) { /* Send completion first - should not release TXE yet */ - efa_rdm_pke_handle_send_completion(dc_pkt_entry); + efa_rdm_pke_handle_send_completion(req_pkt_entry); assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); - assert_false(efa_rdm_txe_dc_ready_for_release(txe)); - if (txe_in_send_state) { - /* TXE should still be in ope_longcts_send_list */ - 
assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 1); - assert_int_equal(txe->state, EFA_RDM_OPE_SEND); - } else { - /* Non-long-cts TXE should not be in the list */ - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); - assert_int_equal(txe->state, EFA_RDM_TXE_REQ); - } - /* Receipt handling - should set flag and release TXE */ - efa_rdm_pke_handle_receipt_recv(receipt_pkt_entry); - if (txe_in_send_state) { - /* Should remove from list */ - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); + /* Response arrives - should release TXE now */ + if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) { + efa_rdm_pke_handle_atomrsp_recv(resp_pkt_entry); + } else if (pkt_type == EFA_RDM_SHORT_RTR_PKT || pkt_type == EFA_RDM_LONGCTS_RTR_PKT) { + /* Simulate all read data received and copied */ + txe->bytes_received = txe->total_len; + txe->bytes_copied = txe->total_len; + efa_rdm_ope_handle_recv_completed(txe); + } else { + efa_rdm_pke_handle_receipt_recv(resp_pkt_entry); } } else { - /* Receipt handling first - should set flag but not release TXE yet */ - efa_rdm_pke_handle_receipt_recv(receipt_pkt_entry); - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); - assert_true(txe->internal_flags & EFA_RDM_TXE_RECEIPT_RECEIVED); - assert_false(efa_rdm_txe_dc_ready_for_release(txe)); - if (txe_in_send_state) { - /* TXE should be removed from ope_longcts_send_list immediately */ - assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep_domain(efa_rdm_ep)->ope_longcts_send_list), 0); + /* Response arrives first - should not release TXE yet */ + if (pkt_type == EFA_RDM_FETCH_RTA_PKT || pkt_type == EFA_RDM_COMPARE_RTA_PKT) { + efa_rdm_pke_handle_atomrsp_recv(resp_pkt_entry); + assert_true(txe->internal_flags & EFA_RDM_TXE_REMOTE_ACK_RECEIVED); + } else if (pkt_type 
== EFA_RDM_SHORT_RTR_PKT || pkt_type == EFA_RDM_LONGCTS_RTR_PKT) { + /* Simulate all read data received and copied */ + txe->bytes_received = txe->total_len; + txe->bytes_copied = txe->total_len; + efa_rdm_ope_handle_recv_completed(txe); + } else { + efa_rdm_pke_handle_receipt_recv(resp_pkt_entry); + assert_true(txe->internal_flags & EFA_RDM_TXE_REMOTE_ACK_RECEIVED); } + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); - /* Send completion - should now release TXE */ - efa_rdm_pke_handle_send_completion(dc_pkt_entry); + /* Send completion - should release TXE now */ + efa_rdm_pke_handle_send_completion(req_pkt_entry); } /* Verify TXE is released */ @@ -1395,9 +1430,9 @@ static void test_efa_rdm_txe_dc_release_common(struct efa_resource *resource, bo * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_send_first(struct efa_resource **state) +void test_efa_rdm_txe_dc_ctsdata_send_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, true, true); + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_CTSDATA_PKT); } /** @@ -1409,9 +1444,9 @@ void test_efa_rdm_txe_dc_send_first(struct efa_resource **state) * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_receipt_first(struct efa_resource **state) +void test_efa_rdm_txe_dc_ctsdata_resp_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, false, true); + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_CTSDATA_PKT); } /** @@ -1422,22 +1457,323 @@ void test_efa_rdm_txe_dc_receipt_first(struct efa_resource **state) * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_send_first_non_longcts(struct efa_resource **state) +void test_efa_rdm_txe_dc_eager_rtm_send_first(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, true, false); + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_DC_EAGER_MSGRTM_PKT); } /** * @brief 
Test DC packet TXE release with receipt completion first (TXE not in SEND state) * * This test verifies the bug fix where non-long-cts TXEs get the - * EFA_RDM_TXE_RECEIPT_RECEIVED flag set, allowing proper release. + * EFA_RDM_TXE_REMOTE_ACK_RECEIVED flag set, allowing proper release. * * @param[in] state cmocka state variable */ -void test_efa_rdm_txe_dc_receipt_first_non_longcts(struct efa_resource **state) +void test_efa_rdm_txe_dc_eager_rtm_resp_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_DC_EAGER_MSGRTM_PKT); +} + +/** + * @brief Test SHORT_RTR txe release: send completion before recv completed + */ +void test_efa_rdm_txe_short_rtr_send_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_SHORT_RTR_PKT); +} + +/** + * @brief Test SHORT_RTR txe release: recv completed before send completion + */ +void test_efa_rdm_txe_short_rtr_resp_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_SHORT_RTR_PKT); +} + +/** + * @brief Test FETCH_RTA txe release: send completion before ATOMRSP + */ +void test_efa_rdm_txe_fetch_rta_send_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_FETCH_RTA_PKT); +} + +/** + * @brief Test FETCH_RTA txe release: ATOMRSP before send completion + */ +void test_efa_rdm_txe_fetch_rta_resp_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_FETCH_RTA_PKT); +} + +/** + * @brief Test COMPARE_RTA txe release: send completion before ATOMRSP + */ +void test_efa_rdm_txe_compare_rta_send_first(struct efa_resource **state) +{ + test_efa_rdm_txe_with_resp_release_common(*state, true, EFA_RDM_COMPARE_RTA_PKT); +} + +/** + * @brief Test COMPARE_RTA txe release: ATOMRSP before send completion + */ +void test_efa_rdm_txe_compare_rta_resp_first(struct efa_resource **state) +{ + 
test_efa_rdm_txe_with_resp_release_common(*state, false, EFA_RDM_COMPARE_RTA_PKT); +} + +/** + * @brief Common test for longcts ope release ordering with CTS send completion + * + * In the longcts protocol, the ope sends a CTS packet and then receives + * CTSDATA. The ope can only be released when both: + * 1. All data has been received (bytes_received == total_len) + * 2. All TX ops have completed (efa_outstanding_tx_ops == 0), i.e. + * the CTS send completion has arrived. + * + * CTS can be sent by rxe (longcts msg/write) or txe (emulated longcts read). Release is observed as the endpoint's txe_list/rxe_list length dropping from 1 to 0. + * + * @param[in] resource test resource + * @param[in] send_first if true, CTS send completion arrives before recv completed + * @param[in] op operation type; ofi_op_read_req exercises a txe, any other op (ofi_op_msg, ofi_op_write) an rxe + */ +static void test_efa_rdm_ope_longcts_cts_release_common(struct efa_resource *resource, + bool send_first, uint32_t op) +{ + struct efa_rdm_ep *efa_rdm_ep; + struct efa_rdm_ope *ope; + struct efa_rdm_pke *cts_pkt_entry; + bool is_txe; + + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); + efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); + + /* Emulated longcts read uses txe, msg/write uses rxe */ + is_txe = (op == ofi_op_read_req); + + if (is_txe) { + ope = efa_unit_test_alloc_txe(resource, op); + ope->cq_entry.flags = FI_READ; + } else { + ope = efa_unit_test_alloc_rxe(resource, op); + } + assert_non_null(ope); + ope->efa_outstanding_tx_ops = 1; /* CTS packet in flight */ + ope->total_len = 1000; + ope->bytes_received = 0; + ope->bytes_copied = 0; + if (is_txe) + ope->state = EFA_RDM_TXE_REQ; + else + ope->state = EFA_RDM_RXE_RECV; + + /* Hand-build a CTS packet entry bound to this ope so that its send completion can be injected below */ + cts_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); + assert_non_null(cts_pkt_entry); + cts_pkt_entry->ope = ope; + cts_pkt_entry->ep = efa_rdm_ep; + cts_pkt_entry->peer = ope->peer; + struct efa_rdm_base_hdr *cts_hdr = (struct
efa_rdm_base_hdr *)cts_pkt_entry->wiredata; + cts_hdr->type = EFA_RDM_CTS_PKT; + + if (is_txe) + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); + else + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + + if (send_first) { + /* CTS send completion first - recv not done, should not release */ + efa_rdm_pke_handle_send_completion(cts_pkt_entry); + assert_int_equal(ope->efa_outstanding_tx_ops, 0); + if (is_txe) + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); + else + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + + /* Simulate recv completed */ + ope->bytes_received = ope->total_len; + ope->bytes_copied = ope->total_len; + efa_rdm_ope_handle_recv_completed(ope); + } else { + /* Simulate recv completed first - CTS still outstanding */ + ope->bytes_received = ope->total_len; + ope->bytes_copied = ope->total_len; + efa_rdm_ope_handle_recv_completed(ope); + /* ope should NOT be released yet: the CTS send completion is still outstanding */ + if (is_txe) + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 1); + else + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + + /* CTS send completion - should release ope now */ + efa_rdm_pke_handle_send_completion(cts_pkt_entry); + } + + /* Verify ope is released: its list (txe_list or rxe_list) must now be empty */ + if (is_txe) + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->txe_list), 0); + else + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 0); +} + +/** + * @brief Test longcts msg rxe release: CTS send completion before recv completed + */ +void test_efa_rdm_rxe_longcts_msg_cts_send_first(struct efa_resource **state) +{ + test_efa_rdm_ope_longcts_cts_release_common(*state, true, ofi_op_msg); +} + +/** + * @brief Test longcts msg rxe release: recv completed before CTS send completion + */ +void test_efa_rdm_rxe_longcts_msg_cts_recv_first(struct efa_resource **state) +{ +
test_efa_rdm_ope_longcts_cts_release_common(*state, false, ofi_op_msg); +} + +/** + * @brief Test longcts write rxe release: CTS send completion before recv completed + */ +void test_efa_rdm_rxe_longcts_write_cts_send_first(struct efa_resource **state) +{ + test_efa_rdm_ope_longcts_cts_release_common(*state, true, ofi_op_write); +} + +/** + * @brief Test longcts write rxe release: recv completed before CTS send completion + */ +void test_efa_rdm_rxe_longcts_write_cts_recv_first(struct efa_resource **state) +{ + test_efa_rdm_ope_longcts_cts_release_common(*state, false, ofi_op_write); +} + +/** + * @brief Test emulated longcts read txe release: CTS send completion before recv completed + */ +void test_efa_rdm_txe_longcts_read_cts_send_first(struct efa_resource **state) +{ + test_efa_rdm_ope_longcts_cts_release_common(*state, true, ofi_op_read_req); +} + +/** + * @brief Test emulated longcts read txe release: recv completed before CTS send completion + */ +void test_efa_rdm_txe_longcts_read_cts_recv_first(struct efa_resource **state) +{ + test_efa_rdm_ope_longcts_cts_release_common(*state, false, ofi_op_read_req); +} + +/* + * DC longcts write rxe release ordering tests (CTS vs. RECEIPT send completion). + * + * Background: in DC longcts write, the receiver rxe sends CTS, receives CTSDATA, + * then posts a RECEIPT. Whichever of the two send completions arrives + * first, the rxe must not be released until all + * outstanding TX ops complete. + */ +/** + * @brief Common test for DC longcts write rxe release with CTS and RECEIPT + * + * In DC longcts write, the receiver rxe sends CTS, receives CTSDATA, + * then posts a RECEIPT via efa_rdm_ope_handle_recv_completed. The rxe + * can only be released when all outstanding TX ops (CTS + RECEIPT) + * have completed.
+ * + * @param[in] resource test resource + * @param[in] cts_first if true, CTS send completion arrives before RECEIPT; if false, RECEIPT completes first + */ +static void test_efa_rdm_rxe_dc_longcts_write_cts_receipt_order_common( + struct efa_resource *resource, bool cts_first) +{ + struct efa_rdm_ep *efa_rdm_ep; + struct efa_rdm_ope *rxe; + struct efa_rdm_pke *cts_pkt_entry; + + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); + efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); + + /* Allocate RXE for DC longcts write receive; all data is marked as already received and copied */ + rxe = efa_unit_test_alloc_rxe(resource, ofi_op_write); + assert_non_null(rxe); + rxe->internal_flags |= EFA_RDM_TXE_DELIVERY_COMPLETE_REQUESTED; + /* 1 outstanding TX op: CTS in flight */ + rxe->efa_outstanding_tx_ops = 1; + rxe->total_len = 1000; + rxe->cq_entry.len = rxe->total_len; + rxe->bytes_received = rxe->total_len; + rxe->bytes_copied = rxe->total_len; + rxe->state = EFA_RDM_RXE_RECV; + + /* Hand-build a CTS packet entry bound to this rxe so that its send completion can be injected below */ + cts_pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_tx_pkt_pool, EFA_RDM_PKE_FROM_EFA_TX_POOL); + assert_non_null(cts_pkt_entry); + cts_pkt_entry->ope = rxe; + cts_pkt_entry->ep = efa_rdm_ep; + cts_pkt_entry->peer = rxe->peer; + struct efa_rdm_base_hdr *cts_hdr = (struct efa_rdm_base_hdr *)cts_pkt_entry->wiredata; + cts_hdr->type = EFA_RDM_CTS_PKT; + + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + + /* + * Simulate recv completed: efa_rdm_ope_handle_recv_completed will + * post a RECEIPT packet (because DC is requested) and set + * EFA_RDM_OPE_RECV_COMPLETED. Mock efa_qp_post_send so the + * RECEIPT posting succeeds.
+ */ + g_efa_unit_test_mocks.efa_qp_post_send = &efa_mock_efa_qp_post_send_return_mock; + will_return(efa_mock_efa_qp_post_send_return_mock, 0); + efa_rdm_ope_handle_recv_completed(rxe); + + /* rxe should NOT be released: CTS + RECEIPT outstanding */ + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + assert_true(rxe->internal_flags & EFA_RDM_OPE_RECV_COMPLETED); + assert_true(rxe->internal_flags & EFA_RDM_RXE_ACK_IN_FLIGHT); + assert_int_equal(rxe->efa_outstanding_tx_ops, 2); + + /* Take the RECEIPT pkt entry from send_pkt_entry_vec[0], where the post above is expected to have staged it - NOTE(review): confirm the vector still holds the entry after posting */ + struct efa_rdm_pke *receipt_pkt_entry = efa_rdm_ep->send_pkt_entry_vec[0]; + + if (cts_first) { + /* CTS send completion first - RECEIPT still outstanding */ + efa_rdm_pke_handle_send_completion(cts_pkt_entry); + assert_int_equal(rxe->efa_outstanding_tx_ops, 1); + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + + /* RECEIPT send completion - should release rxe now */ + efa_rdm_pke_handle_send_completion(receipt_pkt_entry); + } else { + /* RECEIPT send completion first - CTS still outstanding */ + efa_rdm_pke_handle_send_completion(receipt_pkt_entry); + /* The RECEIPT send completion must have cleared EFA_RDM_RXE_ACK_IN_FLIGHT */ + assert_false(rxe->internal_flags & EFA_RDM_RXE_ACK_IN_FLIGHT); + assert_int_equal(rxe->efa_outstanding_tx_ops, 1); + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 1); + + /* CTS send completion - should release rxe now */ + efa_rdm_pke_handle_send_completion(cts_pkt_entry); + } + + /* Verify rxe is released: rxe_list must now be empty */ + assert_int_equal(efa_unit_test_get_dlist_length(&efa_rdm_ep->rxe_list), 0); +} + +/** + * @brief Test DC longcts write: CTS send completion before RECEIPT + */ +void test_efa_rdm_rxe_dc_longcts_write_cts_before_receipt(struct efa_resource **state) +{ + test_efa_rdm_rxe_dc_longcts_write_cts_receipt_order_common(*state, true); +} + +/** + * @brief Test DC longcts write: RECEIPT send completion before CTS + */ +void
test_efa_rdm_rxe_dc_longcts_write_receipt_before_cts(struct efa_resource **state) { - test_efa_rdm_txe_dc_release_common(*state, false, false); + test_efa_rdm_rxe_dc_longcts_write_cts_receipt_order_common(*state, false); } /* RDM MSG 0-byte tests */ diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 15fd1c344d8..a5aef35143b 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -343,10 +343,24 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rdm_ope_eor_packet_failed_posting, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ope_eor_packet_tracking_unresponsive_wait_send, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_atomic_compare_desc_persistence, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_receipt_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_send_first_non_longcts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_receipt_first_non_longcts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_ctsdata_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_ctsdata_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_eager_rtm_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_dc_eager_rtm_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + 
cmocka_unit_test_setup_teardown(test_efa_rdm_txe_short_rtr_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_short_rtr_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_fetch_rta_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_fetch_rta_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_compare_rta_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_compare_rta_resp_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_rxe_longcts_msg_cts_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_rxe_longcts_msg_cts_recv_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_rxe_longcts_write_cts_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_rxe_longcts_write_cts_recv_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_longcts_read_cts_send_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_txe_longcts_read_cts_recv_first, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_rxe_dc_longcts_write_cts_before_receipt, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rdm_rxe_dc_longcts_write_receipt_before_cts, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), /* end of efa_unit_test_ope.c */ 
cmocka_unit_test_setup_teardown(test_efa_rdm_msg_send_to_local_peer_with_null_desc, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 4ee4991130c..9c77fb553d1 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -295,10 +295,24 @@ void test_efa_rdm_ope_eor_packet_tracking_wait_send(); void test_efa_rdm_ope_eor_packet_failed_posting(); void test_efa_rdm_ope_eor_packet_tracking_unresponsive_wait_send(); void test_efa_rdm_atomic_compare_desc_persistence(); -void test_efa_rdm_txe_dc_send_first(); -void test_efa_rdm_txe_dc_receipt_first(); -void test_efa_rdm_txe_dc_send_first_non_longcts(); -void test_efa_rdm_txe_dc_receipt_first_non_longcts(); +void test_efa_rdm_txe_dc_ctsdata_send_first(); +void test_efa_rdm_txe_dc_ctsdata_resp_first(); +void test_efa_rdm_txe_dc_eager_rtm_send_first(); +void test_efa_rdm_txe_dc_eager_rtm_resp_first(); +void test_efa_rdm_txe_short_rtr_send_first(); +void test_efa_rdm_txe_short_rtr_resp_first(); +void test_efa_rdm_txe_fetch_rta_send_first(); +void test_efa_rdm_txe_fetch_rta_resp_first(); +void test_efa_rdm_txe_compare_rta_send_first(); +void test_efa_rdm_txe_compare_rta_resp_first(); +void test_efa_rdm_rxe_longcts_msg_cts_send_first(); +void test_efa_rdm_rxe_longcts_msg_cts_recv_first(); +void test_efa_rdm_rxe_longcts_write_cts_send_first(); +void test_efa_rdm_rxe_longcts_write_cts_recv_first(); +void test_efa_rdm_txe_longcts_read_cts_send_first(); +void test_efa_rdm_txe_longcts_read_cts_recv_first(); +void test_efa_rdm_rxe_dc_longcts_write_cts_before_receipt(); +void test_efa_rdm_rxe_dc_longcts_write_receipt_before_cts(); /* end of efa_unit_test_ope.c */