-
Notifications
You must be signed in to change notification settings - Fork 503
prov/efa: Introduce new protocol interface and implement it for the eager protocol in the TX path #12144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
prov/efa: Introduce new protocol interface and implement it for the eager protocol in the TX path #12144
Changes from all commits
ed70939
de44e9f
1254b4f
188de26
5a04d6c
6c27b95
5cb4d31
ba44866
7a21c1f
614580d
3b03182
588b40c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,9 @@ | |
| #include "efa_rdm_pke_utils.h" | ||
| #include "efa_rdm_pke_req.h" | ||
|
|
||
| #include "efa_mr.h" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we only active the new code in this commit, should the earlier assertion in the switch cases added only in this commit? |
||
| #include "efa_rdm_proto.h" | ||
| #include "efa_rdm_proto_eager.h" | ||
| #include "efa_rdm_tracepoint.h" | ||
|
|
||
| /** | ||
|
|
@@ -102,14 +105,42 @@ int efa_rdm_msg_select_rtm(struct efa_rdm_ep *efa_rdm_ep, struct efa_rdm_ope *tx | |
| } | ||
|
|
||
| /** | ||
| * @brief post RTM packet(s) for a send operation | ||
| * @brief Post an already-filled TXE using the new protocol path. | ||
| * | ||
| * Used by the retry path after handshake completes and by the normal | ||
| * send path. The TXE must already be filled by efa_rdm_proto_txe_fill. | ||
| */ | ||
| ssize_t efa_rdm_msg_post_rtm_proto(struct efa_rdm_ep *ep, | ||
| struct efa_rdm_ope *txe, | ||
| struct efa_rdm_proto *proto) | ||
| { | ||
| ssize_t err; | ||
| uint64_t pke_send_flags = 0; | ||
|
|
||
| err = proto->construct_tx_pkes( | ||
| ep, txe->peer, NULL, txe->op, txe->tag, | ||
| txe->fi_flags, txe->internal_flags, txe); | ||
| if (err) | ||
| return err; | ||
|
|
||
| err = efa_rdm_pke_sendv(ep->send_pkt_entry_vec, | ||
| ep->send_pkt_entry_vec_size, | ||
| pke_send_flags); | ||
| if (err) | ||
| return err; | ||
|
|
||
| proto->handle_tx_pkes_posted(ep, txe); | ||
| return FI_SUCCESS; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Post a RTM packet for a TX entry using the old code path. | ||
| * | ||
| * @param[in,out] ep endpoint | ||
| * @param[in,out] txe information of the send operation. | ||
| * @retval 0 if packet(s) was posted successfully. | ||
| * @retval -FI_ENOSUPP if the send operation requires an extra feature, | ||
| * which peer does not support. | ||
| * @retval -FI_EAGAIN for temporary out of resources for send | ||
| */ | ||
| ssize_t efa_rdm_msg_post_rtm(struct efa_rdm_ep *ep, struct efa_rdm_ope *txe) | ||
| { | ||
|
|
@@ -168,6 +199,7 @@ ssize_t efa_rdm_msg_generic_send(struct efa_rdm_ep *ep, const struct fi_msg *msg | |
| struct efa_rdm_ope *txe; | ||
| struct efa_rdm_peer *peer; | ||
| size_t available_tx_pkts; | ||
| struct efa_rdm_proto *proto; | ||
|
|
||
| efa_rdm_tracepoint(send_begin_msg_context, | ||
| (size_t) msg->context, (size_t) msg->addr); | ||
|
|
@@ -196,6 +228,47 @@ ssize_t efa_rdm_msg_generic_send(struct efa_rdm_ep *ep, const struct fi_msg *msg | |
| goto out; | ||
| } | ||
|
|
||
| /* First try to use the refactored code path */ | ||
| err = efa_rdm_proto_select_send_protocol(ep, peer, msg, op, fi_flags, txe, | ||
| &proto); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please split the new and old paths to two different functions |
||
| if (err) | ||
| goto out; | ||
|
|
||
| /* If a protocol is found, use it. Otherwise, fall back to the old code | ||
| * path */ | ||
| if (proto) { | ||
| efa_rdm_proto_txe_fill(txe, ep, peer, msg, op, tag, fi_flags, | ||
| internal_flags); | ||
| txe->msg_id = peer->next_msg_id++; | ||
|
|
||
| /* | ||
| * For backwards compatibility: if the peer may have zero-copy | ||
| * receive enabled, we must complete handshake before sending so | ||
| * we can discover the peer's user_recv_qp and route packets | ||
| * accordingly. | ||
| */ | ||
| if (ep->peer_may_have_zcpy_rx && | ||
| !(peer->flags & EFA_RDM_PEER_HANDSHAKE_RECEIVED)) { | ||
| err = efa_rdm_ep_enforce_handshake_for_txe(ep, txe); | ||
| if (err) { | ||
| efa_rdm_txe_release(txe); | ||
| peer->next_msg_id--; | ||
| } | ||
| goto out; | ||
| } | ||
|
|
||
| err = efa_rdm_msg_post_rtm_proto(ep, txe, proto); | ||
| if (err) { | ||
| efa_rdm_txe_release(txe); | ||
| peer->next_msg_id--; | ||
| goto out; | ||
| } | ||
|
|
||
| peer->flags |= EFA_RDM_PEER_REQ_SENT; | ||
| goto out; | ||
| } | ||
|
|
||
| /* Fallback to the old code path */ | ||
| efa_rdm_txe_construct(txe, ep, peer, msg, op, fi_flags, internal_flags); | ||
| if (op == ofi_op_tagged) { | ||
| txe->cq_entry.tag = tag; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,12 +91,8 @@ int efa_rdm_pke_fill_data(struct efa_rdm_pke *pkt_entry, | |
| ret = efa_rdm_pke_init_receipt(pkt_entry, ope); | ||
| break; | ||
| case EFA_RDM_EAGER_MSGRTM_PKT: | ||
| assert(data_offset == 0 && data_size == -1); | ||
| ret = efa_rdm_pke_init_eager_msgrtm(pkt_entry, ope); | ||
| break; | ||
| case EFA_RDM_EAGER_TAGRTM_PKT: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it not an opportunity to united these cases?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This entire function will be removed when the refactor is complete, so I'm not sure it matters |
||
| assert(data_offset == 0 && data_size == -1); | ||
| ret = efa_rdm_pke_init_eager_tagrtm(pkt_entry, ope); | ||
| assert(0 && "Eager protocol moved to refactored code path"); | ||
| break; | ||
| case EFA_RDM_MEDIUM_MSGRTM_PKT: | ||
| assert(data_offset >= 0 && data_size > 0); | ||
|
|
@@ -173,12 +169,8 @@ int efa_rdm_pke_fill_data(struct efa_rdm_pke *pkt_entry, | |
| ret = efa_rdm_pke_init_compare_rta(pkt_entry, ope); | ||
| break; | ||
| case EFA_RDM_DC_EAGER_MSGRTM_PKT: | ||
| assert(data_offset == 0 && data_size == -1); | ||
| ret = efa_rdm_pke_init_dc_eager_msgrtm(pkt_entry, ope); | ||
| break; | ||
| case EFA_RDM_DC_EAGER_TAGRTM_PKT: | ||
| assert(data_offset == 0 && data_size == -1); | ||
| ret = efa_rdm_pke_init_dc_eager_tagrtm(pkt_entry, ope); | ||
| assert(0 && "Eager protocol moved to refactored code path"); | ||
| break; | ||
| case EFA_RDM_DC_MEDIUM_MSGRTM_PKT: | ||
| assert(data_offset >= 0 && data_size > 0); | ||
|
|
@@ -266,7 +258,7 @@ void efa_rdm_pke_handle_sent(struct efa_rdm_pke *pkt_entry, int pkt_type, struct | |
| break; | ||
| case EFA_RDM_EAGER_MSGRTM_PKT: | ||
| case EFA_RDM_EAGER_TAGRTM_PKT: | ||
| /* nothing to do */ | ||
| assert(0 && "Eager protocol moved to refactored code path"); | ||
| break; | ||
| case EFA_RDM_MEDIUM_MSGRTM_PKT: | ||
| case EFA_RDM_MEDIUM_TAGRTM_PKT: | ||
|
|
@@ -310,8 +302,10 @@ void efa_rdm_pke_handle_sent(struct efa_rdm_pke *pkt_entry, int pkt_type, struct | |
| break; | ||
| case EFA_RDM_DC_EAGER_MSGRTM_PKT: | ||
| case EFA_RDM_DC_EAGER_TAGRTM_PKT: | ||
| assert(0 && "Eager protocol moved to refactored code path"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a packet construction ? seems it should be part of your next commit |
||
| break; | ||
| case EFA_RDM_DC_EAGER_RTW_PKT: | ||
| /* nothing to do for DC EAGER RTM/RTW */ | ||
| /* nothing to do for DC EAGER RTW */ | ||
| break; | ||
| case EFA_RDM_CTSDATA_PKT: | ||
| efa_rdm_pke_handle_ctsdata_sent(pkt_entry); | ||
|
|
@@ -553,13 +547,6 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) | |
| return; | ||
| } | ||
|
|
||
| /* These pkts are eager pkts withour hdrs */ | ||
| if (pkt_entry->flags & EFA_RDM_PKE_SEND_TO_USER_RECV_QP) { | ||
| efa_rdm_pke_handle_eager_rtm_send_completion(pkt_entry); | ||
| efa_rdm_pke_release_tx(pkt_entry); | ||
| return; | ||
| } | ||
|
|
||
| /* Start handling pkts with hdrs */ | ||
| switch (efa_rdm_pkt_type_of(pkt_entry)) { | ||
| case EFA_RDM_HANDSHAKE_PKT: | ||
|
|
@@ -592,7 +579,7 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) | |
| break; | ||
| case EFA_RDM_EAGER_MSGRTM_PKT: | ||
| case EFA_RDM_EAGER_TAGRTM_PKT: | ||
| efa_rdm_pke_handle_eager_rtm_send_completion(pkt_entry); | ||
| assert(0 && "Eager protocol moved to refactored code path"); | ||
| break; | ||
| case EFA_RDM_MEDIUM_MSGRTM_PKT: | ||
| case EFA_RDM_MEDIUM_TAGRTM_PKT: | ||
|
|
@@ -637,6 +624,8 @@ void efa_rdm_pke_handle_send_completion(struct efa_rdm_pke *pkt_entry) | |
| break; | ||
| case EFA_RDM_DC_EAGER_MSGRTM_PKT: | ||
| case EFA_RDM_DC_EAGER_TAGRTM_PKT: | ||
| assert(0 && "Eager protocol moved to refactored code path"); | ||
| break; | ||
| case EFA_RDM_DC_MEDIUM_MSGRTM_PKT: | ||
| case EFA_RDM_DC_MEDIUM_TAGRTM_PKT: | ||
| case EFA_RDM_DC_EAGER_RTW_PKT: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will it be better to make
efa_rdm_cq_increment_pkt_entry_genpart of the callback .. ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I considered that. The callback is part of the protocol. The gen counter is used by the progress engine to keep track of duplicated completions. So I felt that it's better to leave the gen counter increment here