Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libfabric.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,7 @@
<ClCompile Include="prov\efa\src\efa_prov.c" />
<ClCompile Include="prov\efa\src\efa_env.c" />
<ClCompile Include="prov\efa\src\efa_cntr.c" />
<ClCompile Include="prov\efa\src\efa_hw_cntr.c" />
<ClCompile Include="prov\efa\src\efa_msg.c" />
<ClCompile Include="prov\efa\src\efa_rma.c" />
<ClCompile Include="prov\efa\src\efa_cq.c" />
Expand Down Expand Up @@ -1021,6 +1022,7 @@
<ClInclude Include="prov\efa\src\efa_conn.h" />
<ClInclude Include="prov\efa\src\efa_base_ep.h" />
<ClInclude Include="prov\efa\src\efa_cntr.h" />
<ClInclude Include="prov\efa\src\efa_hw_cntr.h" />
<ClInclude Include="prov\efa\src\rdm\efa_rdm_ep.h" />
<ClInclude Include="prov\efa\src\rdm\efa_rdm_cntr.h" />
<ClInclude Include="prov\efa\src\rdm\efa_rdm_pke_utils.h" />
Expand Down
13 changes: 13 additions & 0 deletions prov/efa/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ _efa_files = \
prov/efa/src/efa_prov.c \
prov/efa/src/efa_env.c \
prov/efa/src/efa_cntr.c \
prov/efa/src/efa_hw_cntr.c \
prov/efa/src/efa_msg.c \
prov/efa/src/efa_rma.c \
prov/efa/src/efa_cq.c \
Expand Down Expand Up @@ -101,6 +102,7 @@ _efa_headers = \
prov/efa/src/efa_fork_support.h \
prov/efa/src/efa_cq.h \
prov/efa/src/efa_cntr.h \
prov/efa/src/efa_hw_cntr.h \
prov/efa/src/efa_base_ep.h \
prov/efa/src/efa_direct_ope.h \
prov/efa/src/efa_tp_def.h \
Expand Down Expand Up @@ -241,6 +243,17 @@ if HAVE_EFADV_QUERY_CQ
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=efadv_query_cq
endif HAVE_EFADV_QUERY_CQ

if HAVE_EFADV_CREATE_COMP_CNTR
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=efadv_create_comp_cntr \
-Wl,--wrap=ibv_destroy_comp_cntr \
-Wl,--wrap=ibv_inc_comp_cntr \
-Wl,--wrap=ibv_inc_err_comp_cntr \
-Wl,--wrap=ibv_set_comp_cntr \
-Wl,--wrap=ibv_set_err_comp_cntr \
-Wl,--wrap=ibv_read_comp_cntr \
-Wl,--wrap=ibv_read_err_comp_cntr
endif HAVE_EFADV_CREATE_COMP_CNTR

prov_efa_test_efa_unit_test_LIBS = $(efa_LIBS) $(linkback)

endif ENABLE_EFA_UNIT_TEST
Expand Down
27 changes: 27 additions & 0 deletions prov/efa/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
have_efadv_cq_attr_db=0
have_ibv_create_comp_channel=0
have_ibv_get_cq_event=0
have_ibv_device_attr_ex_max_comp_cntr=0
have_ibv_create_comp_cntr=0
have_efadv_create_comp_cntr=0

dnl $have_neuron is defined at top-level configure.ac
AM_CONDITIONAL([HAVE_NEURON], [ test x"$have_neuron" = x1 ])
Expand Down Expand Up @@ -216,6 +219,20 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
[have_ibv_get_cq_event=1],
[have_ibv_get_cq_event=0],
[[#include <infiniband/verbs.h>]])

AC_CHECK_MEMBER([struct ibv_device_attr_ex.max_comp_cntr],
[have_ibv_device_attr_ex_max_comp_cntr=1],
[have_ibv_device_attr_ex_max_comp_cntr=0],
[[#include <infiniband/verbs.h>]])

AC_CHECK_DECL([ibv_create_comp_cntr],
[have_ibv_create_comp_cntr=1],
[have_ibv_create_comp_cntr=0],
[[#include <infiniband/verbs.h>]])
AC_CHECK_DECL([efadv_create_comp_cntr],
[have_efadv_create_comp_cntr=1],
[have_efadv_create_comp_cntr=0],
[[#include <infiniband/efadv.h>]])
])

AC_DEFINE_UNQUOTED([HAVE_RDMA_SIZE],
Expand Down Expand Up @@ -275,6 +292,15 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AC_DEFINE_UNQUOTED([HAVE_EFA_CQ_NOTIFICATION],
[$have_efa_cq_notification],
[Indicates if EFA supports CQ notification (requires ibv_create_comp_channel and ibv_get_cq_event)])
AC_DEFINE_UNQUOTED([HAVE_IBV_DEVICE_ATTR_EX_MAX_COMP_CNTR],
[$have_ibv_device_attr_ex_max_comp_cntr],
[Indicates if ibv_device_attr_ex has max_comp_cntr field])
AC_DEFINE_UNQUOTED([HAVE_IBV_CREATE_COMP_CNTR],
[$have_ibv_create_comp_cntr],
[Indicates if ibv_create_comp_cntr is available])
AC_DEFINE_UNQUOTED([HAVE_EFADV_CREATE_COMP_CNTR],
[$have_efadv_create_comp_cntr],
[Indicates if efadv_create_comp_cntr is available])


CPPFLAGS=$save_CPPFLAGS
Expand Down Expand Up @@ -329,6 +355,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AM_CONDITIONAL([HAVE_EFADV_QUERY_MR], [ test $have_efadv_query_mr = 1])
AM_CONDITIONAL([HAVE_EFADV_QUERY_QP_WQS], [ test $have_efadv_query_qp_wqs = 1])
AM_CONDITIONAL([HAVE_EFADV_QUERY_CQ], [ test $have_efadv_query_cq = 1])
AM_CONDITIONAL([HAVE_EFADV_CREATE_COMP_CNTR], [ test $have_efadv_create_comp_cntr = 1])
AM_CONDITIONAL([HAVE_EFA_DATA_IN_ORDER_ALIGNED_128_BYTES], [ test $efa_support_data_in_order_aligned_128_byte = 1])
AM_CONDITIONAL([ENABLE_EFA_UNIT_TEST], [ test x"$enable_efa_unit_test" != xno])

Expand Down
3 changes: 2 additions & 1 deletion prov/efa/src/efa_cntr.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "efa.h"
#include "efa_cntr.h"
#include "efa_cq.h"
#include "efa_hw_cntr.h"

int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int timeout)
{
Expand Down Expand Up @@ -137,7 +138,7 @@ void efa_cntr_progress_ibv_cq_poll_list(struct efa_cntr *efa_cntr)
}
}

static void efa_cntr_progress(struct util_cntr *cntr)
void efa_cntr_progress(struct util_cntr *cntr)
{
struct efa_cntr *efa_cntr;

Expand Down
8 changes: 8 additions & 0 deletions prov/efa/src/efa_cntr.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
struct efa_cntr {
struct util_cntr util_cntr;
struct dlist_entry ibv_cq_poll_list;
/* Hardware completion counter */
struct ibv_comp_cntr *ibv_comp_cntr;
/* Whether completion counter memory is on device (DMABUF) without host mapping */
bool comp_use_device_mem;
/* Whether error counter memory is on device (DMABUF) without host mapping */
bool err_use_device_mem;
};

int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
Expand All @@ -36,4 +42,6 @@ void efa_cntr_report_rx_completion(struct util_ep *ep, uint64_t flags);

void efa_cntr_report_error(struct util_ep *ep, uint64_t flags);

void efa_cntr_progress(struct util_cntr *cntr);

#endif
54 changes: 54 additions & 0 deletions prov/efa/src/efa_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,58 @@ int efa_device_construct_gid(struct efa_device *efa_device,
return err;
}

/**
* @brief set the max completion counter value from the device
*
* @param efa_device[in] pointer to a struct efa_device
* @return max_comp_cntr value from device, or 0 if unavailable
*/
static void efa_device_set_max_comp_cntr(struct efa_device *efa_device)
{
efa_device->max_comp_cntr = 0;
#if HAVE_IBV_DEVICE_ATTR_EX_MAX_COMP_CNTR
struct ibv_device_attr_ex attr_ex = {0};
struct ibv_query_device_ex_input input = {0};
int err;

err = ibv_query_device_ex(efa_device->ibv_ctx, &input, &attr_ex);
if (err) {
EFA_WARN_ERRNO(FI_LOG_FABRIC, "ibv_query_device_ex failed", err);
return;
}
efa_device->max_comp_cntr = attr_ex.max_comp_cntr;
#endif
}

/**
* @brief set the max comp/err count values from the device
*
* @param efa_device[in,out] pointer to a struct efa_device
*/
static void efa_device_set_cntr_max_values(struct efa_device *efa_device)
{
efa_device->comp_count_max_value = 0;
efa_device->err_count_max_value = 0;
#if HAVE_IBV_CREATE_COMP_CNTR
{
struct ibv_comp_cntr_init_attr cc_attr = {0};
struct ibv_comp_cntr *comp_cntr;
int err;

comp_cntr = ibv_create_comp_cntr(efa_device->ibv_ctx, &cc_attr);
if (!comp_cntr) {
EFA_WARN_ERRNO(FI_LOG_CNTR, "ibv_create_comp_cntr failed.", errno);
} else {
efa_device->comp_count_max_value = comp_cntr->comp_count_max_value;
efa_device->err_count_max_value = comp_cntr->err_count_max_value;
err = ibv_destroy_comp_cntr(comp_cntr);
if (err)
EFA_WARN_ERRNO(FI_LOG_CNTR, "ibv_destroy_comp_cntr failed", err);
}
}
#endif
}

/**
* @brief initialize data members of a struct of efa_device after the gid
* including the prov info
Expand Down Expand Up @@ -137,6 +189,8 @@ int efa_device_construct_data(struct efa_device *efa_device,
efa_device->max_rdma_size = 0;
efa_device->device_caps = 0;
#endif
efa_device_set_max_comp_cntr(efa_device);
efa_device_set_cntr_max_values(efa_device);
efa_device->rdm_info = NULL;
err = efa_prov_info_alloc(&efa_device->rdm_info, efa_device, FI_EP_RDM);
if (err) {
Expand Down
5 changes: 5 additions & 0 deletions prov/efa/src/efa_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ struct efa_device {
size_t qp_table_sz_m1;
struct ofi_genlock qp_table_lock;
int urandom_fd;
/* Max number of completion counters supported by device, used to
* populate info->domain_attr->cntr_cnt */
uint32_t max_comp_cntr;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: a comment here explaining what this field is used for?

uint64_t comp_count_max_value;
uint64_t err_count_max_value;
};

int efa_device_list_initialize(void);
Expand Down
4 changes: 4 additions & 0 deletions prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info,
ret = -FI_ENOMEM;
goto err_free;
}
/* keep max_cntr_value and max_err_cntr_value from user info so we can
* decide whether to use hw counter later */
efa_domain->info->domain_attr->max_cntr_value = info->domain_attr->max_cntr_value;
efa_domain->info->domain_attr->max_err_cntr_value = info->domain_attr->max_err_cntr_value;

*domain_fid = &efa_domain->util_domain.domain_fid;

Expand Down
Loading