From f01f8b2178c6f4fde0ad7697f82518283eec08b1 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 4 Sep 2025 11:33:13 -0500 Subject: [PATCH 01/47] mpit: fix an unused function warning Warning under -Wall. --- src/mpi_t/mpit_initthread.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mpi_t/mpit_initthread.c b/src/mpi_t/mpit_initthread.c index 39393b21f88..5d34377b8b4 100644 --- a/src/mpi_t/mpit_initthread.c +++ b/src/mpi_t/mpit_initthread.c @@ -9,6 +9,7 @@ * Read global config files that have MPICH_ environment variables for us to * use */ +#ifdef USE_CONFIGFILE static int read_config_files(const char *fname) { int lineno = 0; @@ -49,6 +50,7 @@ static int read_config_files(const char *fname) fclose(f); return TRUE; } +#endif static inline void MPIR_T_enum_env_init(void) { From cfdef4a24b7c17c2ef41aa05fefd12c5e8ba0245 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 18:26:12 -0500 Subject: [PATCH 02/47] ADI: remove MPID collectives We will use a single-level JSON for algorithm selection including device-specific algorithms. Remove the collective ADI for now. We'll add the mechanism of selecting device-level algorithms later. gen_coll.py is updated to skip calling MPID_ collectives. Device collective CVARs are removed. 
--- maint/gen_coll.py | 12 +- src/mpi/coll/cvars.txt | 990 ------------------ src/mpi/coll/src/coll_impl.c | 14 - src/mpid/ch3/Makefile.mk | 3 +- src/mpid/ch3/include/mpid_coll.h | 577 ----------- src/mpid/ch3/include/mpidpost.h | 2 - src/mpid/ch3/include/mpidpre.h | 93 -- src/mpid/ch3/src/mpid_startall.c | 423 -------- src/mpid/ch4/include/mpidch4.h | 229 ----- src/mpid/ch4/src/ch4_coll.h | 1603 ++---------------------------- src/mpid/ch4/src/ch4_init.c | 15 - src/mpid/ch4/src/ch4_persist.c | 342 ------- 12 files changed, 98 insertions(+), 4205 deletions(-) delete mode 100644 src/mpid/ch3/include/mpid_coll.h diff --git a/maint/gen_coll.py b/maint/gen_coll.py index de4fbcbdfd6..5495e7c4ffd 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -533,17 +533,7 @@ def dump_buffer_swap_post(): if need_buffer_swap: dump_buffer_swap_pre() - cond1 = "MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_all" - cond2 = "MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_percoll" - cond3 = "MPIR_CVAR_%s_DEVICE_COLLECTIVE" % NAME - G.out.append("if ((%s) ||" % cond1) - G.out.append(" ((%s) &&" % cond2) - G.out.append(" %s)) {" % cond3) - G.out.append("INDENT") - dump_split(2, "mpi_errno = MPID_%s(%s);" % (Name, func_args)) - dump_else() - dump_split(2, "mpi_errno = MPIR_%s_impl(%s);" % (Name, func_args)) - dump_close("}") + dump_split(1, "mpi_errno = MPIR_%s_impl(%s);" % (Name, func_args)) if need_buffer_swap: dump_buffer_swap_post() G.out.append("") diff --git a/src/mpi/coll/cvars.txt b/src/mpi/coll/cvars.txt index d2a7f5576b7..8737d315b5b 100644 --- a/src/mpi/coll/cvars.txt +++ b/src/mpi/coll/cvars.txt @@ -2123,996 +2123,6 @@ cvars: sched_linear - Force linear algorithm tsp_linear - Force generic transport based linear algorithm - - name : MPIR_CVAR_BARRIER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only 
used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Barrier will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IBARRIER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ibarrier will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_BARRIER_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Barrier will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_BCAST_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Bcast will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_IBCAST_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ibcast will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_BCAST_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Bcast_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_GATHER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Gather will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IGATHER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Igather will - allow the device to override the MPIR-level collective - algorithms. 
The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_GATHER_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Gather_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_GATHERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Gatherv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IGATHERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Igatherv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_GATHERV_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". 
If set to true, MPI_Gatherv_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_SCATTER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Scatter will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ISCATTER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iscatter will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_SCATTER_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Scatter_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_SCATTERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Scatterv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ISCATTERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iscatterv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_SCATTERV_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Scatterv_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLGATHER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Allgather will - allow the device to override the MPIR-level collective - algorithms. 
The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IALLGATHER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iallgather will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLGATHER_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Allgather_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLGATHERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Allgatherv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_IALLGATHERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iallgatherv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLGATHERV_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Allgatherv_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLTOALL_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Alltoall will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IALLTOALL_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ialltoall will - allow the device to override the MPIR-level collective - algorithms. 
The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLTOALL_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Alltoall_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLTOALLV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Alltoallv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IALLTOALLV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ialltoallv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_ALLTOALLV_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Alltoallv_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLTOALLW_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Alltoallw will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IALLTOALLW_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ialltoallw will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLTOALLW_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Alltoallw_init will - allow the device to override the MPIR-level collective - algorithms. 
The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Reduce will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IREDUCE_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ireduce will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_REDUCE_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Reduce_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". 
If set to true, MPI_Allreduce will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IALLREDUCE_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iallreduce will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ALLREDUCE_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Allreduce_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_REDUCE_SCATTER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Reduce_scatter will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_IREDUCE_SCATTER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ireduce_scatter will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_REDUCE_SCATTER_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Reduce_scatter_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_REDUCE_SCATTER_BLOCK_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Reduce_scatter_block will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IREDUCE_SCATTER_BLOCK_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". 
If set to true, MPI_Ireduce_scatter_block will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_REDUCE_SCATTER_BLOCK_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Reduce_scatter_block_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_SCAN_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Scan will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_ISCAN_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iscan will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_SCAN_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Scan_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_EXSCAN_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Exscan will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_IEXSCAN_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Iexscan will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_EXSCAN_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Exscan_init will - allow the device to override the MPIR-level collective - algorithms. 
The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLGATHER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_allgather will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_INEIGHBOR_ALLGATHER_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ineighbor_allgather will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLGATHER_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_allgather_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_NEIGHBOR_ALLGATHERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_allgatherv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_INEIGHBOR_ALLGATHERV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ineighbor_allgatherv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLGATHERV_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_allgatherv_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLTOALL_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". 
If set to true, MPI_Neighbor_alltoall will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_INEIGHBOR_ALLTOALL_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ineighbor_alltoall will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLTOALL_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_alltoall_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLTOALLV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_alltoallv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. 
- - - name : MPIR_CVAR_INEIGHBOR_ALLTOALLV_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Ineighbor_alltoallv will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLTOALLV_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_alltoallv_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLTOALLW_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_alltoallw will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_INEIGHBOR_ALLTOALLW_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". 
If set to true, MPI_Ineighbor_alltoallw will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - - name : MPIR_CVAR_NEIGHBOR_ALLTOALLW_INIT_DEVICE_COLLECTIVE - category : COLLECTIVE - type : boolean - default : true - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - This CVAR is only used when MPIR_CVAR_DEVICE_COLLECTIVES - is set to "percoll". If set to true, MPI_Neighbor_alltoallw_init will - allow the device to override the MPIR-level collective - algorithms. The device might still call the MPIR-level - algorithms manually. If set to false, the device-override - will be disabled. - - name : MPIR_CVAR_COLL_HYBRID_MEMORY category : COLLECTIVE type : boolean diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 9066f82c871..d83551aa565 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -14,20 +14,6 @@ categories : description : A category for collective communication variables. cvars: - - name : MPIR_CVAR_DEVICE_COLLECTIVES - category : COLLECTIVE - type : enum - default : percoll - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : |- - Variable to select whether the device can override the - MPIR-level collective algorithms. 
- all - Always prefer the device collectives - none - Never pick the device collectives - percoll - Use the per-collective CVARs to decide - - name : MPIR_CVAR_COLLECTIVE_FALLBACK category : COLLECTIVE type : enum diff --git a/src/mpid/ch3/Makefile.mk b/src/mpid/ch3/Makefile.mk index 80d7136fab7..1b44a2d0c0e 100644 --- a/src/mpid/ch3/Makefile.mk +++ b/src/mpid/ch3/Makefile.mk @@ -19,8 +19,7 @@ noinst_HEADERS += \ src/mpid/ch3/include/mpidpre.h \ src/mpid/ch3/include/mpid_thread.h \ src/mpid/ch3/include/mpidrma.h \ - src/mpid/ch3/include/mpid_sched.h \ - src/mpid/ch3/include/mpid_coll.h + src/mpid/ch3/include/mpid_sched.h include $(top_srcdir)/src/mpid/ch3/src/Makefile.mk include $(top_srcdir)/src/mpid/ch3/util/Makefile.mk diff --git a/src/mpid/ch3/include/mpid_coll.h b/src/mpid/ch3/include/mpid_coll.h deleted file mode 100644 index 4b922093e59..00000000000 --- a/src/mpid/ch3/include/mpid_coll.h +++ /dev/null @@ -1,577 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#ifndef MPID_COLL_H_INCLUDED -#define MPID_COLL_H_INCLUDED - -#include "mpiimpl.h" -#ifdef HAVE_HCOLL -#include "../../common/hcoll/hcoll.h" -#endif - -static inline int MPID_Barrier(MPIR_Comm * comm, int coll_attr) -{ -#ifdef HAVE_HCOLL - if (MPI_SUCCESS == hcoll_Barrier(comm, coll_attr)) - return MPI_SUCCESS; -#endif - return MPIR_Barrier_impl(comm, coll_attr); -} - -static inline int MPID_Bcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, - MPIR_Comm * comm, int coll_attr) -{ -#ifdef HAVE_HCOLL - if (MPI_SUCCESS == hcoll_Bcast(buffer, count, datatype, root, comm, coll_attr)) - return MPI_SUCCESS; -#endif - return MPIR_Bcast_impl(buffer, count, datatype, root, comm, coll_attr); -} - -static inline int MPID_Allreduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ -#ifdef HAVE_HCOLL - if (MPI_SUCCESS == hcoll_Allreduce(sendbuf, recvbuf, count, 
datatype, op, comm, coll_attr)) - return MPI_SUCCESS; -#endif - return MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); -} - -static inline int MPID_Allgather(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ -#ifdef HAVE_HCOLL - if (MPI_SUCCESS == hcoll_Allgather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr)) - return MPI_SUCCESS; -#endif - return MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); -} - -static inline int MPID_Allgatherv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm, - coll_attr); - - return mpi_errno; -} - -static inline int MPID_Scatter(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - - return mpi_errno; -} - -static inline int MPID_Scatterv(const void *sendbuf, const MPI_Aint * sendcounts, const MPI_Aint * displs, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, - coll_attr); - - return mpi_errno; -} - -static inline int MPID_Gather(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, 
MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - - return mpi_errno; -} - -static inline int MPID_Gatherv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm, - coll_attr); - - return mpi_errno; -} - -static inline int MPID_Alltoall(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); - - return mpi_errno; -} - -static inline int MPID_Alltoallv(const void *sendbuf, const MPI_Aint * sendcounts, const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, MPI_Datatype recvtype, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, - comm, coll_attr); - - return mpi_errno; -} - -static inline int MPID_Alltoallw(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, - comm_ptr, coll_attr); - - return mpi_errno; 
-} - -static inline int MPID_Reduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, - comm, coll_attr); - - return mpi_errno; -} - -static inline int MPID_Reduce_scatter(const void *sendbuf, void *recvbuf, const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, - datatype, op, comm_ptr, coll_attr); - - return mpi_errno; -} - -static inline int MPID_Reduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, - coll_attr); - - return mpi_errno; -} - -static inline int MPID_Scan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm, - coll_attr); - - return mpi_errno; -} - -static inline int MPID_Exscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm, - coll_attr); - - return mpi_errno; -} - -static inline int MPID_Neighbor_allgather(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Neighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm); - - return 
mpi_errno; -} - -static inline int MPID_Neighbor_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], - MPI_Datatype recvtype, MPIR_Comm * comm) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Neighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, - recvtype, comm); - - return mpi_errno; -} - -static inline int MPID_Neighbor_alltoallv(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Neighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm); - - return mpi_errno; -} - -static inline int MPID_Neighbor_alltoallw(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Neighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm); - - return mpi_errno; -} - -static inline int MPID_Neighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Neighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm); - - return mpi_errno; -} - -static inline int MPID_Ineighbor_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = 
MPIR_Ineighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm, request); - - return mpi_errno; -} - -static inline int MPID_Ineighbor_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ineighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, - recvtype, comm, request); - - return mpi_errno; -} - -static inline int MPID_Ineighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ineighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm, request); - - return mpi_errno; -} - -static inline int MPID_Ineighbor_alltoallv(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ineighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm, - request); - - return mpi_errno; -} - -static inline int MPID_Ineighbor_alltoallw(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ineighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm, - request); - - return mpi_errno; -} - -static 
inline int MPID_Ibarrier(MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ibarrier_impl(comm, request); - - return mpi_errno; -} - -static inline int MPID_Ibcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ibcast_impl(buffer, count, datatype, root, comm, request); - - return mpi_errno; -} - -static inline int MPID_Iallgather(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iallgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, request); - - return mpi_errno; -} - -static inline int MPID_Iallgatherv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iallgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm, - request); - - return mpi_errno; -} - -static inline int MPID_Iallreduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iallreduce_impl(sendbuf, recvbuf, count, datatype, op, - comm, request); - - return mpi_errno; -} - -static inline int MPID_Ialltoall(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ialltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, request); - - return mpi_errno; -} - -static inline int 
MPID_Ialltoallv(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ialltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, - comm, request); - - return mpi_errno; -} - -static inline int MPID_Ialltoallw(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ialltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, - comm, request); - - return mpi_errno; -} - -static inline int MPID_Iexscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iexscan_impl(sendbuf, recvbuf, count, datatype, op, comm, - request); - - return mpi_errno; -} - -static inline int MPID_Igather(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Igather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); - - return mpi_errno; -} - -static inline int MPID_Igatherv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Igatherv_impl(sendbuf, sendcount, 
sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm, - request); - - return mpi_errno; -} - -static inline int MPID_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm, request); - - return mpi_errno; -} - -static inline int MPID_Ireduce_scatter(const void *sendbuf, void *recvbuf, const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ireduce_scatter_impl(sendbuf, recvbuf, recvcounts, - datatype, op, comm, request); - - return mpi_errno; -} - -static inline int MPID_Ireduce(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, int root, MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Ireduce_impl(sendbuf, recvbuf, count, datatype, op, root, - comm, request); - - return mpi_errno; -} - -static inline int MPID_Iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iscan_impl(sendbuf, recvbuf, count, datatype, op, comm, - request); - - return mpi_errno; -} - -static inline int MPID_Iscatter(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); - - return mpi_errno; -} - -static inline int MPID_Iscatterv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint 
recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Iscatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, - request); - - return mpi_errno; -} - -#endif /* MPID_COLL_H_INCLUDED */ diff --git a/src/mpid/ch3/include/mpidpost.h b/src/mpid/ch3/include/mpidpost.h index 231bccec1bf..eb0042a036b 100644 --- a/src/mpid/ch3/include/mpidpost.h +++ b/src/mpid/ch3/include/mpidpost.h @@ -6,8 +6,6 @@ #ifndef MPIDPOST_H_INCLUDED #define MPIDPOST_H_INCLUDED -#include "mpid_coll.h" - /* FIXME: mpidpost.h is included by mpiimpl.h . However, mpiimpl.h should refer only to the ADI3 prototypes and should never include prototypes specific to any particular device. Factor the include files to maintain diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h index 1259ff687e4..064eda60340 100644 --- a/src/mpid/ch3/include/mpidpre.h +++ b/src/mpid/ch3/include/mpidpre.h @@ -615,99 +615,6 @@ int MPID_Recv_init( void *buf, MPI_Aint count, MPI_Datatype datatype, int MPID_Startall(int count, MPIR_Request *requests[]); -int MPID_Bcast_init(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, - MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, MPIR_Request **request); - -int MPID_Allreduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, - MPIR_Request ** request); - -int MPID_Reduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, - MPIR_Request **request); - -int MPID_Alltoall_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, MPIR_Request** request); - -int MPID_Alltoallv_init(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint 
sdispls[], - MPI_Datatype sendtype, void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Info * info_ptr, MPIR_Request ** request); - -int MPID_Alltoallw_init(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); - -int MPID_Allgather_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, MPIR_Request** request); - -int MPID_Allgatherv_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, - MPIR_Request** request); - -int MPID_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); - -int MPID_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, MPIR_Info * info, - MPIR_Request ** request); - -int MPID_Scan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); - -int MPID_Gather_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); - -int MPID_Gatherv_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], MPI_Datatype recvtype, - int root, MPIR_Comm * comm, 
MPIR_Info * info, MPIR_Request ** request); - -int MPID_Scatter_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); - -int MPID_Scatterv_init(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint displs[], - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, MPIR_Info * info, - MPIR_Request ** request); - -int MPID_Barrier_init(MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); - -int MPID_Exscan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); - -int MPID_Neighbor_allgather_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); - -int MPID_Neighbor_allgatherv_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint displs[], MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); - -int MPID_Neighbor_alltoall_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); - - -int MPID_Neighbor_alltoallv_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint rdispls[], - MPI_Datatype recvtype, MPIR_Comm * comm, MPIR_Info * info, - MPIR_Request ** request); - -int MPID_Neighbor_alltoallw_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const 
MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); - int MPID_Probe(int source, int tag, MPIR_Comm *comm, int attr, MPI_Status * status); int MPID_Iprobe(int source, int tag, MPIR_Comm *comm, int attr, int *flag, MPI_Status *status); diff --git a/src/mpid/ch3/src/mpid_startall.c b/src/mpid/ch3/src/mpid_startall.c index cba93847a43..e7d9e8dcee2 100644 --- a/src/mpid/ch3/src/mpid_startall.c +++ b/src/mpid/ch3/src/mpid_startall.c @@ -315,426 +315,3 @@ int MPID_Recv_init(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, MPIR_FUNC_EXIT; return mpi_errno; } - -int MPID_Bcast_init(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, - MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Bcast_init_impl(buffer, count, datatype, root, comm_ptr, info_ptr, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Allreduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, - MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allreduce_init_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, info_ptr, - request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Reduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, - MPIR_Request **request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_init_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, - 
info_ptr, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Alltoall_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, MPIR_Request** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoall_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, info_ptr, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Alltoallv_init(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallv_init_impl(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm_ptr, info_ptr, - request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Alltoallw_init(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallw_init_impl(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, - recvcounts, rdispls, recvtypes, comm_ptr, info_ptr, - request); - 
MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Allgather_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, MPIR_Request** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgather_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, info_ptr, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Allgatherv_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Info* info_ptr, - MPIR_Request** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgatherv_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, comm_ptr, info_ptr, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_scatter_block_init_impl(sendbuf, recvbuf, recvcount, datatype, op, comm, - info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Reduce_scatter_init(const void 
*sendbuf, void *recvbuf, const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, MPIR_Info * info, - MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_scatter_init_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, - info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Scan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scan_init_impl(sendbuf, recvbuf, count, datatype, op, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Gather_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gather_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - root, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Gatherv_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gatherv_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, - 
recvtype, root, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Scatter_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatter_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - root, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Scatterv_init(const void *sendbuf, const MPI_Aint sendcounts[], const MPI_Aint displs[], - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, MPIR_Info * info, - MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatterv_init_impl(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, - recvtype, root, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Barrier_init(MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Barrier_init_impl(comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Exscan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - 
int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Exscan_init_impl(sendbuf, recvbuf, count, datatype, op, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Neighbor_allgather_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgather_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Neighbor_allgatherv_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], const MPI_Aint - displs[], MPI_Datatype recvtype, MPIR_Comm * comm, MPIR_Info * - info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgatherv_init_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm, info, - request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Neighbor_alltoall_init(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoall_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, info, request); - MPIR_ERR_CHECK(mpi_errno); 
- MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Neighbor_alltoallv_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint rdispls[], - MPI_Datatype recvtype, MPIR_Comm * comm, MPIR_Info * info, - MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallv_init_impl(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm, info, - request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -int MPID_Neighbor_alltoallw_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallw_init_impl(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, - recvcounts, rdispls, recvtypes, comm, info, - request); - MPIR_ERR_CHECK(mpi_errno); - MPIDI_Request_set_type(*request, MPIDI_REQUEST_TYPE_PERSISTENT_COLL); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpid/ch4/include/mpidch4.h b/src/mpid/ch4/include/mpidch4.h index 103b3bd6900..30593ba2ae6 100644 --- a/src/mpid/ch4/include/mpidch4.h +++ b/src/mpid/ch4/include/mpidch4.h @@ -177,235 +177,6 @@ int MPID_Comm_set_hints(MPIR_Comm *, MPIR_Info *); int MPID_Comm_commit_post_hook(MPIR_Comm *); int MPID_Stream_create_hook(MPIR_Stream * stream); int MPID_Stream_free_hook(MPIR_Stream * stream); -MPL_STATIC_INLINE_PREFIX int 
MPID_Barrier(MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Bcast(void *, MPI_Aint, MPI_Datatype, int, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Allreduce(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Allgather(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Allgatherv(const void *, MPI_Aint, MPI_Datatype, void *, - const MPI_Aint *, const MPI_Aint *, MPI_Datatype, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Scatter(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, int, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Scatterv(const void *, const MPI_Aint *, const MPI_Aint *, - MPI_Datatype, void *, MPI_Aint, MPI_Datatype, int, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Gather(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, int, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Gatherv(const void *, MPI_Aint, MPI_Datatype, void *, - const MPI_Aint *, const MPI_Aint *, MPI_Datatype, int, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Alltoall(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Alltoallv(const void *, const MPI_Aint *, const MPI_Aint *, - MPI_Datatype, void *, const MPI_Aint *, - const MPI_Aint *, MPI_Datatype, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Alltoallw(const void *, const MPI_Aint[], const MPI_Aint[], - const MPI_Datatype[], void *, const MPI_Aint[], - const MPI_Aint[], const MPI_Datatype[], MPIR_Comm *, - int) 
MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Reduce(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, int, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Reduce_scatter(const void *, void *, const MPI_Aint[], - MPI_Datatype, MPI_Op, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Reduce_scatter_block(const void *, void *, MPI_Aint, MPI_Datatype, - MPI_Op, MPIR_Comm *, - int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Scan(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Exscan(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, - MPIR_Comm *, int) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_allgather(const void *, MPI_Aint, MPI_Datatype, void *, - MPI_Aint, MPI_Datatype, - MPIR_Comm *) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_allgatherv(const void *, MPI_Aint, MPI_Datatype, void *, - const MPI_Aint[], const MPI_Aint[], - MPI_Datatype, - MPIR_Comm *) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_alltoallv(const void *, const MPI_Aint[], - const MPI_Aint[], MPI_Datatype, void *, - const MPI_Aint[], const MPI_Aint[], - MPI_Datatype, - MPIR_Comm *) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_alltoallw(const void *, const MPI_Aint[], - const MPI_Aint[], const MPI_Datatype[], void *, - const MPI_Aint[], const MPI_Aint[], - const MPI_Datatype[], - MPIR_Comm *) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_alltoall(const void *, MPI_Aint, MPI_Datatype, void *, - MPI_Aint, MPI_Datatype, - MPIR_Comm *) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_allgather(const void *, MPI_Aint, MPI_Datatype, void *, - MPI_Aint, MPI_Datatype, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int 
MPID_Ineighbor_allgatherv(const void *, MPI_Aint, MPI_Datatype, void *, - const MPI_Aint[], const MPI_Aint[], - MPI_Datatype, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_alltoall(const void *, MPI_Aint, MPI_Datatype, void *, - MPI_Aint, MPI_Datatype, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_alltoallv(const void *, const MPI_Aint[], - const MPI_Aint[], MPI_Datatype, void *, - const MPI_Aint[], const MPI_Aint[], - MPI_Datatype, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_alltoallw(const void *, const MPI_Aint[], - const MPI_Aint[], const MPI_Datatype[], - void *, const MPI_Aint[], const MPI_Aint[], - const MPI_Datatype[], MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ibarrier(MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ibcast(void *, MPI_Aint, MPI_Datatype, int, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iallgather(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iallgatherv(const void *, MPI_Aint, MPI_Datatype, void *, - const MPI_Aint *, const MPI_Aint *, MPI_Datatype, - MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iallreduce(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, - MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ialltoall(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ialltoallv(const void *, const MPI_Aint[], const MPI_Aint[], - MPI_Datatype, void *, const MPI_Aint[], - const MPI_Aint[], MPI_Datatype, MPIR_Comm 
*, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ialltoallw(const void *, const MPI_Aint[], const MPI_Aint[], - const MPI_Datatype[], void *, const MPI_Aint[], - const MPI_Aint[], const MPI_Datatype[], MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iexscan(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, - MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Igather(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, int, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Igatherv(const void *, MPI_Aint, MPI_Datatype, void *, - const MPI_Aint *, const MPI_Aint *, MPI_Datatype, int, - MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ireduce_scatter_block(const void *, void *, MPI_Aint, - MPI_Datatype, MPI_Op, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ireduce_scatter(const void *, void *, const MPI_Aint[], - MPI_Datatype, MPI_Op, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Ireduce(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, int, - MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iscan(const void *, void *, MPI_Aint, MPI_Datatype, MPI_Op, - MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iscatter(const void *, MPI_Aint, MPI_Datatype, void *, MPI_Aint, - MPI_Datatype, int, MPIR_Comm *, - MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -MPL_STATIC_INLINE_PREFIX int MPID_Iscatterv(const void *, const MPI_Aint *, const MPI_Aint *, - MPI_Datatype, void *, MPI_Aint, MPI_Datatype, int, - MPIR_Comm *, MPIR_Request **) MPL_STATIC_INLINE_SUFFIX; -int MPID_Bcast_init(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm_ptr, MPIR_Info * 
info_ptr, MPIR_Request ** request); -int MPID_Allreduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Reduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Alltoall_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Alltoallv_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Alltoallw_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Allgather_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Allgatherv_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request); -int MPID_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Reduce_scatter_init(const void *sendbuf, void *recvbuf, - const 
MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Scan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); -int MPID_Gather_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Gatherv_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); -int MPID_Scatter_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Scatterv_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint displs[], MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); -int MPID_Barrier_init(MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Exscan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); -int MPID_Neighbor_allgather_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Neighbor_allgatherv_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** 
request); -int MPID_Neighbor_alltoall_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); -int MPID_Neighbor_alltoallv_init(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request); -int MPID_Neighbor_alltoallw_init(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request); int MPID_Send_enqueue(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag, MPIR_Comm * comm_ptr); int MPID_Recv_enqueue(void *buf, MPI_Aint count, MPI_Datatype datatype, diff --git a/src/mpid/ch4/src/ch4_coll.h b/src/mpid/ch4/src/ch4_coll.h index c16fceaa9bc..0f8fb1714ff 100644 --- a/src/mpid/ch4/src/ch4_coll.h +++ b/src/mpid/ch4/src/ch4_coll.h @@ -136,48 +136,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Barrier_allcomm_composition_json(MPIR_Comm * goto fn_exit; } -MPL_STATIC_INLINE_PREFIX int MPID_Barrier(MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - - switch (MPIR_CVAR_BARRIER_COMPOSITION) { - case 1: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) && - MPIR_Comm_is_parent_comm(comm), mpi_errno, - "Barrier composition alpha cannot be applied.\n"); - mpi_errno = MPIDI_Barrier_intra_composition_alpha(comm, coll_attr); - break; - case 2: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Barrier composition beta cannot be applied.\n"); - mpi_errno = 
MPIDI_Barrier_intra_composition_beta(comm, coll_attr); - break; - default: - mpi_errno = MPIDI_Barrier_allcomm_composition_json(comm, coll_attr); - break; - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = MPIR_Barrier_impl(comm, coll_attr); - else - mpi_errno = MPIDI_Barrier_intra_composition_beta(comm, coll_attr); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_allcomm_composition_json(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, MPIR_Comm * comm, int coll_attr) @@ -241,69 +199,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_allcomm_composition_json(void *buffer, goto fn_exit; } -MPL_STATIC_INLINE_PREFIX int MPID_Bcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_BCAST_COMPOSITION) { - case 1: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) && - MPIR_Comm_is_parent_comm(comm), mpi_errno, - "Bcast composition alpha cannot be applied.\n"); - mpi_errno = - MPIDI_Bcast_intra_composition_alpha(buffer, count, datatype, root, comm, coll_attr); - break; - case 2: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) && - MPIR_Comm_is_parent_comm(comm), mpi_errno, - "Bcast composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Bcast_intra_composition_beta(buffer, count, datatype, root, comm, coll_attr); - break; - case 3: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Bcast composition gamma cannot be applied.\n"); - mpi_errno = - MPIDI_Bcast_intra_composition_gamma(buffer, count, datatype, root, comm, coll_attr); - break; - case 4: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) 
&& - MPIR_Comm_is_parent_comm(comm), mpi_errno, - "Bcast composition delta cannot be applied.\n"); - mpi_errno = - MPIDI_Bcast_intra_composition_delta(buffer, count, datatype, root, comm, coll_attr); - break; - default: - mpi_errno = - MPIDI_Bcast_allcomm_composition_json(buffer, count, datatype, root, comm, - coll_attr); - break; - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm, coll_attr); - else - mpi_errno = - MPIDI_Bcast_intra_composition_gamma(buffer, count, datatype, root, comm, coll_attr); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - MPL_STATIC_INLINE_PREFIX void MPIDI_Allreduce_fill_multi_leads_info(MPIR_Comm * comm) { if (MPIDI_COMM(comm, allreduce_comp_info) == NULL) { @@ -405,99 +300,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_allcomm_composition_json(const void goto fn_exit; } -MPL_STATIC_INLINE_PREFIX int MPID_Allreduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - int is_commutative = -1; - int num_leads = 0, node_comm_size = 0; - - MPIR_FUNC_ENTER; - - is_commutative = MPIR_Op_is_commutative(op); - - switch (MPIR_CVAR_ALLREDUCE_COMPOSITION) { - case 1: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) && - MPIR_Comm_is_parent_comm(comm) && - is_commutative, mpi_errno, - "Allreduce composition alpha cannot be applied.\n"); - mpi_errno = - MPIDI_Allreduce_intra_composition_alpha(sendbuf, recvbuf, count, datatype, op, comm, - coll_attr); - break; - case 2: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Allreduce composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Allreduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, comm, - coll_attr); 
- break; - case 3: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) && - (comm->node_comm != NULL) && - (MPIR_Comm_size(comm) == - MPIR_Comm_size(comm->node_comm)), mpi_errno, - "Allreduce composition gamma cannot be applied.\n"); - mpi_errno = - MPIDI_Allreduce_intra_composition_gamma(sendbuf, recvbuf, count, datatype, op, comm, - coll_attr); - break; - case 4: - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) { - MPIDI_Allreduce_fill_multi_leads_info(comm); - if (comm->node_comm) - node_comm_size = MPIR_Comm_size(comm->node_comm); - /* Reset number of leaders, so that (node_comm_size % num_leads) is zero. The new number of - * leaders must lie within a range +/- from the leaders specified, or every rank is made - * as a leader. Currently we use range as 15. */ - num_leads = - MPL_round_closest_multiple(node_comm_size, MPIR_CVAR_NUM_MULTI_LEADS, 15); - } - - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM - && MPIDI_COMM_ALLREDUCE(comm, use_multi_leads) == 1 && - count >= num_leads && is_commutative, mpi_errno, - "Allreduce composition delta cannot be applied.\n"); - /* Multi-leaders based composition can only be used if the comm is spanned over more than - * 1 node, has equal number of ranks on each node, count is more than number of leaders and - * the operation is commutative. This composition is beneficial for large messages. 
- */ - - mpi_errno = - MPIDI_Allreduce_intra_composition_delta(sendbuf, recvbuf, count, datatype, op, - num_leads, comm, coll_attr); - break; - - default: - mpi_errno = - MPIDI_Allreduce_allcomm_composition_json(sendbuf, recvbuf, count, datatype, op, - comm, coll_attr); - break; - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - else - mpi_errno = - MPIDI_Allreduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, comm, - coll_attr); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - MPL_STATIC_INLINE_PREFIX void MPIDI_Allgather_fill_multi_leads_info(MPIR_Comm * comm) { if (MPIDI_COMM(comm, allgather_comp_info) == NULL) { @@ -605,16 +407,36 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_allcomm_composition_json(const void goto fn_exit; } -MPL_STATIC_INLINE_PREFIX int MPID_Allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) +MPL_STATIC_INLINE_PREFIX void MPIDI_Alltoall_fill_multi_leads_info(MPIR_Comm * comm) +{ + if (MPIDI_COMM(comm, alltoall_comp_info) == NULL) { + MPIDI_COMM(comm, alltoall_comp_info) = + MPL_malloc(sizeof(MPIDI_Multileads_comp_info_t), MPL_MEM_OTHER); + MPIR_Assert(MPIDI_COMM(comm, alltoall_comp_info)); + MPIDI_COMM_ALLTOALL(comm, use_multi_leads) = -1; + MPIDI_COMM_ALLTOALL(comm, shm_addr) = NULL; + } + /* Find if the comm meets the constraints and store that info in the data structure */ + if (MPIDI_COMM_ALLTOALL(comm, use_multi_leads) == -1) { + if (MPII_Comm_is_node_canonical(comm)) + MPIDI_COMM_ALLTOALL(comm, use_multi_leads) = 1; + else + MPIDI_COMM_ALLTOALL(comm, use_multi_leads) = 0; + } +} + +MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_allcomm_composition_json(const void *sendbuf, + MPI_Aint sendcount, + MPI_Datatype 
sendtype, + void *recvbuf, + MPI_Aint recvcount, + MPI_Datatype recvtype, + MPIR_Comm * comm, + int coll_attr) { int mpi_errno = MPI_SUCCESS; MPI_Aint type_size, data_size; - MPIR_FUNC_ENTER; - if (sendbuf != MPI_IN_PLACE) { MPIR_Datatype_get_size_macro(sendtype, type_size); data_size = sendcount * type_size; @@ -623,38 +445,55 @@ MPL_STATIC_INLINE_PREFIX int MPID_Allgather(const void *sendbuf, MPI_Aint sendco data_size = recvcount * type_size; } - switch (MPIR_CVAR_ALLGATHER_COMPOSITION) { - case 1: - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) - MPIDI_Allgather_fill_multi_leads_info(comm); + const MPIDI_Csel_container_s *cnt = NULL; - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - MPIDI_COMM_ALLGATHER(comm, use_multi_leads) == 1 && - data_size <= MPIR_CVAR_ALLGATHER_SHM_PER_RANK, mpi_errno, - "Allgather composition alpha cannot be applied.\n"); - /* Multi-leaders based composition can only be used if the comm is spanned over more than - * 1 node, has equal number of ranks on each node, ranks on a node are consecutive and - * the combined msg from all the ranks on a node fits the allocated shared memory buffer - */ + MPIR_Csel_coll_sig_s coll_sig = { + .coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALL, + .comm_ptr = comm, + + .u.alltoall.sendbuf = sendbuf, + .u.alltoall.sendcount = sendcount, + .u.alltoall.sendtype = sendtype, + .u.alltoall.recvcount = recvcount, + .u.alltoall.recvbuf = recvbuf, + .u.alltoall.recvtype = recvtype, + }; + + cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); + + if (cnt == NULL) { + mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, + recvbuf, recvcount, recvtype, comm, coll_attr); + MPIR_ERR_CHECK(mpi_errno); + goto fn_exit; + } + switch (cnt->id) { + case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_alpha: + if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) + MPIDI_Alltoall_fill_multi_leads_info(comm); + MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == 
MPIR_COMM_KIND__INTRACOMM + && MPIDI_COMM_ALLTOALL(comm, use_multi_leads) == 1 && + data_size <= MPIR_CVAR_ALLTOALL_SHM_PER_RANK, mpi_errno, + "Alltoall composition alpha cannot be applied.\n"); mpi_errno = - MPIDI_Allgather_intra_composition_alpha(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm, coll_attr); + MPIDI_Alltoall_intra_composition_alpha(sendbuf, sendcount, sendtype, + recvbuf, recvcount, recvtype, comm, + coll_attr); break; - case 2: + + case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_beta: MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Allgather composition beta cannot be applied.\n"); + "Alltoall composition beta cannot be applied.\n"); mpi_errno = - MPIDI_Allgather_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); + MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, + recvbuf, recvcount, recvtype, comm, + coll_attr); break; + default: - mpi_errno = MPIDI_Allgather_allcomm_composition_json(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; + MPIR_Assert(0); } MPIR_ERR_CHECK(mpi_errno); @@ -663,59 +502,62 @@ MPL_STATIC_INLINE_PREFIX int MPID_Allgather(const void *sendbuf, MPI_Aint sendco fallback: if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) mpi_errno = - MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, - coll_attr); + MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, + coll_attr); else - mpi_errno = - MPIDI_Allgather_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, coll_attr); + mpi_errno = MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, + recvcount, recvtype, comm, coll_attr); fn_exit: - MPIR_FUNC_ENTER; return mpi_errno; fn_fail: goto fn_exit; } -MPL_STATIC_INLINE_PREFIX int 
MPID_Allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, int coll_attr) +MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_allcomm_composition_json(const void *sendbuf, + void *recvbuf, MPI_Aint count, + MPI_Datatype datatype, MPI_Op op, + int root, MPIR_Comm * comm, + int coll_attr) { int mpi_errno = MPI_SUCCESS; const MPIDI_Csel_container_s *cnt = NULL; MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLGATHERV, + .coll_type = MPIR_CSEL_COLL_TYPE__REDUCE, .comm_ptr = comm, - .u.allgatherv.sendbuf = sendbuf, - .u.allgatherv.sendcount = sendcount, - .u.allgatherv.sendtype = sendtype, - .u.allgatherv.recvbuf = recvbuf, - .u.allgatherv.recvcounts = recvcounts, - .u.allgatherv.displs = displs, - .u.allgatherv.recvtype = recvtype, + .u.reduce.sendbuf = sendbuf, + .u.reduce.recvbuf = recvbuf, + .u.reduce.count = count, + .u.reduce.datatype = datatype, + .u.reduce.op = op, + .u.reduce.root = root, }; - MPIR_FUNC_ENTER; - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); if (cnt == NULL) { - mpi_errno = - MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, - recvtype, comm, coll_attr); + mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); MPIR_ERR_CHECK(mpi_errno); goto fn_exit; } switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allgatherv_intra_composition_alpha: + case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_alpha: + mpi_errno = + MPIDI_Reduce_intra_composition_alpha(sendbuf, recvbuf, count, datatype, op, + root, comm, coll_attr); + break; + case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_beta: mpi_errno = - MPIDI_Allgatherv_intra_composition_alpha(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, - recvtype, comm, coll_attr); + 
MPIDI_Reduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, + root, comm, coll_attr); + break; + case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_gamma: + mpi_errno = + MPIDI_Reduce_intra_composition_gamma(sendbuf, recvbuf, count, datatype, op, + root, comm, coll_attr); break; default: MPIR_Assert(0); @@ -724,1262 +566,9 @@ MPL_STATIC_INLINE_PREFIX int MPID_Allgatherv(const void *sendbuf, MPI_Aint sendc MPIR_ERR_CHECK(mpi_errno); fn_exit: - MPIR_FUNC_EXIT; return mpi_errno; fn_fail: goto fn_exit; } -MPL_STATIC_INLINE_PREFIX int MPID_Scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__SCATTER, - .comm_ptr = comm, - - .u.scatter.sendbuf = sendbuf, - .u.scatter.sendcount = sendcount, - .u.scatter.sendtype = sendtype, - .u.scatter.recvcount = recvcount, - .u.scatter.recvbuf = recvbuf, - .u.scatter.recvtype = recvtype, - .u.scatter.root = root, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, - coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scatter_intra_composition_alpha: - mpi_errno = - MPIDI_Scatter_intra_composition_alpha(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Scatterv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void 
*recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__SCATTERV, - .comm_ptr = comm, - - .u.scatterv.sendbuf = sendbuf, - .u.scatterv.sendcounts = sendcounts, - .u.scatterv.displs = displs, - .u.scatterv.sendtype = sendtype, - .u.scatterv.recvcount = recvcount, - .u.scatterv.recvbuf = recvbuf, - .u.scatterv.recvtype = recvtype, - .u.scatterv.root = root, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, - root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scatterv_intra_composition_alpha: - mpi_errno = - MPIDI_Scatterv_intra_composition_alpha(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, - comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__GATHER, - .comm_ptr = comm, - - .u.gather.sendbuf = sendbuf, - .u.gather.sendcount = sendcount, - .u.gather.sendtype = sendtype, - .u.gather.recvcount = recvcount, - .u.gather.recvbuf = recvbuf, - .u.gather.recvtype = recvtype, - .u.gather.root = root, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = 
- MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, - coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Gather_intra_composition_alpha: - mpi_errno = - MPIDI_Gather_intra_composition_alpha(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__GATHERV, - .comm_ptr = comm, - - .u.gatherv.sendbuf = sendbuf, - .u.gatherv.sendcount = sendcount, - .u.gatherv.sendtype = sendtype, - .u.gatherv.recvbuf = recvbuf, - .u.gatherv.recvcounts = recvcounts, - .u.gatherv.displs = displs, - .u.gatherv.recvtype = recvtype, - .u.gatherv.root = root, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Gatherv_intra_composition_alpha: - mpi_errno = - MPIDI_Gatherv_intra_composition_alpha(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, - comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX void 
MPIDI_Alltoall_fill_multi_leads_info(MPIR_Comm * comm) -{ - if (MPIDI_COMM(comm, alltoall_comp_info) == NULL) { - MPIDI_COMM(comm, alltoall_comp_info) = - MPL_malloc(sizeof(MPIDI_Multileads_comp_info_t), MPL_MEM_OTHER); - MPIR_Assert(MPIDI_COMM(comm, alltoall_comp_info)); - MPIDI_COMM_ALLTOALL(comm, use_multi_leads) = -1; - MPIDI_COMM_ALLTOALL(comm, shm_addr) = NULL; - } - /* Find if the comm meets the constraints and store that info in the data structure */ - if (MPIDI_COMM_ALLTOALL(comm, use_multi_leads) == -1) { - if (MPII_Comm_is_node_canonical(comm)) - MPIDI_COMM_ALLTOALL(comm, use_multi_leads) = 1; - else - MPIDI_COMM_ALLTOALL(comm, use_multi_leads) = 0; - } -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_allcomm_composition_json(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPI_Aint type_size, data_size; - - if (sendbuf != MPI_IN_PLACE) { - MPIR_Datatype_get_size_macro(sendtype, type_size); - data_size = sendcount * type_size; - } else { - MPIR_Datatype_get_size_macro(recvtype, type_size); - data_size = recvcount * type_size; - } - - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALL, - .comm_ptr = comm, - - .u.alltoall.sendbuf = sendbuf, - .u.alltoall.sendcount = sendcount, - .u.alltoall.sendtype = sendtype, - .u.alltoall.recvcount = recvcount, - .u.alltoall.recvbuf = recvbuf, - .u.alltoall.recvtype = recvtype, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_alpha: - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) - 
MPIDI_Alltoall_fill_multi_leads_info(comm); - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM - && MPIDI_COMM_ALLTOALL(comm, use_multi_leads) == 1 && - data_size <= MPIR_CVAR_ALLTOALL_SHM_PER_RANK, mpi_errno, - "Alltoall composition alpha cannot be applied.\n"); - mpi_errno = - MPIDI_Alltoall_intra_composition_alpha(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_beta: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Alltoall composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = - MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, - coll_attr); - else - mpi_errno = MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPI_Aint type_size, data_size; - - MPIR_FUNC_ENTER; - - if (sendbuf != MPI_IN_PLACE) { - MPIR_Datatype_get_size_macro(sendtype, type_size); - data_size = sendcount * type_size; - } else { - MPIR_Datatype_get_size_macro(recvtype, type_size); - data_size = recvcount * type_size; - } - - switch (MPIR_CVAR_ALLTOALL_COMPOSITION) { - case 1: - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) - MPIDI_Alltoall_fill_multi_leads_info(comm); - - 
MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM - && MPIDI_COMM_ALLTOALL(comm, use_multi_leads) == 1 && - data_size <= MPIR_CVAR_ALLTOALL_SHM_PER_RANK, mpi_errno, - "Alltoall composition alpha cannot be applied.\n"); - /* Multi-leaders based composition can only be used if the comm is spanned over more than - * 1 node, has equal number of ranks on each node, ranks on a node are consecutive and - * the combined msg from all the ranks on a node fits the allocated shared memory buffer - */ - - mpi_errno = - MPIDI_Alltoall_intra_composition_alpha(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - case 2: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Alltoall composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); - break; - default: - mpi_errno = MPIDI_Alltoall_allcomm_composition_json(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = - MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, - coll_attr); - else - mpi_errno = MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); - - fn_exit: - MPIR_FUNC_ENTER; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Alltoallv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - 
.coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALLV, - .comm_ptr = comm, - - .u.alltoallv.sendbuf = sendbuf, - .u.alltoallv.sendcounts = sendcounts, - .u.alltoallv.sdispls = sdispls, - .u.alltoallv.sendtype = sendtype, - .u.alltoallv.recvbuf = recvbuf, - .u.alltoallv.recvcounts = recvcounts, - .u.alltoallv.rdispls = rdispls, - .u.alltoallv.recvtype = recvtype, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoallv_intra_composition_alpha: - mpi_errno = - MPIDI_Alltoallv_intra_composition_alpha(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Alltoallw(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALLW, - .comm_ptr = comm, - - .u.alltoallw.sendbuf = sendbuf, - .u.alltoallw.sendcounts = sendcounts, - .u.alltoallw.sdispls = sdispls, - .u.alltoallw.sendtypes = sendtypes, - .u.alltoallw.recvbuf = recvbuf, - .u.alltoallw.recvcounts = recvcounts, - .u.alltoallw.rdispls = rdispls, - .u.alltoallw.recvtypes = recvtypes, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = 
MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoallw_intra_composition_alpha: - mpi_errno = - MPIDI_Alltoallw_intra_composition_alpha(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_allcomm_composition_json(const void *sendbuf, - void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__REDUCE, - .comm_ptr = comm, - - .u.reduce.sendbuf = sendbuf, - .u.reduce.recvbuf = recvbuf, - .u.reduce.count = count, - .u.reduce.datatype = datatype, - .u.reduce.op = op, - .u.reduce.root = root, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_alpha: - mpi_errno = - MPIDI_Reduce_intra_composition_alpha(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_beta: - mpi_errno = - MPIDI_Reduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_gamma: - mpi_errno = - MPIDI_Reduce_intra_composition_gamma(sendbuf, recvbuf, 
count, datatype, op, - root, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Reduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_REDUCE_COMPOSITION) { - case 1: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM - && MPIR_Comm_is_parent_comm(comm) && - MPIR_Op_is_commutative(op), mpi_errno, - "Reduce composition alpha cannot be applied.\n"); - mpi_errno = - MPIDI_Reduce_intra_composition_alpha(sendbuf, recvbuf, count, datatype, op, root, - comm, coll_attr); - break; - case 2: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM - && MPIR_Comm_is_parent_comm(comm) && - MPIR_Op_is_commutative(op), mpi_errno, - "Reduce composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Reduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, root, - comm, coll_attr); - break; - case 3: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Reduce composition gamma cannot be applied.\n"); - mpi_errno = - MPIDI_Reduce_intra_composition_gamma(sendbuf, recvbuf, count, datatype, op, root, - comm, coll_attr); - break; - default: - mpi_errno = - MPIDI_Reduce_allcomm_composition_json(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); - else - mpi_errno = - MPIDI_Reduce_intra_composition_gamma(sendbuf, recvbuf, count, datatype, op, root, comm, - coll_attr); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - 
fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER, - .comm_ptr = comm, - - .u.reduce_scatter.sendbuf = sendbuf, - .u.reduce_scatter.recvbuf = recvbuf, - .u.reduce_scatter.recvcounts = recvcounts, - .u.reduce_scatter.datatype = datatype, - .u.reduce_scatter.op = op, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = - MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_scatter_intra_composition_alpha: - mpi_errno = - MPIDI_Reduce_scatter_intra_composition_alpha(sendbuf, recvbuf, recvcounts, - datatype, op, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Reduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK, - .comm_ptr = comm, - - .u.reduce_scatter_block.sendbuf = sendbuf, - .u.reduce_scatter_block.recvbuf = recvbuf, - .u.reduce_scatter_block.recvcount = recvcount, - .u.reduce_scatter_block.datatype = datatype, - .u.reduce_scatter_block.op = op, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = - 
MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm, - coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_scatter_block_intra_composition_alpha: - mpi_errno = - MPIDI_Reduce_scatter_block_intra_composition_alpha(sendbuf, recvbuf, recvcount, - datatype, op, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Scan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__SCAN, - .comm_ptr = comm, - - .u.scan.sendbuf = sendbuf, - .u.scan.recvbuf = recvbuf, - .u.scan.count = count, - .u.scan.datatype = datatype, - .u.scan.op = op, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scan_intra_composition_alpha: - mpi_errno = - MPIDI_Scan_intra_composition_alpha(sendbuf, recvbuf, count, - datatype, op, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scan_intra_composition_beta: - mpi_errno = - MPIDI_Scan_intra_composition_beta(sendbuf, recvbuf, count, - datatype, op, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Exscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - 
int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__EXSCAN, - .comm_ptr = comm, - - .u.exscan.sendbuf = sendbuf, - .u.exscan.recvbuf = recvbuf, - .u.exscan.count = count, - .u.exscan.datatype = datatype, - .u.exscan.op = op, - }; - - MPIR_FUNC_ENTER; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr);; - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Exscan_intra_composition_alpha: - mpi_errno = - MPIDI_Exscan_intra_composition_alpha(sendbuf, recvbuf, count, - datatype, op, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_neighbor_allgather(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_neighbor_allgatherv(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const 
MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_neighbor_alltoallv(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_alltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype * sendtypes, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype * recvtypes, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_neighbor_alltoallw(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Neighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_neighbor_alltoall(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ineighbor_allgather(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ineighbor_allgatherv(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, req); - - 
MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ineighbor_alltoall(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ineighbor_alltoallv(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, - req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype * sendtypes, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype * recvtypes, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ineighbor_alltoallw(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, rdispls, recvtypes, - comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ibarrier(MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ibarrier(comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ibcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ibcast(buffer, count, datatype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - 
-MPL_STATIC_INLINE_PREFIX int MPID_Iallgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iallgather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iallgatherv(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Iallreduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iallreduce(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ialltoall(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ialltoallv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ialltoallv(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, 
comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ialltoallw(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype * sendtypes, void *recvbuf, - const MPI_Aint * recvcounts, const MPI_Aint * rdispls, - const MPI_Datatype * recvtypes, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ialltoallw(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Iexscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iexscan(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_igather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_igatherv(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = 
MPIDI_NM_mpi_ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint * recvcounts, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Ireduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iscan(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iscatter(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPID_Iscatterv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_NM_mpi_iscatterv(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, req); - - 
MPIR_FUNC_EXIT; - return ret; -} - #endif /* CH4_COLL_H_INCLUDED */ diff --git a/src/mpid/ch4/src/ch4_init.c b/src/mpid/ch4/src/ch4_init.c index c94c573cb1d..ee82c745287 100644 --- a/src/mpid/ch4/src/ch4_init.c +++ b/src/mpid/ch4/src/ch4_init.c @@ -171,20 +171,6 @@ === END_MPI_T_CVAR_INFO_BLOCK === */ -static const char *devcollstr(void) -{ - if (MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_all) { - return "all"; - } else if (MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_none) { - return "none"; - } else if (MPIR_CVAR_DEVICE_COLLECTIVES == MPIR_CVAR_DEVICE_COLLECTIVES_percoll) { - return "percoll"; - } else { - MPIR_Assert(0); - } - return NULL; -} - static void *create_container(struct json_object *obj) { MPIDI_Csel_container_s *cnt = MPL_malloc(sizeof(MPIDI_Csel_container_s), MPL_MEM_COLL); @@ -610,7 +596,6 @@ int MPID_Init(int requested, int *provided) fprintf(stdout, "==== Various sizes and limits ====\n"); fprintf(stdout, "sizeof(MPIDI_per_vci_t): %d\n", (int) sizeof(MPIDI_per_vci_t)); printf("==== collective selection ====\n"); - printf("MPIR_CVAR_DEVICE_COLLECTIVES: %s\n", devcollstr()); MPIR_Assert(MPIR_Csel_source); printf("MPIR: %s\n", MPIR_Csel_source); MPIR_Assert(MPIDI_global.csel_source); diff --git a/src/mpid/ch4/src/ch4_persist.c b/src/mpid/ch4/src/ch4_persist.c index 8b82e260d52..a10878a680d 100644 --- a/src/mpid/ch4/src/ch4_persist.c +++ b/src/mpid/ch4/src/ch4_persist.c @@ -166,345 +166,3 @@ int MPID_Recv_init(void *buf, fn_fail: goto fn_exit; } - -int MPID_Bcast_init(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Bcast_init_impl(buffer, count, datatype, root, comm_ptr, info_ptr, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Allreduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * 
comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allreduce_init_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, info_ptr, - request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Reduce_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_init_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, - info_ptr, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Alltoall_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoall_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, info_ptr, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Alltoallv_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallv_init_impl(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm_ptr, info_ptr, - request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Alltoallw_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int 
mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallw_init_impl(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, - recvcounts, rdispls, recvtypes, comm_ptr, info_ptr, - request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Allgather_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgather_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, info_ptr, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Allgatherv_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Info * info_ptr, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgatherv_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, comm_ptr, info_ptr, request); - - return mpi_errno; -} - -int MPID_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_scatter_block_init_impl(sendbuf, recvbuf, recvcount, datatype, op, comm, - info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Reduce_scatter_init(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_scatter_init_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, - info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - 
-int MPID_Scan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scan_init_impl(sendbuf, recvbuf, count, datatype, op, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Gather_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gather_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - root, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Gatherv_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gatherv_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, - recvtype, root, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Scatter_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatter_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - root, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Scatterv_init(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint displs[], MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, MPIR_Comm * comm, - MPIR_Info * info, 
MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatterv_init_impl(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, - recvtype, root, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Barrier_init(MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Barrier_init_impl(comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Exscan_init(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Exscan_init_impl(sendbuf, recvbuf, count, datatype, op, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Neighbor_allgather_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgather_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Neighbor_allgatherv_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgatherv_init_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm, info, - request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Neighbor_alltoall_init(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - 
MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoall_init_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, info, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Neighbor_alltoallv_init(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallv_init_impl(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm, info, - request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -int MPID_Neighbor_alltoallw_init(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Info * info, MPIR_Request ** request) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallw_init_impl(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, - recvcounts, rdispls, recvtypes, comm, info, - request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} From 7ca9a6377fa34b9ade41d554991cfe24bdc0ac67 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sun, 24 Aug 2025 21:05:49 -0500 Subject: [PATCH 03/47] maint: move json_gen.sh to top of maint folder Do not hide the script. Move it to maint/ as the reset of the autogen scripts. 
--- autogen.sh | 2 +- maint/{tuning/coll => }/json_gen.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename maint/{tuning/coll => }/json_gen.sh (100%) diff --git a/autogen.sh b/autogen.sh index 0c98ebe7005..83abcf9a6d8 100755 --- a/autogen.sh +++ b/autogen.sh @@ -531,7 +531,7 @@ fn_gen_binding_c() { fn_json_gen() { echo_n "generating json char arrays... " - ./maint/tuning/coll/json_gen.sh + ./maint/json_gen.sh echo "done" } diff --git a/maint/tuning/coll/json_gen.sh b/maint/json_gen.sh similarity index 100% rename from maint/tuning/coll/json_gen.sh rename to maint/json_gen.sh From 64598d463fde978c3bd4fd7c078f3b4eefa8ec3b Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 18:56:46 -0500 Subject: [PATCH 04/47] ch4/coll: remove device-layer json selections We will add the mechanism of selecting device-layer algorithms later. --- src/mpid/ch4/src/ch4_coll.h | 414 ------------------------------------ 1 file changed, 414 deletions(-) diff --git a/src/mpid/ch4/src/ch4_coll.h b/src/mpid/ch4/src/ch4_coll.h index 0f8fb1714ff..d497fab8f26 100644 --- a/src/mpid/ch4/src/ch4_coll.h +++ b/src/mpid/ch4/src/ch4_coll.h @@ -99,106 +99,6 @@ === END_MPI_T_CVAR_INFO_BLOCK === */ -MPL_STATIC_INLINE_PREFIX int MPIDI_Barrier_allcomm_composition_json(MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__BARRIER, - .comm_ptr = comm, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Barrier_impl(comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Barrier_intra_composition_alpha: - mpi_errno = MPIDI_Barrier_intra_composition_alpha(comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Barrier_intra_composition_beta: - mpi_errno = 
MPIDI_Barrier_intra_composition_beta(comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_allcomm_composition_json(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__BCAST, - .comm_ptr = comm, - .u.bcast.buffer = buffer, - .u.bcast.count = count, - .u.bcast.datatype = datatype, - .u.bcast.root = root, - }; - - const MPIDI_Csel_container_s *cnt = NULL; - - if (MPIR_CVAR_COLL_HYBRID_MEMORY) { - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - } else { - /* In no hybird case, local memory type can be used to select algorithm */ - MPL_pointer_attr_t pointer_attr; - MPIR_GPU_query_pointer_attr(buffer, &pointer_attr); - if (MPL_gpu_attr_is_strict_dev(&pointer_attr)) { - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm_gpu), coll_sig); - } else { - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - } - } - if (cnt == NULL) { - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_alpha: - mpi_errno = - MPIDI_Bcast_intra_composition_alpha(buffer, count, datatype, root, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_beta: - mpi_errno = - MPIDI_Bcast_intra_composition_beta(buffer, count, datatype, root, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_gamma: - mpi_errno = - MPIDI_Bcast_intra_composition_gamma(buffer, count, datatype, root, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_delta: - mpi_errno = - 
MPIDI_Bcast_intra_composition_delta(buffer, count, datatype, root, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - MPL_STATIC_INLINE_PREFIX void MPIDI_Allreduce_fill_multi_leads_info(MPIR_Comm * comm) { if (MPIDI_COMM(comm, allreduce_comp_info) == NULL) { @@ -221,85 +121,6 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_Allreduce_fill_multi_leads_info(MPIR_Comm * } -MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_allcomm_composition_json(const void *sendbuf, - void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - int num_leads = 0, node_comm_size = 0; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLREDUCE, - .comm_ptr = comm, - - .u.allreduce.sendbuf = sendbuf, - .u.allreduce.recvbuf = recvbuf, - .u.allreduce.count = count, - .u.allreduce.datatype = datatype, - .u.allreduce.op = op, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_alpha: - mpi_errno = - MPIDI_Allreduce_intra_composition_alpha(sendbuf, recvbuf, count, datatype, op, - comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_beta: - mpi_errno = - MPIDI_Allreduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, - comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_gamma: - mpi_errno = - MPIDI_Allreduce_intra_composition_gamma(sendbuf, recvbuf, count, datatype, op, - comm, coll_attr); - break; - case 
MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_delta: - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) { - MPIDI_Allreduce_fill_multi_leads_info(comm); - if (comm->node_comm) - node_comm_size = MPIR_Comm_size(comm->node_comm); - /* Reset number of leaders, so that (node_comm_size % num_leads) is zero. The new number of - * leaders must lie within a range +/- from the leaders specified, or every rank is made - * as a leader. Currently we use range as 15. */ - num_leads = - MPL_round_closest_multiple(node_comm_size, MPIR_CVAR_NUM_MULTI_LEADS, 15); - } - /* make sure that the algo can be run */ - if (MPIDI_COMM_ALLREDUCE(comm, use_multi_leads) == 1 && - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM && - count >= num_leads && MPIR_Op_is_commutative(op)) { - mpi_errno = - MPIDI_Allreduce_intra_composition_delta(sendbuf, recvbuf, count, datatype, op, - num_leads, comm, coll_attr); - } else - mpi_errno = - MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - MPL_STATIC_INLINE_PREFIX void MPIDI_Allgather_fill_multi_leads_info(MPIR_Comm * comm) { if (MPIDI_COMM(comm, allgather_comp_info) == NULL) { @@ -318,95 +139,6 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_Allgather_fill_multi_leads_info(MPIR_Comm * } } -MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_allcomm_composition_json(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPI_Aint type_size, data_size; - const MPIDI_Csel_container_s *cnt = NULL; - - if (sendbuf != MPI_IN_PLACE) { - MPIR_Datatype_get_size_macro(sendtype, type_size); - data_size = sendcount * type_size; - } else { - MPIR_Datatype_get_size_macro(recvtype, type_size); - data_size = recvcount * type_size; - } - - 
MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLGATHER, - .comm_ptr = comm, - - .u.allgather.sendbuf = sendbuf, - .u.allgather.sendcount = sendcount, - .u.allgather.sendtype = sendtype, - .u.allgather.recvbuf = recvbuf, - .u.allgather.recvcount = recvcount, - .u.allgather.recvtype = recvtype, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = - MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, - coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allgather_intra_composition_alpha: - /* make sure that the algo can be run */ - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) - MPIDI_Allgather_fill_multi_leads_info(comm); - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - MPIDI_COMM_ALLGATHER(comm, use_multi_leads) == 1 && - data_size <= MPIR_CVAR_ALLGATHER_SHM_PER_RANK, mpi_errno, - "Allgather composition alpha cannot be applied.\n"); - mpi_errno = - MPIDI_Allgather_intra_composition_alpha(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allgather_intra_composition_beta: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Allgather composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Allgather_intra_composition_beta(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = - MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, - coll_attr); - else - mpi_errno = - MPIDI_Allgather_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, coll_attr); - - 
fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - MPL_STATIC_INLINE_PREFIX void MPIDI_Alltoall_fill_multi_leads_info(MPIR_Comm * comm) { if (MPIDI_COMM(comm, alltoall_comp_info) == NULL) { @@ -425,150 +157,4 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_Alltoall_fill_multi_leads_info(MPIR_Comm * c } } -MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_allcomm_composition_json(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPI_Aint type_size, data_size; - - if (sendbuf != MPI_IN_PLACE) { - MPIR_Datatype_get_size_macro(sendtype, type_size); - data_size = sendcount * type_size; - } else { - MPIR_Datatype_get_size_macro(recvtype, type_size); - data_size = recvcount * type_size; - } - - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALL, - .comm_ptr = comm, - - .u.alltoall.sendbuf = sendbuf, - .u.alltoall.sendcount = sendcount, - .u.alltoall.sendtype = sendtype, - .u.alltoall.recvcount = recvcount, - .u.alltoall.recvbuf = recvbuf, - .u.alltoall.recvtype = recvtype, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_alpha: - if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) - MPIDI_Alltoall_fill_multi_leads_info(comm); - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, comm->comm_kind == MPIR_COMM_KIND__INTRACOMM - && MPIDI_COMM_ALLTOALL(comm, use_multi_leads) == 1 && - data_size <= MPIR_CVAR_ALLTOALL_SHM_PER_RANK, mpi_errno, - "Alltoall composition alpha cannot be applied.\n"); - mpi_errno = - MPIDI_Alltoall_intra_composition_alpha(sendbuf, 
sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_beta: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, - comm->comm_kind == MPIR_COMM_KIND__INTRACOMM, mpi_errno, - "Alltoall composition beta cannot be applied.\n"); - mpi_errno = - MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, - coll_attr); - break; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - if (comm->comm_kind == MPIR_COMM_KIND__INTERCOMM) - mpi_errno = - MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, - coll_attr); - else - mpi_errno = MPIDI_Alltoall_intra_composition_beta(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, coll_attr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_allcomm_composition_json(const void *sendbuf, - void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - const MPIDI_Csel_container_s *cnt = NULL; - - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__REDUCE, - .comm_ptr = comm, - - .u.reduce.sendbuf = sendbuf, - .u.reduce.recvbuf = recvbuf, - .u.reduce.count = count, - .u.reduce.datatype = datatype, - .u.reduce.op = op, - .u.reduce.root = root, - }; - - cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig); - - if (cnt == NULL) { - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - } - - switch (cnt->id) { - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_alpha: - mpi_errno = - MPIDI_Reduce_intra_composition_alpha(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - case 
MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_beta: - mpi_errno = - MPIDI_Reduce_intra_composition_beta(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - case MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_gamma: - mpi_errno = - MPIDI_Reduce_intra_composition_gamma(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - #endif /* CH4_COLL_H_INCLUDED */ From 50c030615fdcbdb46f2702128d4c21710fddf85f Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 19:26:09 -0500 Subject: [PATCH 05/47] ch4: disable the code that call netmod/shm collectives Temporarily comment out the composition code that calls netmod/shm collectives since we will remove these apis next. Some NULL composition functions are removed. --- src/mpid/ch4/src/ch4_coll_impl.h | 388 ++++++++++--------------------- 1 file changed, 126 insertions(+), 262 deletions(-) diff --git a/src/mpid/ch4/src/ch4_coll_impl.h b/src/mpid/ch4/src/ch4_coll_impl.h index af4fcb1379f..2c3a8d898ef 100644 --- a/src/mpid/ch4/src/ch4_coll_impl.h +++ b/src/mpid/ch4/src/ch4_coll_impl.h @@ -95,11 +95,14 @@ static void MPIDI_Coll_host_buffer_genq_free(void *sendbuf, void *recvbuf, MPI_A } } -static void MPIDI_Coll_host_buffer_genq_alloc(const void *sendbuf, const void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - void **host_sendbuf, void **host_recvbuf, - MPL_pointer_attr_t send_attr, - MPL_pointer_attr_t recv_attr, MPI_Aint shift) +MPL_STATIC_INLINE_PREFIX void MPIDI_Coll_host_buffer_genq_alloc(const void *sendbuf, + const void *recvbuf, MPI_Aint count, + MPI_Datatype datatype, + void **host_sendbuf, + void **host_recvbuf, + MPL_pointer_attr_t send_attr, + MPL_pointer_attr_t recv_attr, + MPI_Aint shift) { void *tmp_send = NULL, *tmp_recv = NULL; if (sendbuf != MPI_IN_PLACE) { @@ -145,8 +148,8 @@ 
static void MPIDI_Coll_host_buffer_genq_alloc(const void *sendbuf, const void *r goto fn_exit; } -static void MPIDI_Coll_calculate_size_shift(MPI_Aint count, MPI_Datatype datatype, MPI_Aint * size, - MPI_Aint * shift) +MPL_STATIC_INLINE_PREFIX void MPIDI_Coll_calculate_size_shift(MPI_Aint count, MPI_Datatype datatype, + MPI_Aint * size, MPI_Aint * shift) { MPI_Aint true_lb, true_extent, extent; @@ -166,6 +169,11 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Barrier_intra_composition_alpha(MPIR_Comm * c { int mpi_errno = MPI_SUCCESS; +#if 1 + /* FIXME: call MPIR_Coll_auto or MPIR_Barrier_impl on sub-comms */ + MPIR_Assert(0); + return mpi_errno; +#else /* do the intranode barrier on all nodes */ if (comm->node_comm != NULL) { #ifndef MPIDI_CH4_DIRECT_NETMOD @@ -201,19 +209,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Barrier_intra_composition_alpha(MPIR_Comm * c return mpi_errno; fn_fail: goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Barrier_intra_composition_beta(MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIDI_NM_mpi_barrier(comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_alpha(void *buffer, MPI_Aint count, @@ -222,6 +218,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_alpha(void *buffer, M int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else void *host_buffer = NULL; void *saved_buffer = buffer; MPL_pointer_attr_t attr; @@ -308,6 +310,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_alpha(void *buffer, M return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_beta(void *buffer, MPI_Aint count, @@ -316,6 +319,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_beta(void *buffer, MP int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME 
*/ + MPIR_Assert(0); + return mpi_errno; +#else void *host_buffer = NULL; void *saved_buffer = buffer; MPL_pointer_attr_t attr; @@ -377,6 +386,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_beta(void *buffer, MP return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_gamma(void *buffer, MPI_Aint count, @@ -385,6 +395,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_gamma(void *buffer, M int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else void *host_buffer = NULL; void *saved_buffer = buffer; MPL_pointer_attr_t attr; @@ -420,6 +436,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_gamma(void *buffer, M return mpi_errno; fn_fail: goto fn_exit; +#endif } /* @@ -436,6 +453,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_delta(void *buffer, M int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else void *host_buffer = NULL; void *saved_buffer = buffer; MPL_pointer_attr_t attr; @@ -527,6 +550,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_delta(void *buffer, M return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_alpha(const void *sendbuf, @@ -537,6 +561,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_alpha(const void int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else void *in_recvbuf = recvbuf; void *host_sendbuf = NULL; void *host_recvbuf = NULL; @@ -623,6 +653,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_alpha(const void return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_beta(const void *sendbuf, @@ -632,6 +663,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_beta(const 
void * MPIR_Comm * comm, int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else void *in_recvbuf = recvbuf; void *host_sendbuf = NULL; void *host_recvbuf = NULL; @@ -671,6 +708,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_beta(const void * return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_gamma(const void *sendbuf, @@ -681,6 +719,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_gamma(const void int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else void *in_recvbuf = recvbuf; void *host_sendbuf = NULL; void *host_recvbuf = NULL; @@ -723,6 +767,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_gamma(const void return mpi_errno; fn_fail: goto fn_exit; +#endif } /* Multi-leaders based composition. Have num_leaders per node, which reduce the data within @@ -741,6 +786,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_delta(const void int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else char *shm_addr; int my_leader_rank = -1, iter; MPI_Aint num_chunks, chunk_size_floor, chunk_size_ceil; @@ -934,6 +985,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_delta(const void return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_alpha(const void *sendbuf, @@ -943,6 +995,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_alpha(const void *se MPIR_Comm * comm, int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else MPI_Aint true_lb = 0; MPI_Aint true_extent = 0; MPI_Aint extent = 0; @@ -1017,6 +1075,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_alpha(const void *se return mpi_errno; fn_fail: goto 
fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_beta(const void *sendbuf, @@ -1026,6 +1085,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_beta(const void *sen MPIR_Comm * comm, int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else MPI_Aint true_lb = 0; MPI_Aint true_extent = 0; MPI_Aint extent = 0; @@ -1120,24 +1185,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_beta(const void *sen fn_fail: goto fn_exit; -} - - -MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_intra_composition_gamma(const void *sendbuf, - void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, - MPI_Op op, int root, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIDI_NM_mpi_reduce(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; +#endif } /* Node-aware multi-leaders based inter-node and intra-node composition. 
Each rank on a node places @@ -1153,6 +1201,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_intra_composition_alpha(const void * int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else int num_nodes; int num_ranks = MPIR_Comm_size(comm_ptr); int node_comm_size = MPIR_Comm_size(comm_ptr->node_comm); @@ -1248,6 +1302,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_intra_composition_alpha(const void * return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_intra_composition_beta(const void *sendbuf, @@ -1260,6 +1315,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_intra_composition_beta(const void *s int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else int node_comm_size = 0; if (comm_ptr->node_comm != NULL) { node_comm_size = MPIR_Comm_size(comm_ptr->node_comm); @@ -1288,58 +1349,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoall_intra_composition_beta(const void *s return mpi_errno; fn_fail: goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoallv_intra_composition_alpha(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_alltoallv(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, - coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Alltoallw_intra_composition_alpha(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype - sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype - recvtypes[], - MPIR_Comm * comm_ptr, - 
int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_alltoallw(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_intra_composition_alpha(const void *sendbuf, @@ -1352,6 +1362,11 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_intra_composition_alpha(const void int coll_attr) { int mpi_errno = MPI_SUCCESS; +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else int node_comm_size = MPIR_Comm_size(comm_ptr->node_comm); int my_node_comm_rank = MPIR_Comm_rank(comm_ptr->node_comm); MPI_Aint type_size, extent, true_extent, lb; @@ -1439,6 +1454,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_intra_composition_alpha(const void return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_intra_composition_beta(const void *sendbuf, @@ -1451,6 +1467,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_intra_composition_beta(const void * int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else int node_comm_size = 0; if (comm_ptr->node_comm != NULL) { node_comm_size = MPIR_Comm_size(comm_ptr->node_comm); @@ -1479,6 +1501,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allgather_intra_composition_beta(const void * return mpi_errno; fn_fail: goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Allgatherv_intra_composition_alpha(const void *sendbuf, @@ -1492,6 +1515,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allgatherv_intra_composition_alpha(const void int coll_attr) { int mpi_errno = MPI_SUCCESS; + +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else int node_comm_size = 0; if (comm_ptr->node_comm != NULL) { node_comm_size = MPIR_Comm_size(comm_ptr->node_comm); @@ -1520,141 +1549,7 @@ MPL_STATIC_INLINE_PREFIX int 
MPIDI_Allgatherv_intra_composition_alpha(const void return mpi_errno; fn_fail: goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Gather_intra_composition_alpha(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Gatherv_intra_composition_alpha(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Scatter_intra_composition_alpha(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Scatterv_intra_composition_alpha(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - 
MPIDI_NM_mpi_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_scatter_intra_composition_alpha(const void *sendbuf, - void *recvbuf, - const MPI_Aint - recvcounts[], - MPI_Datatype - datatype, MPI_Op op, - MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_reduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, - coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Reduce_scatter_block_intra_composition_alpha(const void - *sendbuf, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype - datatype, - MPI_Op op, - MPIR_Comm * - comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = - MPIDI_NM_mpi_reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, - op, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; +#endif } MPL_STATIC_INLINE_PREFIX int MPIDI_Scan_intra_composition_alpha(const void *sendbuf, @@ -1665,6 +1560,11 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Scan_intra_composition_alpha(const void *send MPIR_Comm * comm_ptr, int coll_attr) { int mpi_errno = MPI_SUCCESS; +#if 1 + /* FIXME */ + MPIR_Assert(0); + return mpi_errno; +#else int rank = comm_ptr->rank; MPI_Status status; void *tempbuf = NULL; @@ -1797,43 +1697,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Scan_intra_composition_alpha(const void *send return mpi_errno; fn_fail: goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Scan_intra_composition_beta(const void *sendbuf, - void *recvbuf, - MPI_Aint count, - MPI_Datatype datatype, - MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIDI_NM_mpi_scan(sendbuf, recvbuf, count, datatype, 
op, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_Exscan_intra_composition_alpha(const void *sendbuf, - void *recvbuf, - MPI_Aint count, - MPI_Datatype datatype, - MPI_Op op, - MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIDI_NM_mpi_exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; +#endif } #endif /* CH4_COLL_IMPL_H_INCLUDED */ From 39b626d8de082983ad1bad881eb7900a84a22d96 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 18:58:56 -0500 Subject: [PATCH 06/47] ch4/api: remove netmod/shm collective api --- src/mpid/ch4/ch4_api.txt | 133 -- .../ch4/netmod/include/netmod_am_fallback.h | 1 - .../netmod/include/netmod_am_fallback_coll.h | 442 ------- src/mpid/ch4/netmod/ofi/ofi_coll.h | 856 ------------- src/mpid/ch4/netmod/ucx/ucx_coll.h | 744 ------------ src/mpid/ch4/shm/posix/posix_coll.h | 1082 ----------------- src/mpid/ch4/shm/src/Makefile.mk | 2 - src/mpid/ch4/shm/src/shm_am_fallback.h | 1 - src/mpid/ch4/shm/src/shm_am_fallback_coll.h | 450 ------- src/mpid/ch4/shm/src/shm_coll.h | 755 ------------ 10 files changed, 4466 deletions(-) delete mode 100644 src/mpid/ch4/netmod/include/netmod_am_fallback_coll.h delete mode 100644 src/mpid/ch4/shm/src/shm_am_fallback_coll.h delete mode 100644 src/mpid/ch4/shm/src/shm_coll.h diff --git a/src/mpid/ch4/ch4_api.txt b/src/mpid/ch4/ch4_api.txt index 631144bd573..623ef6445a0 100644 --- a/src/mpid/ch4/ch4_api.txt +++ b/src/mpid/ch4/ch4_api.txt @@ -278,138 +278,6 @@ Native API: NM*: assert, win rank_is_local : int NM*: target, comm - mpi_barrier : int - NM*: comm, coll_attr - SHM*: comm, coll_attr - mpi_bcast : int - NM*: buffer, count, datatype, root, comm, coll_attr - SHM*: buffer, count, datatype, root, comm, coll_attr - mpi_allreduce : int - NM*: sendbuf, 
recvbuf, count, datatype, op, comm, coll_attr - SHM*: sendbuf, recvbuf, count, datatype, op, comm, coll_attr - mpi_allgather : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, coll_attr - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, coll_attr - mpi_allgatherv : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, coll_attr - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, coll_attr - mpi_scatter : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, coll_attr - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, coll_attr - mpi_scatterv : int - NM*: sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, coll_attr - SHM*: sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, coll_attr - mpi_gather : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, coll_attr - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, coll_attr - mpi_gatherv : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, coll_attr - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, coll_attr - mpi_alltoall : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, coll_attr - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, coll_attr - mpi_alltoallv : int - NM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, coll_attr - SHM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, coll_attr - mpi_alltoallw : int - NM*: sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, coll_attr - SHM*: sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, coll_attr - 
mpi_reduce : int - NM*: sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr - SHM*: sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr - mpi_reduce_scatter : int - NM*: sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, coll_attr - SHM*: sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, coll_attr - mpi_reduce_scatter_block : int - NM*: sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, coll_attr - SHM*: sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, coll_attr - mpi_scan : int - NM*: sendbuf, recvbuf, count, datatype, op, comm, coll_attr - SHM*: sendbuf, recvbuf, count, datatype, op, comm, coll_attr - mpi_exscan : int - NM*: sendbuf, recvbuf, count, datatype, op, comm, coll_attr - SHM*: sendbuf, recvbuf, count, datatype, op, comm, coll_attr - mpi_neighbor_allgather : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm - mpi_neighbor_allgatherv : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm - mpi_neighbor_alltoallv : int - NM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm - SHM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm - mpi_neighbor_alltoallw : int - NM*: sendbuf, sendcounts, sdispls-2, sendtypes, recvbuf, recvcounts, rdispls-2, recvtypes, comm - SHM*: sendbuf, sendcounts, sdispls-2, sendtypes, recvbuf, recvcounts, rdispls-2, recvtypes, comm - mpi_neighbor_alltoall : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm - mpi_ineighbor_allgather : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - mpi_ineighbor_allgatherv 
: int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, req_p - mpi_ineighbor_alltoall : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - mpi_ineighbor_alltoallv : int - NM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, req_p - SHM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, req_p - mpi_ineighbor_alltoallw : int - NM*: sendbuf, sendcounts, sdispls-2, sendtypes, recvbuf, recvcounts, rdispls-2, recvtypes, comm, req_p - SHM*: sendbuf, sendcounts, sdispls-2, sendtypes, recvbuf, recvcounts, rdispls-2, recvtypes, comm, req_p - mpi_ibarrier : int - NM*: comm, req_p - SHM*: comm, req_p - mpi_ibcast : int - NM*: buffer, count, datatype, root, comm, req_p - SHM*: buffer, count, datatype, root, comm, req_p - mpi_iallgather : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - mpi_iallgatherv : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, req_p - mpi_iallreduce : int - NM*: sendbuf, recvbuf, count, datatype, op, comm, req_p - SHM*: sendbuf, recvbuf, count, datatype, op, comm, req_p - mpi_ialltoall : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, req_p - mpi_ialltoallv : int - NM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, req_p - SHM*: sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, req_p - mpi_ialltoallw : int - NM*: sendbuf, 
sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req_p - SHM*: sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req_p - mpi_iexscan : int - NM*: sendbuf, recvbuf, count, datatype, op, comm, req_p - SHM*: sendbuf, recvbuf, count, datatype, op, comm, req_p - mpi_igather : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, req_p - mpi_igatherv : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, req_p - mpi_ireduce_scatter_block : int - NM*: sendbuf, recvbuf, recvcount, datatype, op, comm, req_p - SHM*: sendbuf, recvbuf, recvcount, datatype, op, comm, req_p - mpi_ireduce_scatter : int - NM*: sendbuf, recvbuf, recvcounts, datatype, op, comm, req_p - SHM*: sendbuf, recvbuf, recvcounts, datatype, op, comm, req_p - mpi_ireduce : int - NM*: sendbuf, recvbuf, count, datatype, op, root, comm_ptr, req_p - SHM*: sendbuf, recvbuf, count, datatype, op, root, comm_ptr, req_p - mpi_iscan : int - NM*: sendbuf, recvbuf, count, datatype, op, comm, req_p - SHM*: sendbuf, recvbuf, count, datatype, op, comm, req_p - mpi_iscatter : int - NM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, req_p - SHM*: sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, req_p - mpi_iscatterv : int - NM*: sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, req_p - SHM*: sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, req_p mpi_type_commit_hook : int NM : datatype_p SHM : type @@ -457,7 +325,6 @@ PARAM: disp_unit_p: int * displs: const MPI_Aint * dst_vci: int - coll_attr: int flag: int * group: MPIR_Group * handler_id: int diff --git 
a/src/mpid/ch4/netmod/include/netmod_am_fallback.h b/src/mpid/ch4/netmod/include/netmod_am_fallback.h index fd708415fa7..443ca7486c0 100644 --- a/src/mpid/ch4/netmod/include/netmod_am_fallback.h +++ b/src/mpid/ch4/netmod/include/netmod_am_fallback.h @@ -10,7 +10,6 @@ #include "netmod_am_fallback_recv.h" #include "netmod_am_fallback_probe.h" #include "netmod_am_fallback_rma.h" -#include "netmod_am_fallback_coll.h" #include "netmod_am_fallback_part.h" #endif /* NETMOD_AM_FALLBACK_H_INCLUDED */ diff --git a/src/mpid/ch4/netmod/include/netmod_am_fallback_coll.h b/src/mpid/ch4/netmod/include/netmod_am_fallback_coll.h deleted file mode 100644 index 2c004d4773f..00000000000 --- a/src/mpid/ch4/netmod/include/netmod_am_fallback_coll.h +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#ifndef NETMOD_AM_FALLBACK_COLL_H_INCLUDED -#define NETMOD_AM_FALLBACK_COLL_H_INCLUDED - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_barrier(MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Barrier_impl(comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_bcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int 
MPIDI_NM_mpi_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scatterv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint 
recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, - coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, coll_attr); -} - 
-MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_exscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, 
recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_alltoallv_impl(sendbuf, sendcounts, 
sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ibarrier(MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ibarrier_impl(comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ibcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ibcast_impl(buffer, count, datatype, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Iallgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Iallgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** request) -{ - return MPIR_Iallreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, request); -} - -MPL_STATIC_INLINE_PREFIX int 
MPIDI_NM_mpi_ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ialltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ialltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ialltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iexscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Iexscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Igather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - 
const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Igatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ireduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ireduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Iscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** request) -{ - return MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int 
root, - MPIR_Comm * comm, MPIR_Request ** request) -{ - return MPIR_Iscatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, request); -} - -#endif /* NETMOD_AM_FALLBACK_COLL_H_INCLUDED */ diff --git a/src/mpid/ch4/netmod/ofi/ofi_coll.h b/src/mpid/ch4/netmod/ofi/ofi_coll.h index 9fe52f2b35c..f394996e254 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_coll.h +++ b/src/mpid/ch4/netmod/ofi/ofi_coll.h @@ -30,860 +30,4 @@ === END_MPI_T_CVAR_INFO_BLOCK === */ -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_barrier(MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Barrier_impl(comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -static inline int MPIDI_OFI_bcast_json(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_bcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - enum fi_datatype fi_dt; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_BCAST_OFI_INTRA_ALGORITHM) { - case MPIR_CVAR_BCAST_OFI_INTRA_ALGORITHM_trigger_tree_tagged: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, MPIDI_OFI_ENABLE_TRIGGERED && - MPIDI_OFI_ENABLE_DATA_AUTO_PROGRESS && - MPIDI_OFI_datatype_to_ofi(datatype, &fi_dt) != -1, - mpi_errno, - "Bcast triggered_tagged cannot be applied.\n"); - mpi_errno = - MPIDI_OFI_Bcast_intra_triggered_tagged(buffer, count, datatype, root, comm, - MPIR_Bcast_tree_type, - MPIR_CVAR_BCAST_TREE_KVAL); - break; - case MPIR_CVAR_BCAST_OFI_INTRA_ALGORITHM_trigger_tree_rma: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, 
MPIDI_OFI_ENABLE_TRIGGERED && - MPIDI_OFI_ENABLE_DATA_AUTO_PROGRESS && - MPIDI_OFI_datatype_to_ofi(datatype, &fi_dt) != -1, - mpi_errno, "Bcast triggered_rma cannot be applied.\n"); - mpi_errno = - MPIDI_OFI_Bcast_intra_triggered_rma(buffer, count, datatype, root, comm, - MPIR_Bcast_tree_type, - MPIR_CVAR_BCAST_TREE_KVAL); - break; - case MPIR_CVAR_BCAST_OFI_INTRA_ALGORITHM_mpir: - goto fallback; - case MPIR_CVAR_BCAST_OFI_INTRA_ALGORITHM_auto: - mpi_errno = MPIDI_OFI_bcast_json(buffer, count, datatype, root, comm, coll_attr); - break; - default: - MPIR_Assert(0); - } - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - MPIR_Comm * 
comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; - -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scatterv(const void *sendbuf, const MPI_Aint * 
sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, recvbuf, - recvcount, recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, - 
sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = - MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - 
-MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_exscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = - MPIR_Neighbor_allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - 
int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoall_impl(sendbuf, 
sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ibarrier(MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ibarrier_impl(comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ibcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ibcast_impl(buffer, count, datatype, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - 
mpi_errno = MPIR_Iallgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoallw(const void *sendbuf, - const 
MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iexscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iexscan_impl(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Igather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Igatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_scatter_block_impl(sendbuf, 
recvbuf, recvcount, - datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscan_impl(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscatterv_impl(sendbuf, sendcounts, 
displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - #endif /* OFI_COLL_H_INCLUDED */ diff --git a/src/mpid/ch4/netmod/ucx/ucx_coll.h b/src/mpid/ch4/netmod/ucx/ucx_coll.h index 5067fc22981..cf0f0274cff 100644 --- a/src/mpid/ch4/netmod/ucx/ucx_coll.h +++ b/src/mpid/ch4/netmod/ucx/ucx_coll.h @@ -11,748 +11,4 @@ #include "../../../common/hcoll/hcoll.h" #endif -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_barrier(MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Barrier(comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); - } - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_bcast(void *buffer, MPI_Aint count, MPI_Datatype datatype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Bcast(buffer, count, datatype, root, comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, coll_attr); - } - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Allreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); - } - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - 
MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Allgather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); - } - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - 
MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scatterv(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Alltoall(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); - } - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm_ptr, - coll_attr); - } - MPIR_FUNC_EXIT; - return mpi_errno; -} - 
-MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - -#ifdef HAVE_HCOLL - mpi_errno = hcoll_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr); - if (mpi_errno != MPI_SUCCESS) -#endif - { - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, - coll_attr); - } - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, - datatype, op, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_reduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_scan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - 
MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_exscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm_ptr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint 
sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm_ptr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_neighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, - recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void 
*recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ibarrier(MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ibarrier_impl(comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ibcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ibcast_impl(buffer, count, datatype, root, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - 
-MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iallreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int 
mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ialltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iexscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iexscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Igather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Igatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm_ptr, req); - - 
MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_scatter_impl(sendbuf, recvbuf, recvcounts, - datatype, op, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_ireduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} 
- -MPL_STATIC_INLINE_PREFIX int MPIDI_NM_mpi_iscatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - #endif /* UCX_COLL_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/posix/posix_coll.h b/src/mpid/ch4/shm/posix/posix_coll.h index f45015d25f8..da671fa0191 100644 --- a/src/mpid/ch4/shm/posix/posix_coll.h +++ b/src/mpid/ch4/shm/posix/posix_coll.h @@ -147,1086 +147,4 @@ === END_MPI_T_CVAR_INFO_BLOCK === */ - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_barrier(MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__BARRIER, - .comm_ptr = comm, - }; - MPIDI_POSIX_csel_container_s *cnt; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_BARRIER_POSIX_INTRA_ALGORITHM) { - case MPIR_CVAR_BARRIER_POSIX_INTRA_ALGORITHM_release_gather: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, !MPIR_IS_THREADED, mpi_errno, - "Barrier release_gather cannot be applied.\n"); - mpi_errno = MPIDI_POSIX_mpi_barrier_release_gather(comm, coll_attr); - break; - - case MPIR_CVAR_BARRIER_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - case MPIR_CVAR_BARRIER_POSIX_INTRA_ALGORITHM_auto: - cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig); - if (cnt == NULL) - goto fallback; - - switch (cnt->id) { - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_barrier_release_gather: - mpi_errno = - MPIDI_POSIX_mpi_barrier_release_gather(comm, coll_attr); - break; - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_impl: - goto fallback; - default: - MPIR_Assert(0); - } - break; - - default: - MPIR_Assert(0); - } - - 
MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Barrier_impl(comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__BCAST, - .comm_ptr = comm, - .u.bcast.buffer = buffer, - .u.bcast.count = count, - .u.bcast.datatype = datatype, - .u.bcast.root = root, - }; - MPIDI_POSIX_csel_container_s *cnt; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM) { - case MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM_release_gather: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, !MPIR_IS_THREADED, mpi_errno, - "Bcast release_gather cannot be applied.\n"); - mpi_errno = - MPIDI_POSIX_mpi_bcast_release_gather(buffer, count, datatype, root, comm, - coll_attr); - break; - - case MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM_ipc_read: - mpi_errno = - MPIDI_POSIX_mpi_bcast_gpu_ipc_read(buffer, count, datatype, root, comm, coll_attr); - break; - - case MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - case MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM_auto: - if (MPIR_CVAR_COLL_HYBRID_MEMORY) { - cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig); - } else { - /* In no hybird case, local memory type can be used to select algorithm */ - MPL_pointer_attr_t pointer_attr; - MPIR_GPU_query_pointer_attr(buffer, &pointer_attr); - if (MPL_gpu_attr_is_strict_dev(&pointer_attr)) { - cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm_gpu), coll_sig); - } else { - cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig); - } - } - if (cnt == NULL) - goto fallback; - - switch (cnt->id) { - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_bcast_release_gather: - mpi_errno = - 
MPIDI_POSIX_mpi_bcast_release_gather(buffer, count, datatype, root, comm, - coll_attr); - break; - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_bcast_ipc_read: - mpi_errno = - MPIDI_POSIX_mpi_bcast_gpu_ipc_read(buffer, count, datatype, root, comm, - coll_attr); - break; - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_impl: - goto fallback; - default: - MPIR_Assert(0); - } - break; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__ALLREDUCE, - .comm_ptr = comm, - .u.allreduce.sendbuf = sendbuf, - .u.allreduce.recvbuf = recvbuf, - .u.allreduce.count = count, - .u.allreduce.datatype = datatype, - .u.allreduce.op = op, - }; - MPIDI_POSIX_csel_container_s *cnt; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_ALLREDUCE_POSIX_INTRA_ALGORITHM) { - case MPIR_CVAR_ALLREDUCE_POSIX_INTRA_ALGORITHM_release_gather: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, !MPIR_IS_THREADED && - MPIR_Op_is_commutative(op), mpi_errno, - "Allreduce release_gather cannot be applied.\n"); - mpi_errno = - MPIDI_POSIX_mpi_allreduce_release_gather(sendbuf, recvbuf, count, datatype, op, - comm, coll_attr); - break; - - case MPIR_CVAR_ALLREDUCE_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - case MPIR_CVAR_ALLREDUCE_POSIX_INTRA_ALGORITHM_auto: - cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig); - if (cnt == NULL) - goto fallback; - - switch (cnt->id) { - case 
MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_allreduce_release_gather: - mpi_errno = - MPIDI_POSIX_mpi_allreduce_release_gather(sendbuf, recvbuf, count, datatype, - op, comm, coll_attr); - break; - - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_impl: - goto fallback; - - default: - MPIR_Assert(0); - } - break; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_ALLGATHER_POSIX_INTRA_ALGORITHM) { - case MPIR_CVAR_ALLGATHER_POSIX_INTRA_ALGORITHM_ipc_read: - mpi_errno = MPIDI_POSIX_mpi_allgather_gpu_ipc_read(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm, coll_attr); - break; - - case MPIR_CVAR_ALLGATHER_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_ALLGATHERV_POSIX_INTRA_ALGORITHM) { - case 
MPIR_CVAR_ALLGATHERV_POSIX_INTRA_ALGORITHM_ipc_read: - mpi_errno = MPIDI_POSIX_mpi_allgatherv_gpu_ipc_read(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, - recvtype, comm, coll_attr); - break; - - case MPIR_CVAR_ALLGATHERV_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; - -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = 
MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_scatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_ALLTOALL_POSIX_INTRA_ALGORITHM) { - case MPIR_CVAR_ALLTOALL_POSIX_INTRA_ALGORITHM_ipc_read: - mpi_errno = MPIDI_POSIX_mpi_alltoall_gpu_ipc_read(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm, coll_attr); - break; - - case MPIR_CVAR_ALLTOALL_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm, - int 
coll_attr) -{ - int mpi_errno; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_reduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, int root, MPIR_Comm * comm, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__REDUCE, - .comm_ptr = comm, - .u.reduce.sendbuf = sendbuf, - .u.reduce.recvbuf = recvbuf, - .u.reduce.count = count, - .u.reduce.datatype = datatype, - .u.reduce.op = op, - .u.reduce.root = root, - }; - MPIDI_POSIX_csel_container_s *cnt; - - MPIR_FUNC_ENTER; - - switch (MPIR_CVAR_REDUCE_POSIX_INTRA_ALGORITHM) { - case MPIR_CVAR_REDUCE_POSIX_INTRA_ALGORITHM_release_gather: - MPII_COLLECTIVE_FALLBACK_CHECK(comm->rank, !MPIR_IS_THREADED && - MPIR_Op_is_commutative(op), mpi_errno, - "Reduce release_gather cannot be applied.\n"); - mpi_errno = - MPIDI_POSIX_mpi_reduce_release_gather(sendbuf, recvbuf, count, datatype, op, root, - comm, coll_attr); - break; - - case MPIR_CVAR_REDUCE_POSIX_INTRA_ALGORITHM_mpir: - goto fallback; - - case MPIR_CVAR_REDUCE_POSIX_INTRA_ALGORITHM_auto: - cnt 
= MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig); - if (cnt == NULL) - goto fallback; - - switch (cnt->id) { - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_reduce_release_gather: - mpi_errno = - MPIDI_POSIX_mpi_reduce_release_gather(sendbuf, recvbuf, count, datatype, op, - root, comm, coll_attr); - break; - - case MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_impl: - goto fallback; - - default: - MPIR_Assert(0); - } - break; - - default: - MPIR_Assert(0); - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, coll_attr); - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = - MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_reduce_scatter_block(const void *sendbuf, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_scan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Scan_impl(sendbuf, recvbuf, count, 
datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_exscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_ERR_CHECK(mpi_errno); - - MPIR_FUNC_EXIT; - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_neighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = - MPIR_Neighbor_allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_neighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_neighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_neighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const 
MPI_Aint sdispls[], - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_neighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Neighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ineighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ineighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ineighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, 
MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ineighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ibarrier(MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ibarrier_impl(comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ibcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ibcast_impl(buffer, count, datatype, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iallgather(const 
void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ialltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ialltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint * recvcounts, - const MPI_Aint * 
rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ialltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iexscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iexscan_impl(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Igather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Igatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ireduce_scatter_block(const void *sendbuf, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = - MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int 
MPIDI_POSIX_mpi_ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_ireduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Ireduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iallreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iallreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscan_impl(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_iscatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const 
MPI_Aint * displs, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** request) -{ - int mpi_errno; - MPIR_FUNC_ENTER; - - mpi_errno = MPIR_Iscatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, request); - - MPIR_FUNC_EXIT; - return mpi_errno; -} - #endif /* POSIX_COLL_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/src/Makefile.mk b/src/mpid/ch4/shm/src/Makefile.mk index 77995e3d9ae..0445e225c75 100644 --- a/src/mpid/ch4/shm/src/Makefile.mk +++ b/src/mpid/ch4/shm/src/Makefile.mk @@ -7,14 +7,12 @@ AM_CPPFLAGS += -I$(top_srcdir)/src/mpid/ch4/shm/src noinst_HEADERS += src/mpid/ch4/shm/src/shm_impl.h \ src/mpid/ch4/shm/src/shm_am_fallback.h \ - src/mpid/ch4/shm/src/shm_am_fallback_coll.h \ src/mpid/ch4/shm/src/shm_am_fallback_probe.h \ src/mpid/ch4/shm/src/shm_am_fallback_recv.h \ src/mpid/ch4/shm/src/shm_am_fallback_rma.h \ src/mpid/ch4/shm/src/shm_am_fallback_send.h \ src/mpid/ch4/shm/src/shm_am_fallback_part.h \ src/mpid/ch4/shm/src/shm_am.h \ - src/mpid/ch4/shm/src/shm_coll.h \ src/mpid/ch4/shm/src/shm_hooks.h \ src/mpid/ch4/shm/src/shm_progress.h \ src/mpid/ch4/shm/src/shm_p2p.h \ diff --git a/src/mpid/ch4/shm/src/shm_am_fallback.h b/src/mpid/ch4/shm/src/shm_am_fallback.h index 83ceb6bd8fb..31a9d4e17cc 100644 --- a/src/mpid/ch4/shm/src/shm_am_fallback.h +++ b/src/mpid/ch4/shm/src/shm_am_fallback.h @@ -10,7 +10,6 @@ #include "shm_am_fallback_recv.h" #include "shm_am_fallback_probe.h" #include "shm_am_fallback_rma.h" -#include "shm_am_fallback_coll.h" #include "shm_am_fallback_part.h" #endif /* SHM_AM_FALLBACK_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/src/shm_am_fallback_coll.h b/src/mpid/ch4/shm/src/shm_am_fallback_coll.h deleted file mode 100644 index e6d0f5c6b86..00000000000 --- a/src/mpid/ch4/shm/src/shm_am_fallback_coll.h +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level 
directory - */ - -#ifndef SHM_AM_FALLBACK_COLL_H_INCLUDED -#define SHM_AM_FALLBACK_COLL_H_INCLUDED - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_barrier(MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Barrier_impl(comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_bcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_allreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - int coll_attr) -{ - return MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Gather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * 
recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Gatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Scatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_scatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - int coll_attr) -{ - return MPIR_Alltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint recvcounts[], - const 
MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Alltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_reduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, - coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_reduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_scan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Scan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_exscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - return MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, 
recvcount, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr) -{ - return MPIR_Neighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_allgather_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, req); 
-} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint displs[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_allgatherv_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_alltoall_impl(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_alltoallv_impl(sendbuf, sendcounts, sdispls, - sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], - const MPI_Datatype sendtypes[], - void *recvbuf, - const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ineighbor_alltoallw_impl(sendbuf, sendcounts, sdispls, - sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ibarrier(MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ibarrier_impl(comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ibcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, - 
MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ibcast_impl(buffer, count, datatype, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iallgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Iallgather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Iallgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iallreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** request) -{ - return MPIR_Iallreduce_impl(sendbuf, recvbuf, count, datatype, op, comm, request); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ialltoall_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ialltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ialltoallv_impl(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ialltoallw(const void *sendbuf, - 
const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Ialltoallw_impl(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iexscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Iexscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Igather_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Igatherv_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ireduce_scatter_block(const void *sendbuf, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ireduce_scatter_block_impl(sendbuf, recvbuf, recvcount, - datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ireduce_scatter_impl(sendbuf, recvbuf, 
recvcounts, datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ireduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - return MPIR_Ireduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - return MPIR_Iscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, req); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, - MPIR_Request ** request) -{ - return MPIR_Iscatter_impl(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, root, comm, request); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iscatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm, MPIR_Request ** request) -{ - return MPIR_Iscatterv_impl(sendbuf, sendcounts, displs, sendtype, - recvbuf, recvcount, recvtype, root, comm, request); -} - -#endif /* SHM_AM_FALLBACK_COLL_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/src/shm_coll.h b/src/mpid/ch4/shm/src/shm_coll.h deleted file mode 100644 index e1ba0315124..00000000000 --- a/src/mpid/ch4/shm/src/shm_coll.h +++ /dev/null @@ -1,755 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#ifndef SHM_COLL_H_INCLUDED -#define SHM_COLL_H_INCLUDED - -#include -#include "../posix/shm_inline.h" - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_barrier(MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_barrier(comm, coll_attr); 
- - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_bcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, MPIR_Comm * comm, - int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_bcast(buffer, count, datatype, root, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_allreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_allreduce(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_allgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_allgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_scatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int 
MPIDI_SHM_mpi_scatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_gather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_gatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, root, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_alltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm, 
- int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_alltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, - recvcounts, rdispls, recvtypes, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_reduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_reduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_reduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint * recvcounts, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_reduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, - comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_reduce_scatter_block(const void *sendbuf, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, - op, comm_ptr, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_scan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype 
datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_scan(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_exscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, int coll_attr) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_exscan(sendbuf, recvbuf, count, datatype, op, comm, coll_attr); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_neighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_neighbor_allgatherv(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_neighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_alltoallw(const void *sendbuf, - const 
MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype * sendtypes, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype * recvtypes, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_neighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_neighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_allgather(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ineighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_allgatherv(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ineighbor_allgatherv(sendbuf, sendcount, sendtype, recvbuf, - recvcounts, displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_alltoall(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - 
MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ineighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_alltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ineighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, - recvbuf, recvcounts, rdispls, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ineighbor_alltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - const MPI_Datatype * sendtypes, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype * recvtypes, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, - recvbuf, recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ibarrier(MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ibarrier(comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ibcast(void *buffer, MPI_Aint count, - MPI_Datatype datatype, int root, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ibcast(buffer, count, datatype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iallgather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int 
ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iallgatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iallreduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iallreduce(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ialltoall(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ialltoallv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ialltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, - recvcounts, rdispls, recvtype, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ialltoallw(const void *sendbuf, - const MPI_Aint * sendcounts, - 
const MPI_Aint * sdispls, - const MPI_Datatype sendtypes[], void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * rdispls, - const MPI_Datatype recvtypes[], - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ialltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, - recvcounts, rdispls, recvtypes, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iexscan(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iexscan(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_igather(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_igatherv(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ireduce_scatter_block(const void *sendbuf, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, - op, comm, req); - - MPIR_FUNC_EXIT; 
- return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ireduce_scatter(const void *sendbuf, void *recvbuf, - const MPI_Aint * recvcounts, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_ireduce(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, - MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_ireduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iscan(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, - MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iscan(sendbuf, recvbuf, count, datatype, op, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iscatter(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - int root, MPIR_Comm * comm, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm, req); - - MPIR_FUNC_EXIT; - return ret; -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_SHM_mpi_iscatterv(const void *sendbuf, - const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, MPIR_Request ** req) -{ - int ret; - - MPIR_FUNC_ENTER; - - ret = MPIDI_POSIX_mpi_iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, - recvcount, recvtype, root, comm_ptr, req); - - MPIR_FUNC_EXIT; - return ret; -} - -#endif 
/* SHM_COLL_H_INCLUDED */ From 6aa4bb29d4dbd6ecdc39da35e4c31cad94580226 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 23 Aug 2025 21:05:58 -0500 Subject: [PATCH 07/47] ch4: remove device-layer json csel We will replace the device-algorithm selection later at the MPIR layer. --- maint/json_gen.sh | 2 - src/mpid/ch4/include/mpidpre.h | 4 - src/mpid/ch4/shm/posix/posix_comm.c | 28 ------ src/mpid/ch4/shm/posix/posix_init.c | 77 --------------- src/mpid/ch4/shm/posix/posix_pre.h | 9 +- src/mpid/ch4/src/ch4_comm.c | 27 ------ src/mpid/ch4/src/ch4_init.c | 139 ---------------------------- src/mpid/ch4/src/ch4_types.h | 5 - 8 files changed, 1 insertion(+), 290 deletions(-) diff --git a/maint/json_gen.sh b/maint/json_gen.sh index 7413a32bbb6..5ed8dfc55f9 100755 --- a/maint/json_gen.sh +++ b/maint/json_gen.sh @@ -34,5 +34,3 @@ EOF # create specific json buffers create_json_buf maint/tuning/coll/mpir/generic.json MPII_coll_generic_json -create_json_buf maint/tuning/coll/ch4/generic.json MPIDI_coll_generic_json -create_json_buf maint/tuning/coll/ch4/posix_generic.json MPIDI_POSIX_coll_generic_json diff --git a/src/mpid/ch4/include/mpidpre.h b/src/mpid/ch4/include/mpidpre.h index 667751c705a..ba650cf63df 100644 --- a/src/mpid/ch4/include/mpidpre.h +++ b/src/mpid/ch4/include/mpidpre.h @@ -21,7 +21,6 @@ #include "shmpre.h" #endif #include "uthash.h" -#include "ch4_csel_container.h" #define MPID_TAG_DEV_BITS 0 #define MPID_MAX_BC_SIZE 4096 @@ -586,9 +585,6 @@ typedef struct MPIDI_Devcomm_t { struct MPIDI_Multileads_comp_info_t *alltoall_comp_info, *allgather_comp_info, *allreduce_comp_info; int shm_size_per_lead; - - void *csel_comm; /* collective selection handle */ - void *csel_comm_gpu; /* collective selection handle for gpu */ } ch4; } MPIDI_Devcomm_t; diff --git a/src/mpid/ch4/shm/posix/posix_comm.c b/src/mpid/ch4/shm/posix/posix_comm.c index 1e40b32f8ff..f6d9e1e4841 100644 --- a/src/mpid/ch4/shm/posix/posix_comm.c +++ b/src/mpid/ch4/shm/posix/posix_comm.c @@ -83,24 +83,6 
@@ int MPIDI_POSIX_mpi_comm_commit_post_hook(MPIR_Comm * comm) } } - /* prune selection tree */ - if (MPIDI_global.shm.posix.csel_root) { - mpi_errno = MPIR_Csel_prune(MPIDI_global.shm.posix.csel_root, comm, - &MPIDI_POSIX_COMM(comm, csel_comm)); - MPIR_ERR_CHECK(mpi_errno); - } else { - MPIDI_POSIX_COMM(comm, csel_comm) = NULL; - } - - /* prune selection tree for gpu */ - if (MPIDI_global.shm.posix.csel_root_gpu) { - mpi_errno = MPIR_Csel_prune(MPIDI_global.shm.posix.csel_root_gpu, comm, - &MPIDI_POSIX_COMM(comm, csel_comm_gpu)); - MPIR_ERR_CHECK(mpi_errno); - } else { - MPIDI_POSIX_COMM(comm, csel_comm_gpu) = NULL; - } - fn_exit: MPIR_CHKLMEM_FREEALL(); MPIR_FUNC_EXIT; @@ -114,16 +96,6 @@ int MPIDI_POSIX_mpi_comm_free_hook(MPIR_Comm * comm) int mpi_errno = MPI_SUCCESS; MPIR_FUNC_ENTER; - if (MPIDI_POSIX_COMM(comm, csel_comm)) { - mpi_errno = MPIR_Csel_free(MPIDI_POSIX_COMM(comm, csel_comm)); - MPIR_ERR_CHECK(mpi_errno); - } - - if (MPIDI_POSIX_COMM(comm, csel_comm_gpu)) { - mpi_errno = MPIR_Csel_free(MPIDI_POSIX_COMM(comm, csel_comm_gpu)); - MPIR_ERR_CHECK(mpi_errno); - } - /* Release_gather primitives based collective algorithm works for Intra-comms only */ if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) { mpi_errno = MPIDI_POSIX_mpi_release_gather_comm_free(comm); diff --git a/src/mpid/ch4/shm/posix/posix_init.c b/src/mpid/ch4/shm/posix/posix_init.c index a991212b4f5..1cc95899e52 100644 --- a/src/mpid/ch4/shm/posix/posix_init.c +++ b/src/mpid/ch4/shm/posix/posix_init.c @@ -121,39 +121,6 @@ static int choose_posix_eager(void) goto fn_exit; } -static void *create_container(struct json_object *obj) -{ - MPIDI_POSIX_csel_container_s *cnt = - MPL_malloc(sizeof(MPIDI_POSIX_csel_container_s), MPL_MEM_COLL); - - json_object_object_foreach(obj, key, val) { - char *ckey = MPL_strdup_no_spaces(key); - - if (!strcmp(ckey, "algorithm=MPIDI_POSIX_mpi_bcast_release_gather")) - cnt->id = - MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_bcast_release_gather; - 
else if (!strcmp(ckey, "algorithm=MPIDI_POSIX_mpi_bcast_ipc_read")) - cnt->id = MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_bcast_ipc_read; - else if (!strcmp(ckey, "algorithm=MPIDI_POSIX_mpi_barrier_release_gather")) - cnt->id = - MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_barrier_release_gather; - else if (!strcmp(ckey, "algorithm=MPIDI_POSIX_mpi_allreduce_release_gather")) - cnt->id = - MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_allreduce_release_gather; - else if (!strcmp(ckey, "algorithm=MPIDI_POSIX_mpi_reduce_release_gather")) - cnt->id = - MPIDI_POSIX_CSEL_CONTAINER_TYPE__ALGORITHM__MPIDI_POSIX_mpi_reduce_release_gather; - else { - fprintf(stderr, "unrecognized key %s\n", key); - MPIR_Assert(0); - } - - MPL_free(ckey); - } - - return cnt; -} - int MPIDI_POSIX_init_vci(int vci) { int mpi_errno = MPI_SUCCESS; @@ -411,38 +378,8 @@ static int posix_coll_init(void) int mpi_errno = MPI_SUCCESS; MPIR_FUNC_ENTER; - /* Initialize collective selection */ - if (!strcmp(MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE, "")) { - mpi_errno = MPIR_Csel_create_from_buf(MPIDI_POSIX_coll_generic_json, - create_container, &MPIDI_global.shm.posix.csel_root); - MPIDI_global.shm.posix.csel_source = "MPIDI_POSIX_coll_generic_json"; - } else { - mpi_errno = MPIR_Csel_create_from_file(MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE, - create_container, &MPIDI_global.shm.posix.csel_root); - MPIDI_global.shm.posix.csel_source = MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE; - } - MPIR_ERR_CHECK(mpi_errno); - - /* Initialize collective selection for gpu */ - if (!strcmp(MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE_GPU, "")) { - mpi_errno = MPIR_Csel_create_from_buf(MPIDI_POSIX_coll_generic_json, - create_container, - &MPIDI_global.shm.posix.csel_root_gpu); - MPIDI_global.shm.posix.csel_source_gpu = "MPIDI_POSIX_coll_generic_json"; - } else { - mpi_errno = - 
MPIR_Csel_create_from_file(MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE_GPU, - create_container, &MPIDI_global.shm.posix.csel_root_gpu); - MPIDI_global.shm.posix.csel_source_gpu = - MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE_GPU; - } - MPIR_ERR_CHECK(mpi_errno); - - fn_exit: MPIR_FUNC_EXIT; return mpi_errno; - fn_fail: - goto fn_exit; } static int posix_coll_finalize(void) @@ -457,21 +394,7 @@ static int posix_coll_finalize(void) * avoid segmentation fault */ MPIDI_POSIX_shm_limit_counter = &MPIDI_POSIX_dummy_shm_limit_counter; - if (MPIDI_global.shm.posix.csel_root) { - mpi_errno = MPIR_Csel_free(MPIDI_global.shm.posix.csel_root); - MPIR_ERR_CHECK(mpi_errno); - } - - if (MPIDI_global.shm.posix.csel_root_gpu) { - mpi_errno = MPIR_Csel_free(MPIDI_global.shm.posix.csel_root_gpu); - MPIR_ERR_CHECK(mpi_errno); - } - - fn_exit: - MPIR_FUNC_EXIT; return mpi_errno; - fn_fail: - goto fn_exit; } void *MPIDI_POSIX_mpi_alloc_mem(MPI_Aint size, MPIR_Info * info_ptr) diff --git a/src/mpid/ch4/shm/posix/posix_pre.h b/src/mpid/ch4/shm/posix/posix_pre.h index 293bf2d5cca..0bbda3a783a 100644 --- a/src/mpid/ch4/shm/posix/posix_pre.h +++ b/src/mpid/ch4/shm/posix/posix_pre.h @@ -28,19 +28,12 @@ typedef enum { struct MPIR_Request; typedef struct { - void *csel_root; - const char *csel_source; - void *csel_root_gpu; - const char *csel_source_gpu; + /* empty */ } MPIDI_POSIX_Global_t; -extern char MPIDI_POSIX_coll_generic_json[]; - /* These structs are populated with dummy variables because empty structs are not supported in all * compilers: https://stackoverflow.com/a/755339/491687 */ typedef struct { - void *csel_comm; - void *csel_comm_gpu; MPIDI_POSIX_release_gather_comm_t release_gather, nb_release_gather; int nb_bcast_seq_no; /* Seq number of the release-gather based nonblocking bcast call */ int nb_reduce_seq_no; /* Seq number of the release-gather based nonblocking reduce call */ diff --git a/src/mpid/ch4/src/ch4_comm.c b/src/mpid/ch4/src/ch4_comm.c index 
78994adb141..0d3a27e14ba 100644 --- a/src/mpid/ch4/src/ch4_comm.c +++ b/src/mpid/ch4/src/ch4_comm.c @@ -179,23 +179,6 @@ int MPID_Comm_commit_post_hook(MPIR_Comm * comm) MPIR_ERR_CHECK(mpi_errno); } - /* prune selection tree */ - if (MPIDI_global.csel_root) { - mpi_errno = MPIR_Csel_prune(MPIDI_global.csel_root, comm, &MPIDI_COMM(comm, csel_comm)); - MPIR_ERR_CHECK(mpi_errno); - } else { - MPIDI_COMM(comm, csel_comm) = NULL; - } - - /* prune selection tree for gpu */ - if (MPIDI_global.csel_root_gpu) { - mpi_errno = MPIR_Csel_prune(MPIDI_global.csel_root_gpu, comm, - &MPIDI_COMM(comm, csel_comm_gpu)); - MPIR_ERR_CHECK(mpi_errno); - } else { - MPIDI_COMM(comm, csel_comm_gpu) = NULL; - } - fn_exit: MPIR_FUNC_EXIT; return mpi_errno; @@ -263,16 +246,6 @@ int MPID_Comm_free_hook(MPIR_Comm * comm) MPIR_ERR_CHECK(mpi_errno); #endif - if (MPIDI_COMM(comm, csel_comm)) { - mpi_errno = MPIR_Csel_free(MPIDI_COMM(comm, csel_comm)); - MPIR_ERR_CHECK(mpi_errno); - } - - if (MPIDI_COMM(comm, csel_comm_gpu)) { - mpi_errno = MPIR_Csel_free(MPIDI_COMM(comm, csel_comm_gpu)); - MPIR_ERR_CHECK(mpi_errno); - } - mpi_errno = MPIDIG_destroy_comm(comm); MPIR_ERR_CHECK(mpi_errno); diff --git a/src/mpid/ch4/src/ch4_init.c b/src/mpid/ch4/src/ch4_init.c index ee82c745287..8bbaa736052 100644 --- a/src/mpid/ch4/src/ch4_init.c +++ b/src/mpid/ch4/src/ch4_init.c @@ -171,92 +171,6 @@ === END_MPI_T_CVAR_INFO_BLOCK === */ -static void *create_container(struct json_object *obj) -{ - MPIDI_Csel_container_s *cnt = MPL_malloc(sizeof(MPIDI_Csel_container_s), MPL_MEM_COLL); - - json_object_object_foreach(obj, key, val) { - char *ckey = MPL_strdup_no_spaces(key); - - if (!strcmp(ckey, "composition=MPIDI_Barrier_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Barrier_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Barrier_intra_composition_beta")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Barrier_intra_composition_beta; - else if 
(!strcmp(ckey, "composition=MPIDI_Bcast_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Bcast_intra_composition_beta")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_beta; - else if (!strcmp(ckey, "composition=MPIDI_Bcast_intra_composition_gamma")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_gamma; - else if (!strcmp(ckey, "composition=MPIDI_Bcast_intra_composition_delta")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Bcast_intra_composition_delta; - else if (!strcmp(ckey, "composition=MPIDI_Allreduce_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Allreduce_intra_composition_beta")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_beta; - else if (!strcmp(ckey, "composition=MPIDI_Allreduce_intra_composition_gamma")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allreduce_intra_composition_gamma; - else if (!strcmp(ckey, "composition=MPIDI_Reduce_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Reduce_intra_composition_beta")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_beta; - else if (!strcmp(ckey, "composition=MPIDI_Reduce_intra_composition_gamma")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_intra_composition_gamma; - else if (!strcmp(ckey, "composition=MPIDI_Alltoall_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Alltoall_intra_composition_beta")) - cnt->id = 
MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoall_intra_composition_beta; - else if (!strcmp(ckey, "composition=MPIDI_Alltoallv_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoallv_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Alltoallw_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Alltoallw_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Allgather_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allgather_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Allgather_intra_composition_beta")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allgather_intra_composition_beta; - else if (!strcmp(ckey, "composition=MPIDI_Allgatherv_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Allgatherv_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Gather_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Gather_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Gatherv_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Gatherv_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Scatter_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scatter_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Scatterv_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scatterv_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Reduce_scatter_intra_composition_alpha")) - cnt->id = - MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_scatter_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Reduce_scatter_block_intra_composition_alpha")) - cnt->id = - 
MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Reduce_scatter_block_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Scan_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scan_intra_composition_alpha; - else if (!strcmp(ckey, "composition=MPIDI_Scan_intra_composition_beta")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Scan_intra_composition_beta; - else if (!strcmp(ckey, "composition=MPIDI_Exscan_intra_composition_alpha")) - cnt->id = MPIDI_CSEL_CONTAINER_TYPE__COMPOSITION__MPIDI_Exscan_intra_composition_alpha; - else { - fprintf(stderr, "unrecognized key %s\n", ckey); - MPIR_Assert(0); - } - - MPL_free(ckey); - } - - return (void *) cnt; -} - static int choose_netmod(void); static int choose_netmod(void) @@ -506,8 +420,6 @@ int MPID_Init(int requested, int *provided) MPIDIU_avt_init(); MPIDIU_map_create((void **) &(MPIDI_global.win_map), MPL_MEM_RMA); - MPIDI_global.csel_root = NULL; - MPIDI_global.csel_root_gpu = NULL; mpi_errno = MPIDI_vci_init(); MPIR_ERR_CHECK(mpi_errno); @@ -544,30 +456,6 @@ int MPID_Init(int requested, int *provided) MPIDIG_am_check_init(); - /* Initialize collective selection */ - if (!strcmp(MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE, "")) { - mpi_errno = MPIR_Csel_create_from_buf(MPIDI_coll_generic_json, - create_container, &MPIDI_global.csel_root); - MPIDI_global.csel_source = "MPIDI_coll_generic_json"; - } else { - mpi_errno = MPIR_Csel_create_from_file(MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE, - create_container, &MPIDI_global.csel_root); - MPIDI_global.csel_source = MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE; - } - MPIR_ERR_CHECK(mpi_errno); - - /* Initialize collective selection for gpu */ - if (!strcmp(MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE_GPU, "")) { - mpi_errno = MPIR_Csel_create_from_buf(MPIDI_coll_generic_json, - create_container, &MPIDI_global.csel_root_gpu); - MPIDI_global.csel_source_gpu = "MPIDI_coll_generic_json"; - } else { - mpi_errno 
= MPIR_Csel_create_from_file(MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE_GPU, - create_container, &MPIDI_global.csel_root_gpu); - MPIDI_global.csel_source_gpu = MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE_GPU; - } - MPIR_ERR_CHECK(mpi_errno); - /* Override split_type */ MPIDI_global.MPIR_Comm_fns_store.split_type = MPIDI_Comm_split_type; MPIR_Comm_fns = &MPIDI_global.MPIR_Comm_fns_store; @@ -595,23 +483,6 @@ int MPID_Init(int requested, int *provided) #endif fprintf(stdout, "==== Various sizes and limits ====\n"); fprintf(stdout, "sizeof(MPIDI_per_vci_t): %d\n", (int) sizeof(MPIDI_per_vci_t)); - printf("==== collective selection ====\n"); - MPIR_Assert(MPIR_Csel_source); - printf("MPIR: %s\n", MPIR_Csel_source); - MPIR_Assert(MPIDI_global.csel_source); - printf("MPID: %s\n", MPIDI_global.csel_source); -#ifndef MPIDI_CH4_DIRECT_NETMOD - MPIR_Assert(MPIDI_global.shm.posix.csel_source); - printf("MPID/shm: %s\n", MPIDI_global.shm.posix.csel_source); -#endif - if (MPIR_CVAR_ENABLE_GPU) { - MPIR_Assert(MPIDI_global.csel_source_gpu); - printf("MPID (GPU): %s\n", MPIDI_global.csel_source_gpu); -#ifndef MPIDI_CH4_DIRECT_NETMOD - MPIR_Assert(MPIDI_global.shm.posix.csel_source_gpu); - printf("MPID/shm (GPU): %s\n", MPIDI_global.shm.posix.csel_source_gpu); -#endif - } } fn_exit: @@ -699,16 +570,6 @@ int MPID_Finalize(void) MPIR_ERR_CHECK(mpi_errno); #endif - if (MPIDI_global.csel_root) { - mpi_errno = MPIR_Csel_free(MPIDI_global.csel_root); - MPIR_ERR_CHECK(mpi_errno); - } - - if (MPIDI_global.csel_root_gpu) { - mpi_errno = MPIR_Csel_free(MPIDI_global.csel_root_gpu); - MPIR_ERR_CHECK(mpi_errno); - } - MPIDIG_am_finalize(); MPIDU_genq_private_pool_destroy(MPIDI_global.gpu_coll_pool); diff --git a/src/mpid/ch4/src/ch4_types.h b/src/mpid/ch4/src/ch4_types.h index c84a93637d4..b4f3f5931f2 100644 --- a/src/mpid/ch4/src/ch4_types.h +++ b/src/mpid/ch4/src/ch4_types.h @@ -294,10 +294,6 @@ typedef struct MPIDI_CH4_Global_t { MPIDI_per_vci_t per_vci[MPIDI_CH4_MAX_VCIS]; 
MPIDI_CH4_configurations_t settings; - void *csel_root; - const char *csel_source; - void *csel_root_gpu; - const char *csel_source_gpu; #ifndef MPIDI_CH4_DIRECT_NETMOD MPIDI_SHM_Global_t shm; @@ -305,7 +301,6 @@ typedef struct MPIDI_CH4_Global_t { MPIDI_NM_Global_t nm; } MPIDI_CH4_Global_t; extern MPIDI_CH4_Global_t MPIDI_global; -extern char MPIDI_coll_generic_json[]; #ifdef MPL_USE_DBG_LOGGING extern MPL_dbg_class MPIDI_CH4_DBG_GENERAL; extern MPL_dbg_class MPIDI_CH4_DBG_MAP; From c90c338200d8d220b59aa8c08c684faf681626b7 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 20:13:15 -0500 Subject: [PATCH 08/47] ch4/shm: inclusion order for shm_am.h ipc_p2p.h references MPIDI_POSIX_am_eager_limit, which is defined in shm_am.h. Not sure how did it work before. --- src/mpid/ch4/shm/src/shm_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mpid/ch4/shm/src/shm_impl.h b/src/mpid/ch4/shm/src/shm_impl.h index dcb424154a7..df3adf9de55 100644 --- a/src/mpid/ch4/shm/src/shm_impl.h +++ b/src/mpid/ch4/shm/src/shm_impl.h @@ -12,17 +12,17 @@ #ifndef SHM_IMPL_H_INCLUDED #define SHM_IMPL_H_INCLUDED +#include "shm_am.h" + #ifdef MPIDI_ENABLE_AM_ONLY #include "shm_am_fallback.h" #else -#include "shm_coll.h" #include "shm_p2p.h" #include "shm_rma.h" #include "shm_part.h" #include "shm_hooks.h" #endif -#include "shm_am.h" #include "shm_progress.h" #include "shm_hooks_internal.h" From cdea774eb5c2556542176e88ccbf3b98498bbc2c Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 20:19:44 -0500 Subject: [PATCH 09/47] all: use fallback routines for internal collectives The fallback collectives (e.g. MPIR_Bcast_fallback) are manual "auto" functions that may not be the best algorithms for the system, but are sufficient for internal usages during init and object constricutions. 
--- src/binding/c/comm_api.txt | 4 +-- src/mpi/coll/algorithms/treealgo/treeutil.c | 6 ++-- src/mpi/coll/allgather/allgather_intra_smp.c | 17 +++++----- ...lgatherv_inter_remote_gather_local_bcast.c | 2 +- src/mpi/comm/comm_impl.c | 4 +-- src/mpi/comm/comm_split.c | 5 +-- src/mpi/comm/comm_split_type_nbhd.c | 9 +++--- src/mpi/comm/commutil.c | 3 +- src/mpi/comm/contextid.c | 14 ++++----- src/mpi/stream/stream_impl.c | 17 +++++----- src/mpi/threadcomm/threadcomm_impl.c | 4 +-- src/mpid/ch3/src/mpid_vc.c | 2 +- src/mpid/ch4/netmod/ofi/ofi_comm.c | 6 ++-- src/mpid/ch4/netmod/ofi/ofi_win.c | 11 ++++--- src/mpid/ch4/netmod/ucx/ucx_vci.c | 6 ++-- src/mpid/ch4/shm/ipc/src/ipc_fd.c | 17 +++++----- .../ch4/shm/posix/eager/iqueue/iqueue_init.c | 2 +- src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h | 7 +++-- src/mpid/ch4/shm/posix/posix_vci.c | 8 ++--- .../posix/release_gather/nb_release_gather.c | 13 ++++---- .../shm/posix/release_gather/release_gather.c | 13 ++++---- src/mpid/ch4/shm/src/topotree.c | 10 +++--- src/mpid/ch4/src/ch4_comm.c | 8 ++--- src/mpid/ch4/src/ch4_spawn.c | 4 +-- src/mpid/ch4/src/mpidig_win.c | 9 +++--- src/mpid/ch4/src/mpidig_win.h | 2 +- src/mpid/common/shm/mpidu_init_shm_alloc.c | 10 +++--- src/mpid/common/shm/mpidu_shm_alloc.c | 31 ++++++++++--------- src/util/mpir_nodemap.c | 10 +++--- 29 files changed, 134 insertions(+), 120 deletions(-) diff --git a/src/binding/c/comm_api.txt b/src/binding/c/comm_api.txt index 2ab5de90820..9c1aa06cd4c 100644 --- a/src/binding/c/comm_api.txt +++ b/src/binding/c/comm_api.txt @@ -300,8 +300,8 @@ MPI_Intercomm_merge: * The Intel test suite checks for this; it is also an easy * error to make */ acthigh = high ? 
1 : 0; /* Clamp high into 1 or 0 */ - mpi_errno = MPIR_Allreduce(MPI_IN_PLACE, &acthigh, 1, MPIR_INT_INTERNAL, - MPI_SUM, intercomm_ptr->local_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, &acthigh, 1, MPIR_INT_INTERNAL, + MPI_SUM, intercomm_ptr->local_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* acthigh must either == 0 or the size of the local comm */ if (acthigh != 0 && acthigh != intercomm_ptr->local_size) { diff --git a/src/mpi/coll/algorithms/treealgo/treeutil.c b/src/mpi/coll/algorithms/treealgo/treeutil.c index 53ae9c925dd..ee164415275 100644 --- a/src/mpi/coll/algorithms/treealgo/treeutil.c +++ b/src/mpi/coll/algorithms/treealgo/treeutil.c @@ -758,9 +758,9 @@ int MPII_Treeutil_tree_topology_aware_k_init(MPIR_Comm * comm, int k, int root, } else { /* rank level - build a tree on the ranks */ /* Do an allgather to know the current num_children on each rank */ - mpi_errno = MPIR_Allgather_impl(&(ct->num_children), 1, MPIR_INT_INTERNAL, - num_childrens, 1, MPIR_INT_INTERNAL, comm, - MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(&(ct->num_children), 1, MPIR_INT_INTERNAL, + num_childrens, 1, MPIR_INT_INTERNAL, comm, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); int switch_leader = tree_ut_int_elt(&level->ranks, level->root_idx); diff --git a/src/mpi/coll/allgather/allgather_intra_smp.c b/src/mpi/coll/allgather/allgather_intra_smp.c index 6edb566b9e5..7af8c1c0186 100644 --- a/src/mpi/coll/allgather/allgather_intra_smp.c +++ b/src/mpi/coll/allgather/allgather_intra_smp.c @@ -29,8 +29,9 @@ int MPIR_Allgather_intra_smp_no_order(const void *sendbuf, MPI_Aint sendcount, int external_rank = comm_ptr->external_rank; if (local_size == comm_size || external_size == comm_size) { - mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, coll_attr); + mpi_errno = + MPIR_Allgather_fallback(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, + comm_ptr, 
coll_attr); goto fn_exit; } @@ -80,20 +81,20 @@ int MPIR_Allgather_intra_smp_no_order(const void *sendbuf, MPI_Aint sendcount, MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent); local_recvbuf = (char *) recvbuf + displs[external_rank] * recvtype_extent; } - mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype, - local_recvbuf, recvcount, recvtype, 0, node_comm, coll_attr); + mpi_errno = MPIR_Gather_fallback(sendbuf, sendcount, sendtype, + local_recvbuf, recvcount, recvtype, 0, node_comm, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* -- allgatherv over node roots -- */ if (local_rank == 0) { - mpi_errno = MPIR_Allgatherv_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, - recvbuf, counts, displs, recvtype, - node_roots_comm, coll_attr); + mpi_errno = MPIR_Allgatherv_fallback(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, + recvbuf, counts, displs, recvtype, + node_roots_comm, coll_attr); MPIR_ERR_CHECK(mpi_errno); } /* -- bcast over node -- */ - mpi_errno = MPIR_Bcast_impl(recvbuf, total_count, recvtype, 0, node_comm, coll_attr); + mpi_errno = MPIR_Bcast_fallback(recvbuf, total_count, recvtype, 0, node_comm, coll_attr); MPIR_ERR_CHECK(mpi_errno); fn_exit: diff --git a/src/mpi/coll/allgatherv/allgatherv_inter_remote_gather_local_bcast.c b/src/mpi/coll/allgatherv/allgatherv_inter_remote_gather_local_bcast.c index 16229a4df11..1a36720b38a 100644 --- a/src/mpi/coll/allgatherv/allgatherv_inter_remote_gather_local_bcast.c +++ b/src/mpi/coll/allgatherv/allgatherv_inter_remote_gather_local_bcast.c @@ -71,7 +71,7 @@ int MPIR_Allgatherv_inter_remote_gather_local_bcast(const void *sendbuf, MPI_Ain mpi_errno = MPIR_Type_commit_impl(&newtype); MPIR_ERR_CHECK(mpi_errno); - mpi_errno = MPIR_Bcast_allcomm_auto(recvbuf, 1, newtype, 0, newcomm_ptr, coll_attr); + mpi_errno = MPIR_Bcast_fallback(recvbuf, 1, newtype, 0, newcomm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); MPIR_Type_free_impl(&newtype); diff --git a/src/mpi/comm/comm_impl.c b/src/mpi/comm/comm_impl.c index 6547317cf75..a94518b65e3 
100644 --- a/src/mpi/comm/comm_impl.c +++ b/src/mpi/comm/comm_impl.c @@ -690,8 +690,8 @@ int MPIR_Intercomm_create_from_groups_impl(MPIR_Group * local_group_ptr, int loc /* synchronize mpi_errno */ int tmp_err = mpi_errno; - mpi_errno = MPIR_Bcast_impl(&tmp_err, 1, MPIR_INT_INTERNAL, local_leader, local_comm, - MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(&tmp_err, 1, MPIR_INT_INTERNAL, local_leader, local_comm, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); mpi_errno = tmp_err; MPIR_ERR_CHECK(mpi_errno); diff --git a/src/mpi/comm/comm_split.c b/src/mpi/comm/comm_split.c index 7f941e43ff5..33f3c9f229b 100644 --- a/src/mpi/comm/comm_split.c +++ b/src/mpi/comm/comm_split.c @@ -111,8 +111,9 @@ int MPIR_Comm_split_impl(MPIR_Comm * comm_ptr, int color, int key, MPIR_Comm ** local_comm_ptr = comm_ptr; } /* Gather information on the local group of processes */ - mpi_errno = MPIR_Allgather(MPI_IN_PLACE, 2, MPIR_INT_INTERNAL, table, 2, MPIR_INT_INTERNAL, - local_comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allgather_fallback(MPI_IN_PLACE, 2, MPIR_INT_INTERNAL, table, 2, MPIR_INT_INTERNAL, + local_comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* Step 2: How many processes have our same color? 
*/ diff --git a/src/mpi/comm/comm_split_type_nbhd.c b/src/mpi/comm/comm_split_type_nbhd.c index 4a9375245d4..a8d2efdfe9a 100644 --- a/src/mpi/comm/comm_split_type_nbhd.c +++ b/src/mpi/comm/comm_split_type_nbhd.c @@ -275,8 +275,9 @@ static int network_split_by_minsize(MPIR_Comm * comm_ptr, int key, int subcomm_m } MPIR_Assert(num_processes_at_node != NULL); /* Send the count to processes */ - mpi_errno = MPIR_Allreduce(MPI_IN_PLACE, num_processes_at_node, num_nodes, - MPIR_INT_INTERNAL, MPI_SUM, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, num_processes_at_node, num_nodes, + MPIR_INT_INTERNAL, MPI_SUM, comm_ptr, + MPIR_COLL_ATTR_SYNC); if (topo_type == MPIR_NETTOPO_TYPE__FAT_TREE || topo_type == MPIR_NETTOPO_TYPE__CLOS_NETWORK) { @@ -375,8 +376,8 @@ static int network_split_by_minsize(MPIR_Comm * comm_ptr, int key, int subcomm_m tree_depth = MPIR_hwtopo_get_depth(obj_containing_cpuset); /* get min tree depth to all processes */ - MPIR_Allreduce(&tree_depth, &min_tree_depth, 1, MPIR_INT_INTERNAL, MPI_MIN, node_comm, - MPIR_COLL_ATTR_SYNC); + MPIR_Allreduce_fallback(&tree_depth, &min_tree_depth, 1, MPIR_INT_INTERNAL, MPI_MIN, + node_comm, MPIR_COLL_ATTR_SYNC); if (min_tree_depth) { int num_hwloc_objs_at_depth; diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c index da397e4007f..b763e02d98e 100644 --- a/src/mpi/comm/commutil.c +++ b/src/mpi/comm/commutil.c @@ -781,8 +781,7 @@ static int init_comm_seq(MPIR_Comm * comm) /* Every rank need share the same seq from root. NOTE: it is possible for * different communicators to have the same seq. 
It is only used as an * opportunistic optimization */ - mpi_errno = MPIR_Bcast_allcomm_auto(&tmp, 1, MPIR_INT_INTERNAL, 0, comm, - MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(&tmp, 1, MPIR_INT_INTERNAL, 0, comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); comm->seq = tmp; diff --git a/src/mpi/comm/contextid.c b/src/mpi/comm/contextid.c index 66dd26e7678..eda23239ffa 100644 --- a/src/mpi/comm/contextid.c +++ b/src/mpi/comm/contextid.c @@ -455,9 +455,9 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr MPIR_INT_INTERNAL, MPI_BAND, comm_ptr, group_ptr, coll_tag, MPIR_COLL_ATTR_SYNC); } else { - mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1, - MPIR_INT_INTERNAL, MPI_BAND, comm_ptr, - MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1, + MPIR_INT_INTERNAL, MPI_BAND, comm_ptr, MPIR_COLL_ATTR_SYNC); } MPIR_ERR_CHECK(mpi_errno); @@ -555,8 +555,8 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr MPI_MIN, comm_ptr, group_ptr, coll_tag, MPIR_COLL_ATTR_SYNC); } else { - mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, &minfree, 1, MPIR_INT_INTERNAL, - MPI_MIN, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, &minfree, 1, MPIR_INT_INTERNAL, + MPI_MIN, comm_ptr, MPIR_COLL_ATTR_SYNC); } if (minfree > 0) { @@ -1053,8 +1053,8 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, int *context_id, int *rec /* Make sure that all of the local processes now have this * id */ - mpi_errno = MPIR_Bcast_impl(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, - 0, comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, + 0, comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* The recvcontext_id must be the one that was allocated out of the local * group, not the remote group. 
Otherwise we could end up posting two diff --git a/src/mpi/stream/stream_impl.c b/src/mpi/stream/stream_impl.c index f15ddf8bddb..f1910bed451 100644 --- a/src/mpi/stream/stream_impl.c +++ b/src/mpi/stream/stream_impl.c @@ -268,8 +268,9 @@ int MPIR_Stream_comm_create_impl(MPIR_Comm * comm_ptr, MPIR_Stream * stream_ptr, vci_table = MPL_malloc(comm_ptr->local_size * sizeof(int), MPL_MEM_OTHER); MPIR_ERR_CHKANDJUMP(!vci_table, mpi_errno, MPI_ERR_OTHER, "**nomem"); - mpi_errno = MPIR_Allgather_impl(&vci, 1, MPIR_INT_INTERNAL, - vci_table, 1, MPIR_INT_INTERNAL, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(&vci, 1, MPIR_INT_INTERNAL, + vci_table, 1, MPIR_INT_INTERNAL, comm_ptr, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); (*newcomm_ptr)->stream_comm_type = MPIR_STREAM_COMM_SINGLE; @@ -312,9 +313,9 @@ int MPIR_Stream_comm_create_multiplex_impl(MPIR_Comm * comm_ptr, MPIR_ERR_CHKANDJUMP(!displs, mpi_errno, MPI_ERR_OTHER, "**nomem"); MPI_Aint num_tmp = num_streams; - mpi_errno = MPIR_Allgather_impl(&num_tmp, 1, MPIR_AINT_INTERNAL, - num_table, 1, MPIR_AINT_INTERNAL, comm_ptr, - MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(&num_tmp, 1, MPIR_AINT_INTERNAL, + num_table, 1, MPIR_AINT_INTERNAL, comm_ptr, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); MPI_Aint num_total = 0; @@ -346,9 +347,9 @@ int MPIR_Stream_comm_create_multiplex_impl(MPIR_Comm * comm_ptr, local_vcis[i] = stream_ptr ? 
stream_ptr->vci : 0; } - mpi_errno = MPIR_Allgatherv_impl(local_vcis, num_streams, MPI_INT, - vci_table, num_table, displs, MPI_INT, comm_ptr, - MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgatherv_fallback(local_vcis, num_streams, MPI_INT, + vci_table, num_table, displs, MPI_INT, comm_ptr, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); (*newcomm_ptr)->stream_comm_type = MPIR_STREAM_COMM_MULTIPLEX; diff --git a/src/mpi/threadcomm/threadcomm_impl.c b/src/mpi/threadcomm/threadcomm_impl.c index 025c4bb89b2..215d8c932c9 100644 --- a/src/mpi/threadcomm/threadcomm_impl.c +++ b/src/mpi/threadcomm/threadcomm_impl.c @@ -34,8 +34,8 @@ int MPIR_Threadcomm_init_impl(MPIR_Comm * comm, int num_threads, MPIR_Comm ** co threads_table = MPL_malloc(comm_size * sizeof(int), MPL_MEM_OTHER); MPIR_ERR_CHKANDJUMP(!threads_table, mpi_errno, MPI_ERR_OTHER, "**nomem"); - mpi_errno = MPIR_Allgather_impl(&num_threads, 1, MPIR_INT_INTERNAL, threads_table, 1, - MPIR_INT_INTERNAL, comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(&num_threads, 1, MPIR_INT_INTERNAL, threads_table, 1, + MPIR_INT_INTERNAL, comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); int *rank_offset_table;; diff --git a/src/mpid/ch3/src/mpid_vc.c b/src/mpid/ch3/src/mpid_vc.c index 1e62b5b3a94..9da1a4be1eb 100644 --- a/src/mpid/ch3/src/mpid_vc.c +++ b/src/mpid/ch3/src/mpid_vc.c @@ -637,7 +637,7 @@ int MPIDI_PG_ForwardPGInfo( MPIR_Comm *peer_ptr, MPIR_Comm *comm_ptr, } /* See if everyone is happy */ - mpi_errno = MPIR_Allreduce( MPI_IN_PLACE, &allfound, 1, MPIR_INT_INTERNAL, MPI_LAND, comm_ptr, 0 ); + mpi_errno = MPIR_Allreduce_fallback( MPI_IN_PLACE, &allfound, 1, MPIR_INT_INTERNAL, MPI_LAND, comm_ptr, 0 ); MPIR_ERR_CHECK(mpi_errno); if (allfound) return MPI_SUCCESS; diff --git a/src/mpid/ch4/netmod/ofi/ofi_comm.c b/src/mpid/ch4/netmod/ofi/ofi_comm.c index 619ee3f3c2a..b72772ee291 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_comm.c +++ b/src/mpid/ch4/netmod/ofi/ofi_comm.c @@ -105,9 +105,9 @@ static int 
update_nic_preferences(MPIR_Comm * comm) /* Collect the NIC IDs set for the other ranks. We always expect to receive a single * NIC id from each rank, i.e., one MPI_INT. */ - mpi_errno = MPIR_Allgather_allcomm_auto(MPI_IN_PLACE, 0, MPIR_INT_INTERNAL, - pref_nic_copy, 1, MPIR_INT_INTERNAL, comm, - MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(MPI_IN_PLACE, 0, MPIR_INT_INTERNAL, + pref_nic_copy, 1, MPIR_INT_INTERNAL, comm, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (MPIDI_OFI_COMM(comm).pref_nic == NULL) { diff --git a/src/mpid/ch4/netmod/ofi/ofi_win.c b/src/mpid/ch4/netmod/ofi/ofi_win.c index 9ac46837561..a738ce5ab2c 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_win.c +++ b/src/mpid/ch4/netmod/ofi/ofi_win.c @@ -132,8 +132,9 @@ static int win_allgather(MPIR_Win * win, void *base, int disp_unit) * Attempting to create an optimized memory region key. Gets the next MR key that's * available to the processes involved in the RMA window. Use the current maximum + 1 * to ensure that the key is available for all processes. */ - mpi_errno = MPIR_Allreduce(&MPIDI_OFI_global.global_max_optimized_mr_key, &local_key, 1, - MPI_UNSIGNED, MPI_MAX, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(&MPIDI_OFI_global.global_max_optimized_mr_key, &local_key, + 1, MPI_UNSIGNED, MPI_MAX, comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (local_key + 1 < MPIDI_OFI_NUM_OPTIMIZED_MEMORY_REGIONS) { @@ -216,7 +217,8 @@ static int win_allgather(MPIR_Win * win, void *base, int disp_unit) } /* Check if any process fails to register. If so, release local MR and force AM path. 
*/ - MPIR_Allreduce(&rc, &allrc, 1, MPIR_INT_INTERNAL, MPI_MIN, comm_ptr, MPIR_COLL_ATTR_SYNC); + MPIR_Allreduce_fallback(&rc, &allrc, 1, MPIR_INT_INTERNAL, MPI_MIN, comm_ptr, + MPIR_COLL_ATTR_SYNC); if (allrc < 0) { if (rc >= 0 && MPIDI_OFI_WIN(win).mr) MPIDI_OFI_CALL(fi_close(&MPIDI_OFI_WIN(win).mr->fid), fi_close); @@ -963,7 +965,8 @@ int MPIDI_OFI_mpi_win_attach_hook(MPIR_Win * win, void *base, MPI_Aint size) } /* Check if any process fails to register. If so, release local MR and force AM path. */ - MPIR_Allreduce(&rc, &allrc, 1, MPIR_INT_INTERNAL, MPI_MIN, comm_ptr, MPIR_COLL_ATTR_SYNC); + MPIR_Allreduce_fallback(&rc, &allrc, 1, MPIR_INT_INTERNAL, MPI_MIN, comm_ptr, + MPIR_COLL_ATTR_SYNC); if (allrc < 0) { if (rc >= 0) MPIDI_OFI_CALL(fi_close(&mr->fid), fi_close); diff --git a/src/mpid/ch4/netmod/ucx/ucx_vci.c b/src/mpid/ch4/netmod/ucx/ucx_vci.c index c58a5ace8c0..4c27c5d9fc4 100644 --- a/src/mpid/ch4/netmod/ucx/ucx_vci.c +++ b/src/mpid/ch4/netmod/ucx/ucx_vci.c @@ -75,9 +75,9 @@ static int all_vcis_address_exchange(MPIR_Comm * comm) MPIDI_UCX_global.ctx[i].addrname_len); } /* Allgather */ - mpi_errno = MPIR_Allgather_allcomm_auto(MPI_IN_PLACE, 0, MPIR_BYTE_INTERNAL, - all_names, my_len, MPIR_BYTE_INTERNAL, - comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(MPI_IN_PLACE, 0, MPIR_BYTE_INTERNAL, + all_names, my_len, MPIR_BYTE_INTERNAL, + comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* insert the addresses */ diff --git a/src/mpid/ch4/shm/ipc/src/ipc_fd.c b/src/mpid/ch4/shm/ipc/src/ipc_fd.c index a8713abf130..6101fce2128 100644 --- a/src/mpid/ch4/shm/ipc/src/ipc_fd.c +++ b/src/mpid/ch4/shm/ipc/src/ipc_fd.c @@ -46,8 +46,9 @@ int MPIDI_FD_comm_bootstrap(MPIR_Comm * comm) } int already_initialized; - mpi_errno = MPIR_Allreduce_impl(&ipc_fd_initialized, &already_initialized, 1, MPIR_INT_INTERNAL, - MPI_MAX, node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(&ipc_fd_initialized, &already_initialized, 1, 
MPIR_INT_INTERNAL, + MPI_MAX, node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (already_initialized) { @@ -141,7 +142,7 @@ static int MPIDI_IPC_mpi_socks_init(MPIR_Comm * node_comm, pid_t * all_pids, int MPIR_STRERROR_BUF_SIZE), errno); } - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* create clients for higher ranks */ @@ -224,9 +225,9 @@ static int MPIDI_IPC_mpi_fd_init(MPIR_Comm * node_comm) MPIR_CHKLMEM_MALLOC(all_pids, local_size * sizeof(pid_t)); all_pids[local_rank] = getpid(); - mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, sizeof(pid_t), MPIR_BYTE_INTERNAL, - all_pids, sizeof(pid_t), MPIR_BYTE_INTERNAL, - node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(MPI_IN_PLACE, sizeof(pid_t), MPIR_BYTE_INTERNAL, + all_pids, sizeof(pid_t), MPIR_BYTE_INTERNAL, + node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* Initialize fd_socks */ @@ -252,10 +253,10 @@ static int MPIDI_IPC_mpi_fd_init(MPIR_Comm * node_comm) if (node_comm->rank == 0) { mpi_errno = MPIDI_IPC_mpi_fd_cleanup(local_size, local_rank, all_pids, fd_socks); MPIR_ERR_CHECK(mpi_errno); - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } else { - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); mpi_errno = MPIDI_IPC_mpi_fd_cleanup(local_size, local_rank, all_pids, fd_socks); MPIR_ERR_CHECK(mpi_errno); diff --git a/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_init.c b/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_init.c index 9a4a1052b93..4c658d5d916 100644 --- a/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_init.c +++ b/src/mpid/ch4/shm/posix/eager/iqueue/iqueue_init.c @@ -161,7 +161,7 @@ int MPIDI_POSIX_iqueue_set_vcis(void *slab, 
MPIR_Comm * comm, int max_vcis) MPIR_Comm *node_comm = MPIR_Comm_get_node_comm(comm); MPIR_Assert(node_comm); - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); fn_exit: diff --git a/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h b/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h index 296588d346e..4343057ae30 100644 --- a/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h +++ b/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h @@ -99,10 +99,11 @@ static int allgather_ipc_handles(const void *buf, MPI_Aint count, MPI_Datatype d } /* allgather is needed to exchange all the IPC handles */ + /* FIXME: call MPIR_Coll_auto */ mpi_errno = - MPIR_Allgather_impl(&my_ipc_handle, sizeof(MPIDI_IPCI_ipc_handle_t), MPIR_BYTE_INTERNAL, - ipc_handles, sizeof(MPIDI_IPCI_ipc_handle_t), MPIR_BYTE_INTERNAL, comm, - MPIR_COLL_ATTR_SYNC); + MPIR_Allgather_fallback(&my_ipc_handle, sizeof(MPIDI_IPCI_ipc_handle_t), MPIR_BYTE_INTERNAL, + ipc_handles, sizeof(MPIDI_IPCI_ipc_handle_t), MPIR_BYTE_INTERNAL, + comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* check the ipc_handles to make sure all the buffers are on GPU */ diff --git a/src/mpid/ch4/shm/posix/posix_vci.c b/src/mpid/ch4/shm/posix/posix_vci.c index 37ee06cfeee..d329342ec4f 100644 --- a/src/mpid/ch4/shm/posix/posix_vci.c +++ b/src/mpid/ch4/shm/posix/posix_vci.c @@ -17,8 +17,8 @@ int MPIDI_POSIX_comm_set_vcis(MPIR_Comm * comm, int num_vcis) MPIR_Assert(node_comm); int max_vcis; - mpi_errno = MPIR_Allreduce_impl(&num_vcis, &max_vcis, 1, MPIR_INT_INTERNAL, MPI_MAX, - node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allreduce_fallback(&num_vcis, &max_vcis, 1, MPIR_INT_INTERNAL, MPI_MAX, + node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (max_vcis > 1) { @@ -38,7 +38,7 @@ int MPIDI_POSIX_comm_set_vcis(MPIR_Comm * comm, int num_vcis) slab = MPL_initshm_open(MPIDI_POSIX_global.shm_vci_name, slab_size, NULL); 
MPIR_ERR_CHKANDJUMP(!slab, mpi_errno, MPI_ERR_OTHER, "**nomem"); - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); #else mpi_errno = MPIDU_Init_shm_comm_alloc(comm, slab_size, (void *) &slab); @@ -49,7 +49,7 @@ int MPIDI_POSIX_comm_set_vcis(MPIR_Comm * comm, int num_vcis) mpi_errno = MPIDI_POSIX_eager_set_vcis(MPIDI_POSIX_global.shm_vci_slab, comm, max_vcis); MPIR_ERR_CHECK(mpi_errno); - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } diff --git a/src/mpid/ch4/shm/posix/release_gather/nb_release_gather.c b/src/mpid/ch4/shm/posix/release_gather/nb_release_gather.c index b26f9e4ce2a..62c4e4d8d81 100644 --- a/src/mpid/ch4/shm/posix/release_gather/nb_release_gather.c +++ b/src/mpid/ch4/shm/posix/release_gather/nb_release_gather.c @@ -121,18 +121,19 @@ int MPIDI_POSIX_nb_release_gather_comm_init(MPIR_Comm * comm_ptr, to other algorithms.\n"); } fallback = 1; - MPIR_Bcast_impl(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); + MPIR_Bcast_fallback(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_NO_MEM, "**nomem"); } else { /* More shm can be created, update the shared counter */ MPL_atomic_fetch_add_uint64(MPIDI_POSIX_shm_limit_counter, memory_to_be_allocated); fallback = 0; mpi_errno = - MPIR_Bcast_impl(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); + MPIR_Bcast_fallback(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } } else { - mpi_errno = MPIR_Bcast_impl(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); + mpi_errno = + MPIR_Bcast_fallback(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); if (fallback) { MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_NO_MEM, "**nomem"); @@ -168,8 +169,8 @@ int 
MPIDI_POSIX_nb_release_gather_comm_init(MPIR_Comm * comm_ptr, topotree_fail[0] = -1; topotree_fail[1] = -1; } - mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, topotree_fail, 2, MPIR_INT_INTERNAL, - MPI_MAX, comm_ptr, coll_attr); + mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, topotree_fail, 2, MPIR_INT_INTERNAL, + MPI_MAX, comm_ptr, coll_attr); } else { topotree_fail[0] = -1; topotree_fail[1] = -1; @@ -267,7 +268,7 @@ int MPIDI_POSIX_nb_release_gather_comm_init(MPIR_Comm * comm_ptr, if (initialize_ibcast_buf || initialize_ireduce_buf) { /* Make sure all the flags are set before ranks start reading each other's flags from shm */ - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } diff --git a/src/mpid/ch4/shm/posix/release_gather/release_gather.c b/src/mpid/ch4/shm/posix/release_gather/release_gather.c index ce79beac81b..15c12b80237 100644 --- a/src/mpid/ch4/shm/posix/release_gather/release_gather.c +++ b/src/mpid/ch4/shm/posix/release_gather/release_gather.c @@ -303,18 +303,19 @@ int MPIDI_POSIX_mpi_release_gather_comm_init(MPIR_Comm * comm_ptr, } fallback = 1; - MPIR_Bcast_impl(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); + MPIR_Bcast_fallback(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_NO_MEM, "**nomem"); } else { /* More shm can be created, update the shared counter */ MPL_atomic_fetch_add_uint64(MPIDI_POSIX_shm_limit_counter, memory_to_be_allocated); fallback = 0; mpi_errno = - MPIR_Bcast_impl(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); + MPIR_Bcast_fallback(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } } else { - mpi_errno = MPIR_Bcast_impl(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); + mpi_errno = + MPIR_Bcast_fallback(&fallback, 1, MPIR_INT_INTERNAL, 0, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); if (fallback) { 
MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_NO_MEM, "**nomem"); @@ -359,8 +360,8 @@ int MPIDI_POSIX_mpi_release_gather_comm_init(MPIR_Comm * comm_ptr, topotree_fail[0] = -1; topotree_fail[1] = -1; } - mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, topotree_fail, 2, MPIR_INT_INTERNAL, - MPI_MAX, comm_ptr, coll_attr); + mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, topotree_fail, 2, MPIR_INT_INTERNAL, + MPI_MAX, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } else { topotree_fail[0] = -1; @@ -424,7 +425,7 @@ int MPIDI_POSIX_mpi_release_gather_comm_init(MPIR_Comm * comm_ptr, release_gather_info_ptr->release_state); /* Make sure all the flags are set before ranks start reading each other's flags from shm */ - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } diff --git a/src/mpid/ch4/shm/src/topotree.c b/src/mpid/ch4/shm/src/topotree.c index 2569617c042..debdc65f6fb 100644 --- a/src/mpid/ch4/shm/src/topotree.c +++ b/src/mpid/ch4/shm/src/topotree.c @@ -499,7 +499,7 @@ int MPIDI_SHM_topology_tree_init(MPIR_Comm * comm_ptr, int root, int bcast_k, in shared_region_ptr[rank][depth++] = MPIR_hwtopo_get_lid(gid); gid = MPIR_hwtopo_get_ancestor(gid, topo_depth - depth - 1); } - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* STEP 3. 
Root has all the bind_map information, now build tree */ @@ -557,7 +557,7 @@ int MPIDI_SHM_topology_tree_init(MPIR_Comm * comm_ptr, int root, int bcast_k, in 0 /*left_skewed */ , bcast_tree_type); MPIR_ERR_CHECK(mpi_errno); } - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* Every rank copies their tree out from shared memory */ @@ -566,7 +566,7 @@ int MPIDI_SHM_topology_tree_init(MPIR_Comm * comm_ptr, int root, int bcast_k, in MPIDI_SHM_print_topotree_file("BCAST", comm_ptr->context_id, rank, bcast_tree); /* Wait until shared memory is available */ - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* Generate the reduce tree */ /* For Reduce, package leaders are added after the package local ranks, and the per_package @@ -580,7 +580,7 @@ int MPIDI_SHM_topology_tree_init(MPIR_Comm * comm_ptr, int root, int bcast_k, in MPIR_ERR_CHECK(mpi_errno); } - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* each rank copy the reduce tree out */ @@ -589,7 +589,7 @@ int MPIDI_SHM_topology_tree_init(MPIR_Comm * comm_ptr, int root, int bcast_k, in if (MPIDI_SHM_TOPOTREE_DEBUG) MPIDI_SHM_print_topotree_file("REDUCE", comm_ptr->context_id, rank, reduce_tree); /* Wait for all ranks to copy out the tree */ - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* Cleanup */ if (rank == root) { diff --git a/src/mpid/ch4/src/ch4_comm.c b/src/mpid/ch4/src/ch4_comm.c index 0d3a27e14ba..ea20ca96424 100644 --- a/src/mpid/ch4/src/ch4_comm.c +++ b/src/mpid/ch4/src/ch4_comm.c @@ -320,8 +320,8 @@ int MPID_Intercomm_exchange(MPIR_Comm * local_comm, int local_leader, bcast_data.mpi_errno = mpi_errno; bcast_data.remote_data_size = 
remote_data_size; } - mpi_errno = MPIR_Bcast_impl(&bcast_data, 2, MPIR_INT_INTERNAL, - local_leader, local_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(&bcast_data, 2, MPIR_INT_INTERNAL, + local_leader, local_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* error checking of previous leader exchange */ @@ -339,8 +339,8 @@ int MPID_Intercomm_exchange(MPIR_Comm * local_comm, int local_leader, MPIR_ERR_CHKANDJUMP(!remote_data, mpi_errno, MPI_ERR_OTHER, "**nomem"); } - mpi_errno = MPIR_Bcast_impl(remote_data, remote_data_size, MPIR_BYTE_INTERNAL, - local_leader, local_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(remote_data, remote_data_size, MPIR_BYTE_INTERNAL, + local_leader, local_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* Stage 3: Each process extract data (if necessary: add worlds, convert lpids) */ diff --git a/src/mpid/ch4/src/ch4_spawn.c b/src/mpid/ch4/src/ch4_spawn.c index 489815720b8..06a5dee05dd 100644 --- a/src/mpid/ch4/src/ch4_spawn.c +++ b/src/mpid/ch4/src/ch4_spawn.c @@ -378,10 +378,10 @@ static int dynamic_intercomm_create(const char *port_name, MPIR_Info * info, int fn_fail: /* In case root fails, we bcast mpi_errno so other ranks will abort too */ - MPIR_Bcast_impl(&mpi_errno, 1, MPIR_INT_INTERNAL, root, comm_ptr, MPIR_COLL_ATTR_SYNC); + MPIR_Bcast_fallback(&mpi_errno, 1, MPIR_INT_INTERNAL, root, comm_ptr, MPIR_COLL_ATTR_SYNC); } else { int root_errno; - MPIR_Bcast_impl(&root_errno, 1, MPIR_INT_INTERNAL, root, comm_ptr, MPIR_COLL_ATTR_SYNC); + MPIR_Bcast_fallback(&root_errno, 1, MPIR_INT_INTERNAL, root, comm_ptr, MPIR_COLL_ATTR_SYNC); if (root_errno) { MPIR_ERR_SET(mpi_errno, MPI_ERR_PORT, "**comm_connect_fail"); } diff --git a/src/mpid/ch4/src/mpidig_win.c b/src/mpid/ch4/src/mpidig_win.c index 923602e46c1..aa63a49ceb6 100644 --- a/src/mpid/ch4/src/mpidig_win.c +++ b/src/mpid/ch4/src/mpidig_win.c @@ -394,8 +394,8 @@ static int win_init(MPI_Aint length, int disp_unit, MPIR_Win ** win_ptr, 
MPIR_In if (!comm_ptr->node_comm) no_local = true; - mpi_errno = MPIR_Allreduce(&no_local, &all_no_local, 1, MPIR_C_BOOL_INTERNAL, - MPI_LAND, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allreduce_fallback(&no_local, &all_no_local, 1, MPIR_C_BOOL_INTERNAL, + MPI_LAND, comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (all_no_local) MPIDI_WIN(win, winattr) |= MPIDI_WINATTR_ACCU_NO_SHM; @@ -561,8 +561,9 @@ static int win_shm_alloc_impl(MPI_Aint size, int disp_unit, MPIR_Comm * comm_ptr * thus all process can be assigned to a page aligned start address. * - user sets alloc_shared_noncontig=true, thus we can internally make * the size aligned on each process. */ - mpi_errno = MPIR_Allreduce(&symheap_flag, &global_symheap_flag, 1, MPIR_C_BOOL_INTERNAL, - MPI_LAND, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(&symheap_flag, &global_symheap_flag, 1, MPIR_C_BOOL_INTERNAL, + MPI_LAND, comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } else global_symheap_flag = false; diff --git a/src/mpid/ch4/src/mpidig_win.h b/src/mpid/ch4/src/mpidig_win.h index a172bceceec..b4a6e91dafa 100644 --- a/src/mpid/ch4/src/mpidig_win.h +++ b/src/mpid/ch4/src/mpidig_win.h @@ -522,7 +522,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDIG_mpi_win_fence(int massert, MPIR_Win * win) * the VCI lock internally. 
*/ MPID_THREAD_CS_EXIT(VCI, MPIDI_VCI_LOCK(vci)); need_unlock = 0; - mpi_errno = MPIR_Barrier(win->comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(win->comm_ptr, MPIR_COLL_ATTR_SYNC); fn_exit: if (need_unlock) { diff --git a/src/mpid/common/shm/mpidu_init_shm_alloc.c b/src/mpid/common/shm/mpidu_init_shm_alloc.c index 55291a90b5d..37371505093 100644 --- a/src/mpid/common/shm/mpidu_init_shm_alloc.c +++ b/src/mpid/common/shm/mpidu_init_shm_alloc.c @@ -199,13 +199,13 @@ int MPIDU_Init_shm_comm_alloc(MPIR_Comm * comm, size_t len, void **ptr) } MPIR_Assert(serialized_hnd_size <= MPIDU_INIT_SHM_BLOCK_SIZE); if (node_comm) { - mpi_errno = MPIR_Bcast_impl(serialized_hnd, MPIDU_INIT_SHM_BLOCK_SIZE, - MPIR_BYTE_INTERNAL, 0, node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(serialized_hnd, MPIDU_INIT_SHM_BLOCK_SIZE, + MPIR_BYTE_INTERNAL, 0, node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } } else { - mpi_errno = MPIR_Bcast_impl(serialized_hnd_buffer, MPIDU_INIT_SHM_BLOCK_SIZE, - MPIR_BYTE_INTERNAL, 0, node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(serialized_hnd_buffer, MPIDU_INIT_SHM_BLOCK_SIZE, + MPIR_BYTE_INTERNAL, 0, node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); serialized_hnd = serialized_hnd_buffer; serialized_hnd_size = strlen(serialized_hnd) + 1; /* add 1 for null char */ @@ -222,7 +222,7 @@ int MPIDU_Init_shm_comm_alloc(MPIR_Comm * comm, size_t len, void **ptr) } if (node_comm) { - mpi_errno = MPIR_Barrier_impl(node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Barrier_fallback(node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } if (need_remove) { diff --git a/src/mpid/common/shm/mpidu_shm_alloc.c b/src/mpid/common/shm/mpidu_shm_alloc.c index d3e0faf5f90..2450399f698 100644 --- a/src/mpid/common/shm/mpidu_shm_alloc.c +++ b/src/mpid/common/shm/mpidu_shm_alloc.c @@ -235,8 +235,8 @@ static int allreduce_maxloc(size_t mysz, int myloc, MPIR_Comm * comm, size_t * m maxloc.loc = 
myloc; mpi_errno = - MPIR_Allreduce(&maxloc, &maxloc_result, 1, maxloc_type, maxloc_op->handle, comm, - MPIR_COLL_ATTR_SYNC); + MPIR_Allreduce_fallback(&maxloc, &maxloc_result, 1, maxloc_type, maxloc_op->handle, comm, + MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); *maxsz_loc = maxloc_result.loc; @@ -341,8 +341,9 @@ static int map_symm_shm(MPIR_Comm * shm_comm_ptr, MPIDU_shm_seg_t * shm_seg, int /* check results of all processes. If any failure happens (max result > 0), * return SYMSHM_OTHER_FAIL if anyone reports it (max result == 2). * Otherwise return SYMSHM_MAP_FAIL (max result == 1). */ - mpi_errno = MPIR_Allreduce(map_result_ptr, &all_map_result, 1, MPIR_INT_INTERNAL, - MPI_MAX, shm_comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(map_result_ptr, &all_map_result, 1, MPIR_INT_INTERNAL, + MPI_MAX, shm_comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (all_map_result != SYMSHM_SUCCESS) @@ -452,8 +453,8 @@ static int shm_alloc_symm_all(MPIR_Comm * comm_ptr, size_t offset, MPIDU_shm_seg MPIR_ERR_CHECK(mpi_errno); /* check if any mapping failure occurs */ - mpi_errno = MPIR_Allreduce(&map_result, &all_map_result, 1, MPIR_INT_INTERNAL, - MPI_MAX, comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allreduce_fallback(&map_result, &all_map_result, 1, MPIR_INT_INTERNAL, + MPI_MAX, comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* cleanup local shm segment if mapping failed on other process */ @@ -503,16 +504,17 @@ static int shm_alloc(MPIR_Comm * shm_comm_ptr, MPIDU_shm_seg_t * shm_seg) if (shm_fail_flag) serialized_hnd = &mpl_err_hnd[0]; - mpi_errno = MPIR_Bcast_impl(serialized_hnd, MPL_SHM_GHND_SZ, MPIR_BYTE_INTERNAL, 0, - shm_comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(serialized_hnd, MPL_SHM_GHND_SZ, MPIR_BYTE_INTERNAL, 0, + shm_comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (shm_fail_flag) goto map_fail; /* ensure all other processes have mapped successfully */ - mpi_errno = 
MPIR_Allreduce_impl(&shm_fail_flag, &any_shm_fail_flag, 1, MPIR_C_BOOL_INTERNAL, - MPI_LOR, shm_comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(&shm_fail_flag, &any_shm_fail_flag, 1, MPIR_C_BOOL_INTERNAL, + MPI_LOR, shm_comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* unlink shared memory region so it gets deleted when all processes exit */ @@ -526,8 +528,8 @@ static int shm_alloc(MPIR_Comm * shm_comm_ptr, MPIDU_shm_seg_t * shm_seg) char serialized_hnd[MPL_SHM_GHND_SZ] = { 0 }; /* get serialized handle from rank 0 and deserialize it */ - mpi_errno = MPIR_Bcast_impl(serialized_hnd, MPL_SHM_GHND_SZ, MPIR_CHAR_INTERNAL, 0, - shm_comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(serialized_hnd, MPL_SHM_GHND_SZ, MPIR_CHAR_INTERNAL, 0, + shm_comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); /* empty handler means root fails */ @@ -549,8 +551,9 @@ static int shm_alloc(MPIR_Comm * shm_comm_ptr, MPIDU_shm_seg_t * shm_seg) mapped_flag = true; result_sync: - mpi_errno = MPIR_Allreduce_impl(&shm_fail_flag, &any_shm_fail_flag, 1, MPIR_C_BOOL_INTERNAL, - MPI_LOR, shm_comm_ptr, MPIR_COLL_ATTR_SYNC); + mpi_errno = + MPIR_Allreduce_fallback(&shm_fail_flag, &any_shm_fail_flag, 1, MPIR_C_BOOL_INTERNAL, + MPI_LOR, shm_comm_ptr, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); if (any_shm_fail_flag) diff --git a/src/util/mpir_nodemap.c b/src/util/mpir_nodemap.c index ea10f44af0c..1a6ea95e612 100644 --- a/src/util/mpir_nodemap.c +++ b/src/util/mpir_nodemap.c @@ -454,16 +454,16 @@ int MPIR_nodeid_init(void) MPIR_Strerror(errno, strerrbuf, MPIR_STRERROR_BUF_SIZE), errno); my_hostname[MAX_HOSTNAME_LEN - 1] = '\0'; - mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, MAX_HOSTNAME_LEN, MPIR_CHAR_INTERNAL, - allhostnames, MAX_HOSTNAME_LEN, MPIR_CHAR_INTERNAL, - node_roots_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Allgather_fallback(MPI_IN_PLACE, MAX_HOSTNAME_LEN, MPIR_CHAR_INTERNAL, + allhostnames, MAX_HOSTNAME_LEN, 
MPIR_CHAR_INTERNAL, + node_roots_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } MPIR_Comm *node_comm = MPIR_Process.comm_world->node_comm; if (node_comm) { - mpi_errno = MPIR_Bcast_impl(allhostnames, MAX_HOSTNAME_LEN * MPIR_Process.num_nodes, - MPIR_CHAR_INTERNAL, 0, node_comm, MPIR_COLL_ATTR_SYNC); + mpi_errno = MPIR_Bcast_fallback(allhostnames, MAX_HOSTNAME_LEN * MPIR_Process.num_nodes, + MPIR_CHAR_INTERNAL, 0, node_comm, MPIR_COLL_ATTR_SYNC); MPIR_ERR_CHECK(mpi_errno); } From 7eca0a985ddf0372566a27aaf134c55d6d674f4d Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 4 Sep 2025 12:53:26 -0500 Subject: [PATCH 10/47] ch3: use MPIR_Barrier_fallback internally Use MPIR_Barrier_fallback instead of MPIR_Bcast_allcomm_auto (doesn't exist now). --- src/mpid/ch3/src/ch3u_port.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mpid/ch3/src/ch3u_port.c b/src/mpid/ch3/src/ch3u_port.c index 0bca16298b5..d310d109fe2 100644 --- a/src/mpid/ch3/src/ch3u_port.c +++ b/src/mpid/ch3/src/ch3u_port.c @@ -656,7 +656,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root, /* broadcast the received info to local processes */ MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"broadcasting the received 3 ints"); - mpi_errno = MPIR_Bcast_allcomm_auto(recv_ints, 3, MPIR_INT_INTERNAL, root, comm_ptr, 0); + mpi_errno = MPIR_Bcast_fallback(recv_ints, 3, MPIR_INT_INTERNAL, root, comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); /* check if root was unable to connect to the port */ @@ -705,7 +705,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root, /* Broadcast out the remote rank translation array */ MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Broadcasting remote translation"); - mpi_errno = MPIR_Bcast_allcomm_auto(remote_translation, remote_comm_size * 2, MPIR_INT_INTERNAL, + mpi_errno = MPIR_Bcast_fallback(remote_translation, remote_comm_size * 2, MPIR_INT_INTERNAL, root, comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); 
@@ -744,7 +744,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root, } /*printf("connect:barrier\n");fflush(stdout);*/ - mpi_errno = MPIR_Barrier_allcomm_auto(comm_ptr, 0); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); /* Free new_vc. It was explicitly allocated in MPIDI_CH3_Connect_to_root.*/ @@ -790,7 +790,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root, /* notify other processes to return an error */ MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"broadcasting 3 ints: error case"); - mpi_errno2 = MPIR_Bcast_allcomm_auto(recv_ints, 3, MPIR_INT_INTERNAL, root, comm_ptr, 0); + mpi_errno2 = MPIR_Bcast_fallback(recv_ints, 3, MPIR_INT_INTERNAL, root, comm_ptr, 0); if (mpi_errno2) MPIR_ERR_ADD(mpi_errno, mpi_errno2); goto fn_fail; } @@ -937,7 +937,7 @@ static int ReceivePGAndDistribute( MPIR_Comm *tmp_comm, MPIR_Comm *comm_ptr, /* Broadcast the size and data to the local communicator */ /*printf("accept:broadcasting 1 int\n");fflush(stdout);*/ - mpi_errno = MPIR_Bcast_allcomm_auto(&j, 1, MPIR_INT_INTERNAL, root, comm_ptr, 0); + mpi_errno = MPIR_Bcast_fallback(&j, 1, MPIR_INT_INTERNAL, root, comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); if (rank != root) { @@ -948,7 +948,7 @@ static int ReceivePGAndDistribute( MPIR_Comm *tmp_comm, MPIR_Comm *comm_ptr, } } /*printf("accept:broadcasting string of length %d\n", j);fflush(stdout);*/ - mpi_errno = MPIR_Bcast_allcomm_auto(pg_str, j, MPIR_CHAR_INTERNAL, root, comm_ptr, 0); + mpi_errno = MPIR_Bcast_fallback(pg_str, j, MPIR_CHAR_INTERNAL, root, comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); /* Then reconstruct the received process group. 
This step also initializes the created process group */ @@ -1179,7 +1179,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root, /* broadcast the received info to local processes */ /*printf("accept:broadcasting 2 ints - %d and %d\n", recv_ints[0], recv_ints[1]);fflush(stdout);*/ - mpi_errno = MPIR_Bcast_allcomm_auto(recv_ints, 3, MPIR_INT_INTERNAL, root, comm_ptr, 0); + mpi_errno = MPIR_Bcast_fallback(recv_ints, 3, MPIR_INT_INTERNAL, root, comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); @@ -1229,7 +1229,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root, /* Broadcast out the remote rank translation array */ MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Broadcast remote_translation"); - mpi_errno = MPIR_Bcast_allcomm_auto(remote_translation, remote_comm_size * 2, MPIR_INT_INTERNAL, + mpi_errno = MPIR_Bcast_fallback(remote_translation, remote_comm_size * 2, MPIR_INT_INTERNAL, root, comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); #ifdef MPICH_DBG_OUTPUT @@ -1266,7 +1266,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root, } MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Barrier"); - mpi_errno = MPIR_Barrier_allcomm_auto(comm_ptr, 0); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); /* Free new_vc once the connection is completed. 
It was explicitly @@ -1345,7 +1345,7 @@ static int SetupNewIntercomm( MPIR_Comm *comm_ptr, int remote_comm_size, MPIR_ERR_CHECK(mpi_errno); MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Barrier"); - mpi_errno = MPIR_Barrier_allcomm_auto(comm_ptr, 0); + mpi_errno = MPIR_Barrier_fallback(comm_ptr, 0); MPIR_ERR_CHECK(mpi_errno); fn_exit: From f467a7659895e297bb3907b128bea8936c984c3a Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 4 Sep 2025 12:57:25 -0500 Subject: [PATCH 11/47] temp auto --- src/include/mpir_coll.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index d150998e47a..3a6b8fb8014 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -66,4 +66,7 @@ int MPIR_TSP_Ibarrier_sched_intra_tsp_auto(MPIR_Comm * comm, MPIR_TSP_sched_t sc int MPIR_TSP_Ireduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm * comm_ptr, MPIR_TSP_sched_t sched); +int MPIR_Ireduce_sched_intra_tsp_flat_auto(const void *sendbuf, void *recvbuf, MPI_Aint count, + MPI_Datatype datatype, MPI_Op op, int root, + MPIR_Comm * comm_ptr, MPIR_TSP_sched_t sched); #endif /* MPIR_COLL_H_INCLUDED */ From fcd72ba47701bc1fb9674a292e2dabdbee0891cc Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 20:27:15 -0500 Subject: [PATCH 12/47] coll: remove fallback in collective algorithm The auto selection should take care of restrictions. Error rather than fallback. If user use CVAR to select specific algorithm, we should check restrictions before jumping the the algorithm. We will design a common fallback handling there. 
--- src/mpi/coll/allreduce/allreduce_intra_ccl.c | 3 +- src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h | 71 ++----------------- .../ch4/shm/posix/posix_coll_release_gather.h | 18 +++-- 3 files changed, 17 insertions(+), 75 deletions(-) diff --git a/src/mpi/coll/allreduce/allreduce_intra_ccl.c b/src/mpi/coll/allreduce/allreduce_intra_ccl.c index 57e3820762d..38c6732ea5e 100644 --- a/src/mpi/coll/allreduce/allreduce_intra_ccl.c +++ b/src/mpi/coll/allreduce/allreduce_intra_ccl.c @@ -46,5 +46,6 @@ int MPIR_Allreduce_intra_ccl(const void *sendbuf, void *recvbuf, MPI_Aint count, } fallback: - return MPIR_Allreduce_allcomm_auto(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); + /* FIXME: proper error */ + return MPI_ERR_OTHER; } diff --git a/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h b/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h index 4343057ae30..80bfb35607f 100644 --- a/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h +++ b/src/mpid/ch4/shm/posix/posix_coll_gpu_ipc.h @@ -187,9 +187,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_gpu_ipc_read(void *buffer, fn_fail: goto fn_exit; fallback: - /* Fall back to other algorithms as gpu ipc bcast cannot be used */ - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } @@ -278,10 +277,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_alltoall_gpu_ipc_read(const void *s fn_fail: goto fn_exit; fallback: - /* Fall back to other algorithms as gpu ipc alltoall cannot be used */ - mpi_errno = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } @@ -370,10 +367,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allgather_gpu_ipc_read(const void * fn_fail: goto fn_exit; fallback: - /* Fall back to other algorithms as gpu ipc allgather cannot be 
used */ - mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } @@ -467,62 +462,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allgatherv_gpu_ipc_read(const void fn_fail: goto fn_exit; fallback: - /* Fall back to other algorithms as gpu ipc allgatherv cannot be used */ - mpi_errno = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, - recvtype, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ goto fn_exit; } -#else /* MPIDI_CH4_SHM_ENABLE_GPU */ -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_gpu_ipc_read(void *buffer, - MPI_Aint count, - MPI_Datatype datatype, - int root, MPIR_Comm * comm_ptr, - int coll_attr) -{ - return MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_alltoall_gpu_ipc_read(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - int coll_attr) -{ - return MPIR_Alltoall_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allgather_gpu_ipc_read(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - MPI_Aint recvcount, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - int coll_attr) -{ - return MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, coll_attr); -} - -MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allgatherv_gpu_ipc_read(const void *sendbuf, - MPI_Aint sendcount, - MPI_Datatype sendtype, - void *recvbuf, - const MPI_Aint * recvcounts, - const MPI_Aint * displs, - MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, - int coll_attr) -{ - return MPIR_Allgatherv_impl(sendbuf, sendcount, 
sendtype, recvbuf, recvcounts, displs, recvtype, - comm_ptr, coll_attr); -} #endif /* !MPIDI_CH4_SHM_ENABLE_GPU */ #endif /* POSIX_COLL_GPU_IPC_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/posix/posix_coll_release_gather.h b/src/mpid/ch4/shm/posix/posix_coll_release_gather.h index e26f8ffdbe0..40a0612b256 100644 --- a/src/mpid/ch4/shm/posix/posix_coll_release_gather.h +++ b/src/mpid/ch4/shm/posix/posix_coll_release_gather.h @@ -151,9 +151,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_release_gather(void *buffer, fn_fail: goto fn_exit; fallback: - /* Fall back to other algo as release_gather based bcast cannot be used */ - mpi_errno = MPIR_Bcast_impl(buffer, count, datatype, root, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } @@ -250,9 +249,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_reduce_release_gather(const void *s fn_fail: goto fn_exit; fallback: - /* Fall back to other algo as release_gather algo cannot be used */ - mpi_errno = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } @@ -346,8 +344,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allreduce_release_gather(const void goto fn_exit; fallback: - mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } @@ -395,8 +393,8 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_barrier_release_gather(MPIR_Comm * goto fn_exit; fallback: - mpi_errno = MPIR_Barrier_impl(comm_ptr, coll_attr); - MPIR_ERR_CHECK(mpi_errno); + /* FIXME: proper error */ + mpi_errno = MPI_ERR_OTHER; goto fn_exit; } From 3d3d9023568460b72a2c00fb99470c40c14ff269 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sun, 24 Aug 2025 23:32:20 -0500 Subject: [PATCH 13/47] coll/prep: just use 
fallback in tsp auto functions Eventually we will make nonblocking compositional algorithms work by having the JSON tree check the sched framework types. For now, remove the json search and just use fallbacks. --- src/mpi/coll/iallreduce/iallreduce_tsp_auto.c | 142 ------------------ src/mpi/coll/ibarrier/ibarrier_tsp_auto.c | 60 -------- src/mpi/coll/ibcast/ibcast_tsp_auto.c | 97 ------------ src/mpi/coll/ireduce/ireduce_tsp_auto.c | 77 ---------- 4 files changed, 376 deletions(-) diff --git a/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c b/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c index e8ca1600030..9ad63c8d6d1 100644 --- a/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c +++ b/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c @@ -13,155 +13,13 @@ int MPIR_TSP_Iallreduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MPIR_Comm * comm, MPIR_TSP_sched_t sched) { int mpi_errno = MPI_SUCCESS; - int is_commutative = MPIR_Op_is_commutative(op); - int nranks; - int rank; - MPIR_COMM_RANK_SIZE(comm, rank, nranks); MPIR_Assert(comm->comm_kind == MPIR_COMM_KIND__INTRACOMM); - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__IALLREDUCE, - .comm_ptr = comm, - - .u.iallreduce.sendbuf = sendbuf, - .u.iallreduce.recvbuf = recvbuf, - .u.iallreduce.count = count, - .u.iallreduce.datatype = datatype, - .u.iallreduce.op = op, - }; - - MPII_Csel_container_s *cnt; - - switch (MPIR_CVAR_IALLREDUCE_INTRA_ALGORITHM) { - case MPIR_CVAR_IALLREDUCE_INTRA_ALGORITHM_tsp_recexch_single_buffer: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, - datatype, op, comm, - MPIR_IALLREDUCE_RECEXCH_TYPE_SINGLE_BUFFER, - MPIR_CVAR_IALLREDUCE_RECEXCH_KVAL, sched); - break; - - case MPIR_CVAR_IALLREDUCE_INTRA_ALGORITHM_tsp_recexch_multiple_buffer: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, - datatype, op, comm, - MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER, - MPIR_CVAR_IALLREDUCE_RECEXCH_KVAL, sched); - 
break; - - case MPIR_CVAR_IALLREDUCE_INTRA_ALGORITHM_tsp_tree: - /*Only knomial_1 tree supports non-commutative operations */ - MPII_COLLECTIVE_FALLBACK_CHECK(rank, is_commutative || - MPIR_Iallreduce_tree_type == - MPIR_TREE_TYPE_KNOMIAL_1, mpi_errno, - "Iallreduce gentran_tree cannot be applied.\n"); - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, - comm, MPIR_Iallreduce_tree_type, - MPIR_CVAR_IALLREDUCE_TREE_KVAL, - MPIR_CVAR_IALLREDUCE_TREE_PIPELINE_CHUNK_SIZE, - MPIR_CVAR_IALLREDUCE_TREE_BUFFER_PER_CHILD, - sched); - break; - - case MPIR_CVAR_IALLREDUCE_INTRA_ALGORITHM_tsp_ring: - MPII_COLLECTIVE_FALLBACK_CHECK(rank, is_commutative, mpi_errno, - "Iallreduce gentran_ring cannot be applied.\n"); - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_ring(sendbuf, recvbuf, count, datatype, - op, comm, sched); - break; - case MPIR_CVAR_IALLREDUCE_INTRA_ALGORITHM_tsp_recexch_reduce_scatter_recexch_allgatherv: - /* This algorithm will work for commutative - * operations and if the count is bigger than total - * number of ranks. If it not commutative or if the - * count < nranks, MPIR_Iallreduce_sched algorithm - * will be run */ - MPII_COLLECTIVE_FALLBACK_CHECK(rank, is_commutative && - count >= nranks, mpi_errno, - "Iallreduce gentran_recexch_reduce_scatter_recexch_allgatherv cannot be applied.\n"); - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch_reduce_scatter_recexch_allgatherv(sendbuf, - recvbuf, - count, - datatype, - op, - comm, - MPIR_CVAR_IALLREDUCE_RECEXCH_KVAL, - sched); - break; - default: - cnt = MPIR_Csel_search(comm->csel_comm, coll_sig); - MPIR_Assert(cnt); - - switch (cnt->id) { - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, - datatype, op, comm, - MPIR_IALLREDUCE_RECEXCH_TYPE_SINGLE_BUFFER, - cnt->u. - iallreduce.intra_tsp_recexch_single_buffer. 
- k, sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, - datatype, op, comm, - MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER, - cnt->u. - iallreduce.intra_tsp_recexch_single_buffer. - k, sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, - comm, - cnt->u.iallreduce. - intra_tsp_tree.tree_type, - cnt->u.iallreduce.intra_tsp_tree.k, - cnt->u.iallreduce. - intra_tsp_tree.chunk_size, - cnt->u.iallreduce. - intra_tsp_tree.buffer_per_child, - sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_ring(sendbuf, recvbuf, count, datatype, op, - comm, sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch_reduce_scatter_recexch_allgatherv - (sendbuf, recvbuf, count, datatype, op, comm, - cnt->u.iallreduce.intra_tsp_recexch_reduce_scatter_recexch_allgatherv.k, - sched); - break; - - default: - /* Replace this call with MPIR_Assert(0) when json files have gentran algos */ - goto fallback; - break; - } - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: mpi_errno = MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, datatype, op, comm, MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER, MPIR_CVAR_IALLREDUCE_RECEXCH_KVAL, sched); - fn_exit: return mpi_errno; - fn_fail: - goto fn_exit; } diff --git a/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c b/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c index 0d2f14ae2a2..094230a278c 100644 --- a/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c +++ b/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c @@ -10,69 +10,9 @@ int 
MPIR_TSP_Ibarrier_sched_intra_tsp_auto(MPIR_Comm * comm, MPIR_TSP_sched_t sc { int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__IBARRIER, - .comm_ptr = comm, - }; - MPII_Csel_container_s *cnt; - void *recvbuf = NULL; - - MPIR_Assert(comm->comm_kind == MPIR_COMM_KIND__INTRACOMM); - - switch (MPIR_CVAR_IBARRIER_INTRA_ALGORITHM) { - case MPIR_CVAR_IBARRIER_INTRA_ALGORITHM_tsp_recexch: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch(MPI_IN_PLACE, recvbuf, 0, - MPIR_BYTE_INTERNAL, MPI_SUM, comm, - MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER, - MPIR_CVAR_IBARRIER_RECEXCH_KVAL, sched); - break; - - case MPIR_CVAR_IBARRIER_INTRA_ALGORITHM_tsp_k_dissemination: - mpi_errno = - MPIR_TSP_Ibarrier_sched_intra_k_dissemination(comm, MPIR_CVAR_IBARRIER_DISSEM_KVAL, - sched); - break; - - default: - cnt = MPIR_Csel_search(comm->csel_comm, coll_sig); - MPIR_Assert(cnt); - - switch (cnt->id) { - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch: - mpi_errno = - MPIR_TSP_Iallreduce_sched_intra_recexch(MPI_IN_PLACE, recvbuf, 0, - MPIR_BYTE_INTERNAL, MPI_SUM, comm, - MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER, - cnt->u.ibarrier.intra_tsp_recexch.k, - sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination: - mpi_errno = - MPIR_TSP_Ibarrier_sched_intra_k_dissemination(comm, - cnt->u. - ibarrier.intra_tsp_k_dissemination. 
- k, sched); - break; - - default: - /* Replace this call with MPIR_Assert(0) when json files have gentran algos */ - goto fallback; - break; - } - } - - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: mpi_errno = MPIR_TSP_Iallreduce_sched_intra_recexch(MPI_IN_PLACE, NULL, 0, MPIR_BYTE_INTERNAL, MPI_SUM, comm, 0, 2, sched); - fn_exit: return mpi_errno; - fn_fail: - goto fn_exit; } diff --git a/src/mpi/coll/ibcast/ibcast_tsp_auto.c b/src/mpi/coll/ibcast/ibcast_tsp_auto.c index 2902d8d965f..f74dc33bfb2 100644 --- a/src/mpi/coll/ibcast/ibcast_tsp_auto.c +++ b/src/mpi/coll/ibcast/ibcast_tsp_auto.c @@ -54,105 +54,8 @@ int MPIR_TSP_Ibcast_sched_intra_tsp_auto(void *buffer, MPI_Aint count, MPI_Datat { int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__IBCAST, - .comm_ptr = comm_ptr, - - .u.ibcast.buffer = buffer, - .u.ibcast.count = count, - .u.ibcast.datatype = datatype, - .u.ibcast.root = root, - }; - MPII_Csel_container_s *cnt; - - MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM); - - switch (MPIR_CVAR_IBCAST_INTRA_ALGORITHM) { - case MPIR_CVAR_IBCAST_INTRA_ALGORITHM_tsp_tree: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_tree(buffer, count, datatype, root, comm_ptr, - MPIR_Ibcast_tree_type, - MPIR_CVAR_IBCAST_TREE_KVAL, - MPIR_CVAR_IBCAST_TREE_PIPELINE_CHUNK_SIZE, sched); - break; - - case MPIR_CVAR_IBCAST_INTRA_ALGORITHM_tsp_scatterv_recexch_allgatherv: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(buffer, count, datatype, - root, comm_ptr, - MPIR_CVAR_IALLGATHERV_INTRA_ALGORITHM_tsp_recexch_doubling, - MPIR_CVAR_IBCAST_SCATTERV_KVAL, - MPIR_CVAR_IBCAST_ALLGATHERV_RECEXCH_KVAL, - sched); - break; - - case MPIR_CVAR_IBCAST_INTRA_ALGORITHM_tsp_scatterv_ring_allgatherv: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_scatterv_ring_allgatherv(buffer, count, datatype, - root, comm_ptr, 1, sched); - break; - - case MPIR_CVAR_IBCAST_INTRA_ALGORITHM_tsp_ring: - mpi_errno = - 
MPIR_TSP_Ibcast_sched_intra_tree(buffer, count, datatype, root, comm_ptr, - MPIR_TREE_TYPE_KARY, 1, - MPIR_CVAR_IBCAST_RING_CHUNK_SIZE, sched); - break; - - default: - cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig); - MPIR_Assert(cnt); - - switch (cnt->id) { - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_tree(buffer, count, datatype, root, comm_ptr, - cnt->u.ibcast.intra_tsp_tree.tree_type, - cnt->u.ibcast.intra_tsp_tree.k, - cnt->u.ibcast.intra_tsp_tree.chunk_size, - sched); - break; - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(buffer, count, datatype, - root, comm_ptr, - MPIR_CVAR_IALLGATHERV_INTRA_ALGORITHM_tsp_recexch_doubling, - cnt->u. - ibcast.intra_tsp_scatterv_recexch_allgatherv.scatterv_k, - cnt->u. - ibcast.intra_tsp_scatterv_recexch_allgatherv.allgatherv_k, - sched); - break; - - case MPIR_CVAR_IBCAST_INTRA_ALGORITHM_tsp_scatterv_ring_allgatherv: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_scatterv_ring_allgatherv(buffer, count, - datatype, root, - comm_ptr, 1, sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring: - mpi_errno = - MPIR_TSP_Ibcast_sched_intra_tree(buffer, count, datatype, root, comm_ptr, - MPIR_TREE_TYPE_KARY, 1, - cnt->u.ibcast.intra_tsp_tree.chunk_size, - sched); - break; - default: - /* Replace this call with MPIR_Assert(0) when json files have gentran algos */ - goto fallback; - break; - } - } - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: mpi_errno = MPIR_Ibcast_sched_intra_tsp_flat_auto(buffer, count, datatype, root, comm_ptr, sched); - fn_exit: return mpi_errno; - fn_fail: - goto fn_exit; } diff --git a/src/mpi/coll/ireduce/ireduce_tsp_auto.c b/src/mpi/coll/ireduce/ireduce_tsp_auto.c index 1666e2dd029..5f13cee8a04 100644 --- a/src/mpi/coll/ireduce/ireduce_tsp_auto.c +++ 
b/src/mpi/coll/ireduce/ireduce_tsp_auto.c @@ -44,80 +44,6 @@ int MPIR_TSP_Ireduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MP { int mpi_errno = MPI_SUCCESS; - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE, - .comm_ptr = comm_ptr, - - .u.ireduce.sendbuf = sendbuf, - .u.ireduce.recvbuf = recvbuf, - .u.ireduce.count = count, - .u.ireduce.datatype = datatype, - .u.ireduce.op = op, - .u.ireduce.root = root, - }; - MPII_Csel_container_s *cnt; - - MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM); - - switch (MPIR_CVAR_IREDUCE_INTRA_ALGORITHM) { - case MPIR_CVAR_IREDUCE_INTRA_ALGORITHM_tsp_tree: - /*Only knomial_1 tree supports non-commutative operations */ - MPII_COLLECTIVE_FALLBACK_CHECK(comm_ptr->rank, MPIR_Op_is_commutative(op) || - MPIR_Ireduce_tree_type == MPIR_TREE_TYPE_KNOMIAL_1, - mpi_errno, "Ireduce gentran_tree cannot be applied.\n"); - mpi_errno = - MPIR_TSP_Ireduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, root, - comm_ptr, MPIR_Ireduce_tree_type, - MPIR_CVAR_IREDUCE_TREE_KVAL, - MPIR_CVAR_IREDUCE_TREE_PIPELINE_CHUNK_SIZE, - MPIR_CVAR_IREDUCE_TREE_BUFFER_PER_CHILD, sched); - break; - - case MPIR_CVAR_IREDUCE_INTRA_ALGORITHM_tsp_ring: - mpi_errno = - MPIR_TSP_Ireduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, root, - comm_ptr, MPIR_TREE_TYPE_KARY, 1, - MPIR_CVAR_IREDUCE_RING_CHUNK_SIZE, - MPIR_CVAR_IREDUCE_TREE_BUFFER_PER_CHILD, sched); - break; - - default: - cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig); - MPIR_Assert(cnt); - - switch (cnt->id) { - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree: - mpi_errno = - MPIR_TSP_Ireduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, - root, comm_ptr, - cnt->u.ireduce.intra_tsp_tree.tree_type, - cnt->u.ireduce.intra_tsp_tree.k, - cnt->u.ireduce.intra_tsp_tree.chunk_size, - cnt->u.ireduce. 
- intra_tsp_tree.buffer_per_child, sched); - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring: - mpi_errno = - MPIR_TSP_Ireduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, - root, comm_ptr, MPIR_TREE_TYPE_KARY, 1, - cnt->u.ireduce.intra_tsp_ring.chunk_size, - cnt->u.ireduce. - intra_tsp_ring.buffer_per_child, sched); - break; - - default: - /* Replace this call with MPIR_Assert(0) when json files have gentran algos */ - mpi_errno = - MPIR_Ireduce_sched_intra_tsp_flat_auto(sendbuf, recvbuf, count, - datatype, op, root, comm_ptr, sched); - break; - } - } - MPIR_ERR_CHECK(mpi_errno); - goto fn_exit; - - fallback: mpi_errno = MPIR_TSP_Ireduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, MPIR_TREE_TYPE_KARY, 1, @@ -125,8 +51,5 @@ int MPIR_TSP_Ireduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MP MPIR_CVAR_IREDUCE_TREE_BUFFER_PER_CHILD, sched); - fn_exit: return mpi_errno; - fn_fail: - goto fn_exit; } From 758fad6109602ed5a40aff13c5dca52ff86b779a Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sun, 24 Aug 2025 23:40:32 -0500 Subject: [PATCH 14/47] coll/prep: remove the optional json search for parameters --- src/mpi/coll/allreduce/allreduce_intra_tree.c | 20 ------------------ src/mpi/coll/bcast/bcast_intra_tree.c | 21 ------------------- src/mpi/coll/ireduce/ireduce_tsp_tree.c | 21 ------------------- 3 files changed, 62 deletions(-) diff --git a/src/mpi/coll/allreduce/allreduce_intra_tree.c b/src/mpi/coll/allreduce/allreduce_intra_tree.c index df779180e4a..272dc15d371 100644 --- a/src/mpi/coll/allreduce/allreduce_intra_tree.c +++ b/src/mpi/coll/allreduce/allreduce_intra_tree.c @@ -69,31 +69,11 @@ int MPIR_Allreduce_intra_tree(const void *sendbuf, MPIR_Treealgo_tree_create_topo_aware(comm_ptr, tree_type, k, root, MPIR_CVAR_ALLREDUCE_TOPO_REORDER_ENABLE, &my_tree); } else if (tree_type == MPIR_TREE_TYPE_TOPOLOGY_WAVE) { - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = 
MPIR_CSEL_COLL_TYPE__ALLREDUCE, - .comm_ptr = comm_ptr, - .u.allreduce.sendbuf = sendbuf, - .u.allreduce.recvbuf = recvbuf, - .u.allreduce.count = count, - .u.allreduce.datatype = datatype, - .u.allreduce.op = op, - }; - int overhead = MPIR_CVAR_ALLREDUCE_TOPO_OVERHEAD; int lat_diff_groups = MPIR_CVAR_ALLREDUCE_TOPO_DIFF_GROUPS; int lat_diff_switches = MPIR_CVAR_ALLREDUCE_TOPO_DIFF_SWITCHES; int lat_same_switches = MPIR_CVAR_ALLREDUCE_TOPO_SAME_SWITCHES; - MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig); - MPIR_Assert(cnt); - - if (cnt->id == MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree) { - overhead = cnt->u.allreduce.intra_tree.topo_overhead; - lat_diff_groups = cnt->u.allreduce.intra_tree.topo_diff_groups; - lat_diff_switches = cnt->u.allreduce.intra_tree.topo_diff_switches; - lat_same_switches = cnt->u.allreduce.intra_tree.topo_same_switches; - } - mpi_errno = MPIR_Treealgo_tree_create_topo_wave(comm_ptr, k, root, MPIR_CVAR_ALLREDUCE_TOPO_REORDER_ENABLE, diff --git a/src/mpi/coll/bcast/bcast_intra_tree.c b/src/mpi/coll/bcast/bcast_intra_tree.c index f53664b61cf..cd504c2a793 100644 --- a/src/mpi/coll/bcast/bcast_intra_tree.c +++ b/src/mpi/coll/bcast/bcast_intra_tree.c @@ -83,27 +83,6 @@ int MPIR_Bcast_intra_tree(void *buffer, int lat_diff_switches = MPIR_CVAR_BCAST_TOPO_DIFF_SWITCHES; int lat_same_switches = MPIR_CVAR_BCAST_TOPO_SAME_SWITCHES; - if (comm_ptr->csel_comm) { - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__BCAST, - .comm_ptr = comm_ptr, - .u.bcast.buffer = buffer, - .u.bcast.count = count, - .u.bcast.datatype = datatype, - .u.bcast.root = root, - }; - - MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig); - MPIR_Assert(cnt); - - if (cnt->id == MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree) { - overhead = cnt->u.bcast.intra_tree.topo_overhead; - lat_diff_groups = cnt->u.bcast.intra_tree.topo_diff_groups; - lat_diff_switches = 
cnt->u.bcast.intra_tree.topo_diff_switches; - lat_same_switches = cnt->u.bcast.intra_tree.topo_same_switches; - } - } - mpi_errno = MPIR_Treealgo_tree_create_topo_wave(comm_ptr, branching_factor, root, MPIR_CVAR_BCAST_TOPO_REORDER_ENABLE, diff --git a/src/mpi/coll/ireduce/ireduce_tsp_tree.c b/src/mpi/coll/ireduce/ireduce_tsp_tree.c index c5f668ab286..47c8123dff0 100644 --- a/src/mpi/coll/ireduce/ireduce_tsp_tree.c +++ b/src/mpi/coll/ireduce/ireduce_tsp_tree.c @@ -71,32 +71,11 @@ int MPIR_TSP_Ireduce_sched_intra_tree(const void *sendbuf, void *recvbuf, MPI_Ai MPIR_Treealgo_tree_create_topo_aware(comm, tree_type, k, tree_root, MPIR_CVAR_IREDUCE_TOPO_REORDER_ENABLE, &my_tree); } else if (tree_type == MPIR_TREE_TYPE_TOPOLOGY_WAVE) { - MPIR_Csel_coll_sig_s coll_sig = { - .coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE, - .comm_ptr = comm, - .u.ireduce.sendbuf = sendbuf, - .u.ireduce.recvbuf = recvbuf, - .u.ireduce.count = count, - .u.ireduce.datatype = datatype, - .u.ireduce.op = op, - .u.ireduce.root = tree_root, - }; - int overhead = MPIR_CVAR_IREDUCE_TOPO_OVERHEAD; int lat_diff_groups = MPIR_CVAR_IREDUCE_TOPO_DIFF_GROUPS; int lat_diff_switches = MPIR_CVAR_IREDUCE_TOPO_DIFF_SWITCHES; int lat_same_switches = MPIR_CVAR_IREDUCE_TOPO_SAME_SWITCHES; - MPII_Csel_container_s *cnt = MPIR_Csel_search(comm->csel_comm, coll_sig); - MPIR_Assert(cnt); - - if (cnt->id == MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree) { - overhead = cnt->u.ireduce.intra_tsp_tree.topo_overhead; - lat_diff_groups = cnt->u.ireduce.intra_tsp_tree.topo_diff_groups; - lat_diff_switches = cnt->u.ireduce.intra_tsp_tree.topo_diff_switches; - lat_same_switches = cnt->u.ireduce.intra_tsp_tree.topo_same_switches; - } - mpi_errno = MPIR_Treealgo_tree_create_topo_wave(comm, k, tree_root, MPIR_CVAR_IREDUCE_TOPO_REORDER_ENABLE, From 2dfe0fc9339d6cbd4828516c4ff4757603dc258b Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 23 Aug 2025 17:45:36 -0500 Subject: [PATCH 15/47] coll/prep: call MPIR 
collectives in compositional algorithms For consistency, call MPIR collectives (e.g. MPIR_Bcast) in compositional algorithms. TODO: rewrite compositional algorithms using coll_sig and container. --- .../bcast/bcast_inter_remote_send_local_bcast.c | 2 +- ...e_scatter_inter_remote_reduce_local_scatter.c | 16 ++++++++-------- ...ter_block_inter_remote_reduce_local_scatter.c | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/mpi/coll/bcast/bcast_inter_remote_send_local_bcast.c b/src/mpi/coll/bcast/bcast_inter_remote_send_local_bcast.c index e927cbf2814..627290aa4f8 100644 --- a/src/mpi/coll/bcast/bcast_inter_remote_send_local_bcast.c +++ b/src/mpi/coll/bcast/bcast_inter_remote_send_local_bcast.c @@ -50,7 +50,7 @@ int MPIR_Bcast_inter_remote_send_local_bcast(void *buffer, /* now do the usual broadcast on this intracommunicator * with rank 0 as root. */ - mpi_errno = MPIR_Bcast_allcomm_auto(buffer, count, datatype, 0, newcomm_ptr, coll_attr); + mpi_errno = MPIR_Bcast(buffer, count, datatype, 0, newcomm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } diff --git a/src/mpi/coll/reduce_scatter/reduce_scatter_inter_remote_reduce_local_scatter.c b/src/mpi/coll/reduce_scatter/reduce_scatter_inter_remote_reduce_local_scatter.c index 9b79908fe1f..e19a6b1217e 100644 --- a/src/mpi/coll/reduce_scatter/reduce_scatter_inter_remote_reduce_local_scatter.c +++ b/src/mpi/coll/reduce_scatter/reduce_scatter_inter_remote_reduce_local_scatter.c @@ -58,26 +58,26 @@ int MPIR_Reduce_scatter_inter_remote_reduce_local_scatter(const void *sendbuf, v if (comm_ptr->is_low_group) { /* reduce from right group to rank 0 */ root = (rank == 0) ? 
MPI_ROOT : MPI_PROC_NULL; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* reduce to rank 0 of right group */ root = 0; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } else { /* reduce to rank 0 of left group */ root = 0; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* reduce from right group to rank 0 */ root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } diff --git a/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_inter_remote_reduce_local_scatter.c b/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_inter_remote_reduce_local_scatter.c index 06fd800823b..c088c3cf74b 100644 --- a/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_inter_remote_reduce_local_scatter.c +++ b/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_inter_remote_reduce_local_scatter.c @@ -49,26 +49,26 @@ int MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter(const void *send if (comm_ptr->is_low_group) { /* reduce from right group to rank 0 */ root = (rank == 0) ? 
MPI_ROOT : MPI_PROC_NULL; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* reduce to rank 0 of right group */ root = 0; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } else { /* reduce to rank 0 of left group */ root = 0; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); /* reduce from right group to rank 0 */ root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL; - mpi_errno = MPIR_Reduce_allcomm_auto(sendbuf, tmp_buf, total_count, datatype, op, - root, comm_ptr, coll_attr); + mpi_errno = MPIR_Reduce(sendbuf, tmp_buf, total_count, datatype, op, + root, comm_ptr, coll_attr); MPIR_ERR_CHECK(mpi_errno); } From 8ec926cd198129ed8673c27adbfc7c0249d74213 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Tue, 19 Aug 2025 17:13:17 -0500 Subject: [PATCH 16/47] coll/csel: rename coll_info to coll_sig Rename coll_info to coll_sig for type MPIR_Csel_coll_sig_s. This is the convention other than in csel.c. Let's make it consistent. 
--- src/mpi/coll/src/csel.c | 307 ++++++++++++++++++++-------------------- 1 file changed, 153 insertions(+), 154 deletions(-) diff --git a/src/mpi/coll/src/csel.c b/src/mpi/coll/src/csel.c index b2712350764..43e5a034bcd 100644 --- a/src/mpi/coll/src/csel.c +++ b/src/mpi/coll/src/csel.c @@ -744,115 +744,115 @@ int MPIR_Csel_free(void *csel_) return mpi_errno; } -static inline bool is_sendbuf_inplace(MPIR_Csel_coll_sig_s coll_info) +static inline bool is_sendbuf_inplace(MPIR_Csel_coll_sig_s coll_sig) { bool sendbuf_inplace = false; - switch (coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__ALLTOALL: - sendbuf_inplace = (coll_info.u.alltoall.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig.u.alltoall.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__IALLTOALL: - sendbuf_inplace = (coll_info.u.ialltoall.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig.u.ialltoall.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__ALLTOALLV: - sendbuf_inplace = (coll_info.u.alltoallv.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig.u.alltoallv.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__IALLTOALLV: - sendbuf_inplace = (coll_info.u.ialltoallv.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig.u.ialltoallv.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__ALLTOALLW: - sendbuf_inplace = (coll_info.u.alltoallw.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig.u.alltoallw.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__IALLTOALLW: - sendbuf_inplace = (coll_info.u.ialltoallw.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig.u.ialltoallw.sendbuf == MPI_IN_PLACE); break; default: fprintf(stderr, "is_sendbuf_inplace not defined for coll_type %d\n", - coll_info.coll_type); + coll_sig.coll_type); MPIR_Assert(0); break; } return sendbuf_inplace; } -static inline bool is_commutative(MPIR_Csel_coll_sig_s coll_info) +static inline bool is_commutative(MPIR_Csel_coll_sig_s coll_sig) 
{ bool commutative = false; - switch (coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - commutative = MPIR_Op_is_commutative(coll_info.u.allreduce.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.allreduce.op); break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - commutative = MPIR_Op_is_commutative(coll_info.u.iallreduce.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.iallreduce.op); break; case MPIR_CSEL_COLL_TYPE__REDUCE: - commutative = MPIR_Op_is_commutative(coll_info.u.reduce.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.reduce.op); break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - commutative = MPIR_Op_is_commutative(coll_info.u.ireduce.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.ireduce.op); break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - commutative = MPIR_Op_is_commutative(coll_info.u.reduce_scatter.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.reduce_scatter.op); break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - commutative = MPIR_Op_is_commutative(coll_info.u.ireduce_scatter.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.ireduce_scatter.op); break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - commutative = MPIR_Op_is_commutative(coll_info.u.reduce_scatter_block.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.reduce_scatter_block.op); break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - commutative = MPIR_Op_is_commutative(coll_info.u.ireduce_scatter_block.op); + commutative = MPIR_Op_is_commutative(coll_sig.u.ireduce_scatter_block.op); break; default: - fprintf(stderr, "is_commutative not defined for coll_type %d\n", coll_info.coll_type); + fprintf(stderr, "is_commutative not defined for coll_type %d\n", coll_sig.coll_type); MPIR_Assert(0); break; } return commutative; } -static inline bool is_op_built_in(MPIR_Csel_coll_sig_s coll_info) +static inline bool is_op_built_in(MPIR_Csel_coll_sig_s coll_sig) { bool op_built_in = false; - switch 
(coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - op_built_in = HANDLE_GET_KIND(coll_info.u.allreduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig.u.allreduce.op) == HANDLE_KIND_BUILTIN; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - op_built_in = HANDLE_GET_KIND(coll_info.u.iallreduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig.u.iallreduce.op) == HANDLE_KIND_BUILTIN; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - op_built_in = HANDLE_GET_KIND(coll_info.u.reduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig.u.reduce.op) == HANDLE_KIND_BUILTIN; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - op_built_in = HANDLE_GET_KIND(coll_info.u.ireduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig.u.ireduce.op) == HANDLE_KIND_BUILTIN; break; default: - fprintf(stderr, "is_op_builtin not defined for coll_type %d\n", coll_info.coll_type); + fprintf(stderr, "is_op_builtin not defined for coll_type %d\n", coll_sig.coll_type); MPIR_Assert(0); break; } return op_built_in; } -static inline bool is_block_regular(MPIR_Csel_coll_sig_s coll_info) +static inline bool is_block_regular(MPIR_Csel_coll_sig_s coll_sig) { bool is_regular = true; int i = 0; const MPI_Aint *recvcounts = NULL; - switch (coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - recvcounts = coll_info.u.reduce_scatter.recvcounts; + recvcounts = coll_sig.u.reduce_scatter.recvcounts; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - recvcounts = coll_info.u.ireduce_scatter.recvcounts; + recvcounts = coll_sig.u.ireduce_scatter.recvcounts; break; default: MPIR_Assert(0); break; } - for (i = 0; i < (coll_info.comm_ptr->local_size - 1); ++i) { + for (i = 0; i < (coll_sig.comm_ptr->local_size - 1); ++i) { if (recvcounts[i] != recvcounts[i + 1]) { is_regular = false; break; @@ -861,53 +861,53 @@ static inline bool is_block_regular(MPIR_Csel_coll_sig_s 
coll_info) return is_regular; } -static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s coll_info) +static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s coll_sig) { MPI_Aint msgsize = 0; - switch (coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.allreduce.datatype, msgsize); - msgsize *= coll_info.u.allreduce.count; + MPIR_Datatype_get_size_macro(coll_sig.u.allreduce.datatype, msgsize); + msgsize *= coll_sig.u.allreduce.count; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.iallreduce.datatype, msgsize); - msgsize *= coll_info.u.iallreduce.count; + MPIR_Datatype_get_size_macro(coll_sig.u.iallreduce.datatype, msgsize); + msgsize *= coll_sig.u.iallreduce.count; break; case MPIR_CSEL_COLL_TYPE__BCAST: - MPIR_Datatype_get_size_macro(coll_info.u.bcast.datatype, msgsize); - msgsize *= coll_info.u.bcast.count; + MPIR_Datatype_get_size_macro(coll_sig.u.bcast.datatype, msgsize); + msgsize *= coll_sig.u.bcast.count; break; case MPIR_CSEL_COLL_TYPE__IBCAST: - MPIR_Datatype_get_size_macro(coll_info.u.ibcast.datatype, msgsize); - msgsize *= coll_info.u.ibcast.count; + MPIR_Datatype_get_size_macro(coll_sig.u.ibcast.datatype, msgsize); + msgsize *= coll_sig.u.ibcast.count; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.reduce.datatype, msgsize); - msgsize *= coll_info.u.reduce.count; + MPIR_Datatype_get_size_macro(coll_sig.u.reduce.datatype, msgsize); + msgsize *= coll_sig.u.reduce.count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.ireduce.datatype, msgsize); - msgsize *= coll_info.u.ireduce.count; + MPIR_Datatype_get_size_macro(coll_sig.u.ireduce.datatype, msgsize); + msgsize *= coll_sig.u.ireduce.count; break; case MPIR_CSEL_COLL_TYPE__ALLTOALL: - MPIR_Datatype_get_size_macro(coll_info.u.alltoall.sendtype, msgsize); - msgsize *= coll_info.u.alltoall.sendcount; + 
MPIR_Datatype_get_size_macro(coll_sig.u.alltoall.sendtype, msgsize); + msgsize *= coll_sig.u.alltoall.sendcount; break; case MPIR_CSEL_COLL_TYPE__IALLTOALL: - MPIR_Datatype_get_size_macro(coll_info.u.ialltoall.sendtype, msgsize); - msgsize *= coll_info.u.ialltoall.sendcount; + MPIR_Datatype_get_size_macro(coll_sig.u.ialltoall.sendtype, msgsize); + msgsize *= coll_sig.u.ialltoall.sendcount; break; default: - fprintf(stderr, "avg_msg_size not defined for coll_type %d\n", coll_info.coll_type); + fprintf(stderr, "avg_msg_size not defined for coll_type %d\n", coll_sig.coll_type); MPIR_Assert(0); break; } @@ -915,56 +915,56 @@ static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s coll_info) return msgsize; } -static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s coll_info) +static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s coll_sig) { MPI_Aint count = 0; int i = 0; - int comm_size = coll_info.comm_ptr->local_size; + int comm_size = coll_sig.comm_ptr->local_size; - switch (coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__BCAST: - count = coll_info.u.bcast.count; + count = coll_sig.u.bcast.count; break; case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - count = coll_info.u.allreduce.count; + count = coll_sig.u.allreduce.count; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - count = coll_info.u.iallreduce.count; + count = coll_sig.u.iallreduce.count; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - count = coll_info.u.reduce.count; + count = coll_sig.u.reduce.count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - count = coll_info.u.ireduce.count; + count = coll_sig.u.ireduce.count; break; case MPIR_CSEL_COLL_TYPE__ALLGATHER: - count = coll_info.u.allgather.recvcount; + count = coll_sig.u.allgather.recvcount; break; case MPIR_CSEL_COLL_TYPE__ALLGATHERV: count = 0; for (i = 0; i < comm_size; i++) - count += coll_info.u.allgatherv.recvcounts[i]; + count += coll_sig.u.allgatherv.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: for (i = 0; i < 
comm_size; i++) - count += coll_info.u.reduce_scatter.recvcounts[i]; + count += coll_sig.u.reduce_scatter.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - count = coll_info.u.reduce_scatter_block.recvcount; + count = coll_sig.u.reduce_scatter_block.recvcount; break; case MPIR_CSEL_COLL_TYPE__IALLGATHER: - count = coll_info.u.iallgather.recvcount; + count = coll_sig.u.iallgather.recvcount; break; case MPIR_CSEL_COLL_TYPE__IALLGATHERV: for (i = 0; i < comm_size; i++) - count += coll_info.u.iallgatherv.recvcounts[i]; + count += coll_sig.u.iallgatherv.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: for (i = 0; i < comm_size; i++) - count += coll_info.u.ireduce_scatter.recvcounts[i]; + count += coll_sig.u.ireduce_scatter.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - count = coll_info.u.ireduce_scatter_block.recvcount; + count = coll_sig.u.ireduce_scatter_block.recvcount; break; default: MPIR_Assert(0); @@ -973,37 +973,37 @@ static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s coll_info) return count; } -static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_info) +static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_sig) { MPI_Aint total_bytes = 0, i = 0, count = 0, typesize = 0; - int comm_size = coll_info.comm_ptr->local_size; + int comm_size = coll_sig.comm_ptr->local_size; - switch (coll_info.coll_type) { + switch (coll_sig.coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.allreduce.datatype, total_bytes); - total_bytes *= coll_info.u.allreduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.allreduce.datatype, total_bytes); + total_bytes *= coll_sig.u.allreduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__BCAST: - MPIR_Datatype_get_size_macro(coll_info.u.bcast.datatype, total_bytes); - total_bytes *= coll_info.u.bcast.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.bcast.datatype, total_bytes); + 
total_bytes *= coll_sig.u.bcast.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.reduce.datatype, total_bytes); - total_bytes *= coll_info.u.reduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.reduce.datatype, total_bytes); + total_bytes *= coll_sig.u.reduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__ALLTOALL: - MPIR_Datatype_get_size_macro(coll_info.u.alltoall.sendtype, total_bytes); - total_bytes *= coll_info.u.alltoall.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.alltoall.sendtype, total_bytes); + total_bytes *= coll_sig.u.alltoall.sendcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__ALLTOALLV: - MPIR_Datatype_get_size_macro(coll_info.u.alltoallv.sendtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig.u.alltoallv.sendtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_info.u.alltoallv.sendcounts[i]; + count += coll_sig.u.alltoallv.sendcounts[i]; total_bytes *= count; break; @@ -1011,91 +1011,91 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_info) count = 0; typesize = 0; for (i = 0; i < comm_size; i++) { - MPIR_Datatype_get_size_macro(coll_info.u.alltoallw.sendtypes[i], typesize); - count = coll_info.u.alltoallw.sendcounts[i]; + MPIR_Datatype_get_size_macro(coll_sig.u.alltoallw.sendtypes[i], typesize); + count = coll_sig.u.alltoallw.sendcounts[i]; total_bytes += (count * typesize); } break; case MPIR_CSEL_COLL_TYPE__ALLGATHER: - MPIR_Datatype_get_size_macro(coll_info.u.allgather.recvtype, total_bytes); - total_bytes *= coll_info.u.allgather.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.allgather.recvtype, total_bytes); + total_bytes *= coll_sig.u.allgather.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__ALLGATHERV: - MPIR_Datatype_get_size_macro(coll_info.u.allgatherv.recvtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig.u.allgatherv.recvtype, total_bytes); count = 0; 
for (i = 0; i < comm_size; i++) - count += coll_info.u.allgatherv.recvcounts[i]; + count += coll_sig.u.allgatherv.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__GATHER: - if (coll_info.u.gather.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_info.u.gather.recvtype, total_bytes); + if (coll_sig.u.gather.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig.u.gather.recvtype, total_bytes); /* use remote size for intercomm root */ - if (coll_info.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_info.u.gather.recvcount * (coll_info.comm_ptr->remote_size); + if (coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig.u.gather.recvcount * (coll_sig.comm_ptr->remote_size); else - total_bytes = coll_info.u.gather.recvcount * comm_size; + total_bytes = coll_sig.u.gather.recvcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_info.u.gather.sendtype, total_bytes); - total_bytes = coll_info.u.gather.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.gather.sendtype, total_bytes); + total_bytes = coll_sig.u.gather.sendcount * comm_size; } break; case MPIR_CSEL_COLL_TYPE__SCATTER: - if (coll_info.u.scatter.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_info.u.scatter.sendtype, total_bytes); + if (coll_sig.u.scatter.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig.u.scatter.sendtype, total_bytes); /* use remote size for intercomm root */ - if (coll_info.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_info.u.scatter.sendcount * (coll_info.comm_ptr->remote_size); + if (coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig.u.scatter.sendcount * (coll_sig.comm_ptr->remote_size); else - total_bytes = coll_info.u.scatter.sendcount * comm_size; + total_bytes = coll_sig.u.scatter.sendcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_info.u.scatter.recvtype, total_bytes); - total_bytes = 
coll_info.u.scatter.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.scatter.recvtype, total_bytes); + total_bytes = coll_sig.u.scatter.recvcount * comm_size; } break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - MPIR_Datatype_get_size_macro(coll_info.u.reduce_scatter.datatype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig.u.reduce_scatter.datatype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_info.u.reduce_scatter.recvcounts[i]; + count += coll_sig.u.reduce_scatter.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - MPIR_Datatype_get_size_macro(coll_info.u.reduce_scatter_block.datatype, total_bytes); - total_bytes *= coll_info.u.reduce_scatter_block.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.reduce_scatter_block.datatype, total_bytes); + total_bytes *= coll_sig.u.reduce_scatter_block.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.iallreduce.datatype, total_bytes); - total_bytes *= coll_info.u.iallreduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.iallreduce.datatype, total_bytes); + total_bytes *= coll_sig.u.iallreduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__IBCAST: - MPIR_Datatype_get_size_macro(coll_info.u.ibcast.datatype, total_bytes); - total_bytes *= coll_info.u.ibcast.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.ibcast.datatype, total_bytes); + total_bytes *= coll_sig.u.ibcast.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - MPIR_Datatype_get_size_macro(coll_info.u.ireduce.datatype, total_bytes); - total_bytes *= coll_info.u.ireduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.ireduce.datatype, total_bytes); + total_bytes *= coll_sig.u.ireduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLTOALL: - MPIR_Datatype_get_size_macro(coll_info.u.ialltoall.sendtype, total_bytes); - total_bytes *= 
coll_info.u.ialltoall.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.ialltoall.sendtype, total_bytes); + total_bytes *= coll_sig.u.ialltoall.sendcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLTOALLV: - MPIR_Datatype_get_size_macro(coll_info.u.ialltoallv.sendtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig.u.ialltoallv.sendtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_info.u.ialltoallv.sendcounts[i]; + count += coll_sig.u.ialltoallv.sendcounts[i]; total_bytes *= count; break; @@ -1103,64 +1103,63 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_info) count = 0; typesize = 0; for (i = 0; i < comm_size; i++) { - MPIR_Datatype_get_size_macro(coll_info.u.ialltoallw.sendtypes[i], typesize); - count = coll_info.u.ialltoallw.sendcounts[i]; + MPIR_Datatype_get_size_macro(coll_sig.u.ialltoallw.sendtypes[i], typesize); + count = coll_sig.u.ialltoallw.sendcounts[i]; total_bytes += (count * typesize); } break; case MPIR_CSEL_COLL_TYPE__IALLGATHER: - MPIR_Datatype_get_size_macro(coll_info.u.iallgather.recvtype, total_bytes); - total_bytes *= coll_info.u.iallgather.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.iallgather.recvtype, total_bytes); + total_bytes *= coll_sig.u.iallgather.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLGATHERV: - MPIR_Datatype_get_size_macro(coll_info.u.iallgatherv.recvtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig.u.iallgatherv.recvtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_info.u.iallgatherv.recvcounts[i]; + count += coll_sig.u.iallgatherv.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - MPIR_Datatype_get_size_macro(coll_info.u.ireduce_scatter.datatype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig.u.ireduce_scatter.datatype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_info.u.ireduce_scatter.recvcounts[i]; 
+ count += coll_sig.u.ireduce_scatter.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - MPIR_Datatype_get_size_macro(coll_info.u.ireduce_scatter_block.datatype, total_bytes); - total_bytes = coll_info.u.ireduce_scatter_block.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.ireduce_scatter_block.datatype, total_bytes); + total_bytes = coll_sig.u.ireduce_scatter_block.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IGATHER: - if (coll_info.u.igather.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_info.u.igather.recvtype, total_bytes); + if (coll_sig.u.igather.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig.u.igather.recvtype, total_bytes); /* use remote size for intercomm root */ - if (coll_info.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_info.u.igather.recvcount * (coll_info.comm_ptr->remote_size); + if (coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig.u.igather.recvcount * (coll_sig.comm_ptr->remote_size); else - total_bytes = coll_info.u.igather.recvcount * comm_size; + total_bytes = coll_sig.u.igather.recvcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_info.u.igather.sendtype, total_bytes); - total_bytes = coll_info.u.igather.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.igather.sendtype, total_bytes); + total_bytes = coll_sig.u.igather.sendcount * comm_size; } break; case MPIR_CSEL_COLL_TYPE__ISCATTER: - if (coll_info.u.iscatter.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_info.u.iscatter.sendtype, total_bytes); + if (coll_sig.u.iscatter.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig.u.iscatter.sendtype, total_bytes); /* use remote size for intercomm root */ - if (coll_info.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = - coll_info.u.iscatter.sendcount * (coll_info.comm_ptr->remote_size); + if (coll_sig.comm_ptr->comm_kind == 
MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig.u.iscatter.sendcount * (coll_sig.comm_ptr->remote_size); else - total_bytes = coll_info.u.iscatter.sendcount * comm_size; + total_bytes = coll_sig.u.iscatter.sendcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_info.u.iscatter.recvtype, total_bytes); - total_bytes = coll_info.u.iscatter.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig.u.iscatter.recvtype, total_bytes); + total_bytes = coll_sig.u.iscatter.recvcount * comm_size; } break; @@ -1172,11 +1171,11 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_info) return total_bytes; } -void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_info) +void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_sig) { csel_s *csel = (csel_s *) csel_; csel_node_s *node = NULL; - MPIR_Comm *comm_ptr = coll_info.comm_ptr; + MPIR_Comm *comm_ptr = coll_sig.comm_ptr; MPIR_Assert(csel_); @@ -1184,7 +1183,7 @@ void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_info) if (csel->type == CSEL_TYPE__ROOT) root = csel->u.root.tree; else - root = csel->u.pruned.coll_trees[coll_info.coll_type]; + root = csel->u.pruned.coll_trees[coll_sig.coll_type]; for (node = root; node;) { switch (node->type) { @@ -1238,84 +1237,84 @@ void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_info) break; case CSEL_NODE_TYPE__OPERATOR__COLLECTIVE: - if (node->u.collective.coll_type == coll_info.coll_type) + if (node->u.collective.coll_type == coll_sig.coll_type) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LE: - if (get_avg_msgsize(coll_info) <= node->u.avg_msg_size_le.val) + if (get_avg_msgsize(coll_sig) <= node->u.avg_msg_size_le.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LT: - if (get_avg_msgsize(coll_info) < node->u.avg_msg_size_lt.val) + if (get_avg_msgsize(coll_sig) < node->u.avg_msg_size_lt.val) node = 
node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LE: - if (get_total_msgsize(coll_info) <= node->u.total_msg_size_le.val) + if (get_total_msgsize(coll_sig) <= node->u.total_msg_size_le.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LT: - if (get_total_msgsize(coll_info) < node->u.total_msg_size_lt.val) + if (get_total_msgsize(coll_sig) < node->u.total_msg_size_lt.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__COUNT_LE: - if (get_count(coll_info) <= node->u.count_le.val) + if (get_count(coll_sig) <= node->u.count_le.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__COUNT_LT_POW2: - if (get_count(coll_info) < MPL_pof2(coll_info.comm_ptr->local_size)) + if (get_count(coll_sig) < MPL_pof2(coll_sig.comm_ptr->local_size)) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE: - if (is_commutative(coll_info) == node->u.is_commutative.val) + if (is_commutative(coll_sig) == node->u.is_commutative.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE: - if (is_sendbuf_inplace(coll_info) == node->u.is_sbuf_inplace.val) + if (is_sendbuf_inplace(coll_sig) == node->u.is_sbuf_inplace.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN: - if (is_op_built_in(coll_info) == node->u.is_op_built_in.val) + if (is_op_built_in(coll_sig) == node->u.is_op_built_in.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR: - if (is_block_regular(coll_info) == node->u.is_block_regular.val) + if (is_block_regular(coll_sig) == node->u.is_block_regular.val) node = node->success; else node = node->failure; break; case CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE: - if 
(MPII_Comm_is_node_consecutive(coll_info.comm_ptr) == + if (MPII_Comm_is_node_consecutive(coll_sig.comm_ptr) == node->u.is_node_consecutive.val) node = node->success; else From ca84b21f65871cac3772d23fdb19b1d517d01eb1 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 21 Aug 2025 12:12:18 -0500 Subject: [PATCH 17/47] coll/csel: pass coll_sig by pointer Pass coll_sig by pointer rather than by copy. The structure can be big and passing by pointer avoids the extra copy. Also allows coll_sig to serve as a persistent state throughout the collective selection chain. --- maint/gen_coll.py | 4 +- src/include/mpir_csel.h | 2 +- src/mpi/coll/src/csel.c | 289 ++++++++++++++++++++-------------------- 3 files changed, 148 insertions(+), 147 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 5495e7c4ffd..80a430d31f3 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -99,7 +99,7 @@ def dump_allcomm_auto_blocking(name): G.out.append(".u.%s.%s = %s," % (func_name, p['name'], p['name'])) dump_close("};") G.out.append("") - G.out.append("MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig);") + G.out.append("MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, &coll_sig);") G.out.append("MPIR_Assert(cnt);") G.out.append("") @@ -174,7 +174,7 @@ def dump_allcomm_sched_auto(name): G.out.append(".u.%s.%s = %s," % (func_name, p['name'], p['name'])) dump_close("};") G.out.append("") - G.out.append("MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, coll_sig);") + G.out.append("MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, &coll_sig);") G.out.append("MPIR_Assert(cnt);") G.out.append("") diff --git a/src/include/mpir_csel.h b/src/include/mpir_csel.h index 07f061e98ef..ebf01c5e2ec 100644 --- a/src/include/mpir_csel.h +++ b/src/include/mpir_csel.h @@ -207,6 +207,6 @@ int MPIR_Csel_create_from_buf(const char *json, void *(*create_container) (struct json_object *), void **csel); int
MPIR_Csel_free(void *csel); int MPIR_Csel_prune(void *root_csel, MPIR_Comm * comm_ptr, void **comm_csel); -void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s coll_sig); +void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); #endif /* MPIR_CSEL_H_INCLUDED */ diff --git a/src/mpi/coll/src/csel.c b/src/mpi/coll/src/csel.c index 43e5a034bcd..3b9bed141d1 100644 --- a/src/mpi/coll/src/csel.c +++ b/src/mpi/coll/src/csel.c @@ -744,115 +744,115 @@ int MPIR_Csel_free(void *csel_) return mpi_errno; } -static inline bool is_sendbuf_inplace(MPIR_Csel_coll_sig_s coll_sig) +static inline bool is_sendbuf_inplace(MPIR_Csel_coll_sig_s * coll_sig) { bool sendbuf_inplace = false; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__ALLTOALL: - sendbuf_inplace = (coll_sig.u.alltoall.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig->u.alltoall.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__IALLTOALL: - sendbuf_inplace = (coll_sig.u.ialltoall.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig->u.ialltoall.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__ALLTOALLV: - sendbuf_inplace = (coll_sig.u.alltoallv.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig->u.alltoallv.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__IALLTOALLV: - sendbuf_inplace = (coll_sig.u.ialltoallv.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig->u.ialltoallv.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__ALLTOALLW: - sendbuf_inplace = (coll_sig.u.alltoallw.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig->u.alltoallw.sendbuf == MPI_IN_PLACE); break; case MPIR_CSEL_COLL_TYPE__IALLTOALLW: - sendbuf_inplace = (coll_sig.u.ialltoallw.sendbuf == MPI_IN_PLACE); + sendbuf_inplace = (coll_sig->u.ialltoallw.sendbuf == MPI_IN_PLACE); break; default: fprintf(stderr, "is_sendbuf_inplace not defined for coll_type %d\n", - coll_sig.coll_type); + coll_sig->coll_type); MPIR_Assert(0); break; } return 
sendbuf_inplace; } -static inline bool is_commutative(MPIR_Csel_coll_sig_s coll_sig) +static inline bool is_commutative(MPIR_Csel_coll_sig_s * coll_sig) { bool commutative = false; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - commutative = MPIR_Op_is_commutative(coll_sig.u.allreduce.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.allreduce.op); break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - commutative = MPIR_Op_is_commutative(coll_sig.u.iallreduce.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.iallreduce.op); break; case MPIR_CSEL_COLL_TYPE__REDUCE: - commutative = MPIR_Op_is_commutative(coll_sig.u.reduce.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.reduce.op); break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - commutative = MPIR_Op_is_commutative(coll_sig.u.ireduce.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.ireduce.op); break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - commutative = MPIR_Op_is_commutative(coll_sig.u.reduce_scatter.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.reduce_scatter.op); break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - commutative = MPIR_Op_is_commutative(coll_sig.u.ireduce_scatter.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.ireduce_scatter.op); break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - commutative = MPIR_Op_is_commutative(coll_sig.u.reduce_scatter_block.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.reduce_scatter_block.op); break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - commutative = MPIR_Op_is_commutative(coll_sig.u.ireduce_scatter_block.op); + commutative = MPIR_Op_is_commutative(coll_sig->u.ireduce_scatter_block.op); break; default: - fprintf(stderr, "is_commutative not defined for coll_type %d\n", coll_sig.coll_type); + fprintf(stderr, "is_commutative not defined for coll_type %d\n", coll_sig->coll_type); MPIR_Assert(0); break; } return commutative; } -static inline bool 
is_op_built_in(MPIR_Csel_coll_sig_s coll_sig) +static inline bool is_op_built_in(MPIR_Csel_coll_sig_s * coll_sig) { bool op_built_in = false; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig.u.allreduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig->u.allreduce.op) == HANDLE_KIND_BUILTIN; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig.u.iallreduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig->u.iallreduce.op) == HANDLE_KIND_BUILTIN; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig.u.reduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig->u.reduce.op) == HANDLE_KIND_BUILTIN; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig.u.ireduce.op) == HANDLE_KIND_BUILTIN; + op_built_in = HANDLE_GET_KIND(coll_sig->u.ireduce.op) == HANDLE_KIND_BUILTIN; break; default: - fprintf(stderr, "is_op_builtin not defined for coll_type %d\n", coll_sig.coll_type); + fprintf(stderr, "is_op_builtin not defined for coll_type %d\n", coll_sig->coll_type); MPIR_Assert(0); break; } return op_built_in; } -static inline bool is_block_regular(MPIR_Csel_coll_sig_s coll_sig) +static inline bool is_block_regular(MPIR_Csel_coll_sig_s * coll_sig) { bool is_regular = true; int i = 0; const MPI_Aint *recvcounts = NULL; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - recvcounts = coll_sig.u.reduce_scatter.recvcounts; + recvcounts = coll_sig->u.reduce_scatter.recvcounts; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - recvcounts = coll_sig.u.ireduce_scatter.recvcounts; + recvcounts = coll_sig->u.ireduce_scatter.recvcounts; break; default: MPIR_Assert(0); break; } - for (i = 0; i < (coll_sig.comm_ptr->local_size - 1); ++i) { + for (i = 0; i < (coll_sig->comm_ptr->local_size - 1); 
++i) { if (recvcounts[i] != recvcounts[i + 1]) { is_regular = false; break; @@ -861,53 +861,53 @@ static inline bool is_block_regular(MPIR_Csel_coll_sig_s coll_sig) return is_regular; } -static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s coll_sig) +static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s * coll_sig) { MPI_Aint msgsize = 0; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.allreduce.datatype, msgsize); - msgsize *= coll_sig.u.allreduce.count; + MPIR_Datatype_get_size_macro(coll_sig->u.allreduce.datatype, msgsize); + msgsize *= coll_sig->u.allreduce.count; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.iallreduce.datatype, msgsize); - msgsize *= coll_sig.u.iallreduce.count; + MPIR_Datatype_get_size_macro(coll_sig->u.iallreduce.datatype, msgsize); + msgsize *= coll_sig->u.iallreduce.count; break; case MPIR_CSEL_COLL_TYPE__BCAST: - MPIR_Datatype_get_size_macro(coll_sig.u.bcast.datatype, msgsize); - msgsize *= coll_sig.u.bcast.count; + MPIR_Datatype_get_size_macro(coll_sig->u.bcast.datatype, msgsize); + msgsize *= coll_sig->u.bcast.count; break; case MPIR_CSEL_COLL_TYPE__IBCAST: - MPIR_Datatype_get_size_macro(coll_sig.u.ibcast.datatype, msgsize); - msgsize *= coll_sig.u.ibcast.count; + MPIR_Datatype_get_size_macro(coll_sig->u.ibcast.datatype, msgsize); + msgsize *= coll_sig->u.ibcast.count; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.reduce.datatype, msgsize); - msgsize *= coll_sig.u.reduce.count; + MPIR_Datatype_get_size_macro(coll_sig->u.reduce.datatype, msgsize); + msgsize *= coll_sig->u.reduce.count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.ireduce.datatype, msgsize); - msgsize *= coll_sig.u.ireduce.count; + MPIR_Datatype_get_size_macro(coll_sig->u.ireduce.datatype, msgsize); + msgsize *= coll_sig->u.ireduce.count; break; 
case MPIR_CSEL_COLL_TYPE__ALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig.u.alltoall.sendtype, msgsize); - msgsize *= coll_sig.u.alltoall.sendcount; + MPIR_Datatype_get_size_macro(coll_sig->u.alltoall.sendtype, msgsize); + msgsize *= coll_sig->u.alltoall.sendcount; break; case MPIR_CSEL_COLL_TYPE__IALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig.u.ialltoall.sendtype, msgsize); - msgsize *= coll_sig.u.ialltoall.sendcount; + MPIR_Datatype_get_size_macro(coll_sig->u.ialltoall.sendtype, msgsize); + msgsize *= coll_sig->u.ialltoall.sendcount; break; default: - fprintf(stderr, "avg_msg_size not defined for coll_type %d\n", coll_sig.coll_type); + fprintf(stderr, "avg_msg_size not defined for coll_type %d\n", coll_sig->coll_type); MPIR_Assert(0); break; } @@ -915,56 +915,56 @@ static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s coll_sig) return msgsize; } -static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s coll_sig) +static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s * coll_sig) { MPI_Aint count = 0; int i = 0; - int comm_size = coll_sig.comm_ptr->local_size; + int comm_size = coll_sig->comm_ptr->local_size; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__BCAST: - count = coll_sig.u.bcast.count; + count = coll_sig->u.bcast.count; break; case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - count = coll_sig.u.allreduce.count; + count = coll_sig->u.allreduce.count; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - count = coll_sig.u.iallreduce.count; + count = coll_sig->u.iallreduce.count; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - count = coll_sig.u.reduce.count; + count = coll_sig->u.reduce.count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - count = coll_sig.u.ireduce.count; + count = coll_sig->u.ireduce.count; break; case MPIR_CSEL_COLL_TYPE__ALLGATHER: - count = coll_sig.u.allgather.recvcount; + count = coll_sig->u.allgather.recvcount; break; case MPIR_CSEL_COLL_TYPE__ALLGATHERV: count = 0; for (i = 0; i < comm_size; i++) - count 
+= coll_sig.u.allgatherv.recvcounts[i]; + count += coll_sig->u.allgatherv.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: for (i = 0; i < comm_size; i++) - count += coll_sig.u.reduce_scatter.recvcounts[i]; + count += coll_sig->u.reduce_scatter.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - count = coll_sig.u.reduce_scatter_block.recvcount; + count = coll_sig->u.reduce_scatter_block.recvcount; break; case MPIR_CSEL_COLL_TYPE__IALLGATHER: - count = coll_sig.u.iallgather.recvcount; + count = coll_sig->u.iallgather.recvcount; break; case MPIR_CSEL_COLL_TYPE__IALLGATHERV: for (i = 0; i < comm_size; i++) - count += coll_sig.u.iallgatherv.recvcounts[i]; + count += coll_sig->u.iallgatherv.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: for (i = 0; i < comm_size; i++) - count += coll_sig.u.ireduce_scatter.recvcounts[i]; + count += coll_sig->u.ireduce_scatter.recvcounts[i]; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - count = coll_sig.u.ireduce_scatter_block.recvcount; + count = coll_sig->u.ireduce_scatter_block.recvcount; break; default: MPIR_Assert(0); @@ -973,37 +973,37 @@ static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s coll_sig) return count; } -static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_sig) +static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s * coll_sig) { MPI_Aint total_bytes = 0, i = 0, count = 0, typesize = 0; - int comm_size = coll_sig.comm_ptr->local_size; + int comm_size = coll_sig->comm_ptr->local_size; - switch (coll_sig.coll_type) { + switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.allreduce.datatype, total_bytes); - total_bytes *= coll_sig.u.allreduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.allreduce.datatype, total_bytes); + total_bytes *= coll_sig->u.allreduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__BCAST: - 
MPIR_Datatype_get_size_macro(coll_sig.u.bcast.datatype, total_bytes); - total_bytes *= coll_sig.u.bcast.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.bcast.datatype, total_bytes); + total_bytes *= coll_sig->u.bcast.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__REDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.reduce.datatype, total_bytes); - total_bytes *= coll_sig.u.reduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.reduce.datatype, total_bytes); + total_bytes *= coll_sig->u.reduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__ALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig.u.alltoall.sendtype, total_bytes); - total_bytes *= coll_sig.u.alltoall.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.alltoall.sendtype, total_bytes); + total_bytes *= coll_sig->u.alltoall.sendcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__ALLTOALLV: - MPIR_Datatype_get_size_macro(coll_sig.u.alltoallv.sendtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig->u.alltoallv.sendtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_sig.u.alltoallv.sendcounts[i]; + count += coll_sig->u.alltoallv.sendcounts[i]; total_bytes *= count; break; @@ -1011,91 +1011,91 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_sig) count = 0; typesize = 0; for (i = 0; i < comm_size; i++) { - MPIR_Datatype_get_size_macro(coll_sig.u.alltoallw.sendtypes[i], typesize); - count = coll_sig.u.alltoallw.sendcounts[i]; + MPIR_Datatype_get_size_macro(coll_sig->u.alltoallw.sendtypes[i], typesize); + count = coll_sig->u.alltoallw.sendcounts[i]; total_bytes += (count * typesize); } break; case MPIR_CSEL_COLL_TYPE__ALLGATHER: - MPIR_Datatype_get_size_macro(coll_sig.u.allgather.recvtype, total_bytes); - total_bytes *= coll_sig.u.allgather.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.allgather.recvtype, total_bytes); + total_bytes *= coll_sig->u.allgather.recvcount * comm_size; break; case 
MPIR_CSEL_COLL_TYPE__ALLGATHERV: - MPIR_Datatype_get_size_macro(coll_sig.u.allgatherv.recvtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig->u.allgatherv.recvtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_sig.u.allgatherv.recvcounts[i]; + count += coll_sig->u.allgatherv.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__GATHER: - if (coll_sig.u.gather.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig.u.gather.recvtype, total_bytes); + if (coll_sig->u.gather.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig->u.gather.recvtype, total_bytes); /* use remote size for intercomm root */ - if (coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig.u.gather.recvcount * (coll_sig.comm_ptr->remote_size); + if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig->u.gather.recvcount * (coll_sig->comm_ptr->remote_size); else - total_bytes = coll_sig.u.gather.recvcount * comm_size; + total_bytes = coll_sig->u.gather.recvcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_sig.u.gather.sendtype, total_bytes); - total_bytes = coll_sig.u.gather.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.gather.sendtype, total_bytes); + total_bytes = coll_sig->u.gather.sendcount * comm_size; } break; case MPIR_CSEL_COLL_TYPE__SCATTER: - if (coll_sig.u.scatter.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig.u.scatter.sendtype, total_bytes); + if (coll_sig->u.scatter.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig->u.scatter.sendtype, total_bytes); /* use remote size for intercomm root */ - if (coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig.u.scatter.sendcount * (coll_sig.comm_ptr->remote_size); + if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig->u.scatter.sendcount * (coll_sig->comm_ptr->remote_size); else - total_bytes = 
coll_sig.u.scatter.sendcount * comm_size; + total_bytes = coll_sig->u.scatter.sendcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_sig.u.scatter.recvtype, total_bytes); - total_bytes = coll_sig.u.scatter.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.scatter.recvtype, total_bytes); + total_bytes = coll_sig->u.scatter.recvcount * comm_size; } break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - MPIR_Datatype_get_size_macro(coll_sig.u.reduce_scatter.datatype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig->u.reduce_scatter.datatype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_sig.u.reduce_scatter.recvcounts[i]; + count += coll_sig->u.reduce_scatter.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - MPIR_Datatype_get_size_macro(coll_sig.u.reduce_scatter_block.datatype, total_bytes); - total_bytes *= coll_sig.u.reduce_scatter_block.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.reduce_scatter_block.datatype, total_bytes); + total_bytes *= coll_sig->u.reduce_scatter_block.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.iallreduce.datatype, total_bytes); - total_bytes *= coll_sig.u.iallreduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.iallreduce.datatype, total_bytes); + total_bytes *= coll_sig->u.iallreduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__IBCAST: - MPIR_Datatype_get_size_macro(coll_sig.u.ibcast.datatype, total_bytes); - total_bytes *= coll_sig.u.ibcast.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.ibcast.datatype, total_bytes); + total_bytes *= coll_sig->u.ibcast.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__IREDUCE: - MPIR_Datatype_get_size_macro(coll_sig.u.ireduce.datatype, total_bytes); - total_bytes *= coll_sig.u.ireduce.count * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.ireduce.datatype, total_bytes); + 
total_bytes *= coll_sig->u.ireduce.count * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig.u.ialltoall.sendtype, total_bytes); - total_bytes *= coll_sig.u.ialltoall.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.ialltoall.sendtype, total_bytes); + total_bytes *= coll_sig->u.ialltoall.sendcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLTOALLV: - MPIR_Datatype_get_size_macro(coll_sig.u.ialltoallv.sendtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig->u.ialltoallv.sendtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_sig.u.ialltoallv.sendcounts[i]; + count += coll_sig->u.ialltoallv.sendcounts[i]; total_bytes *= count; break; @@ -1103,63 +1103,64 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_sig) count = 0; typesize = 0; for (i = 0; i < comm_size; i++) { - MPIR_Datatype_get_size_macro(coll_sig.u.ialltoallw.sendtypes[i], typesize); - count = coll_sig.u.ialltoallw.sendcounts[i]; + MPIR_Datatype_get_size_macro(coll_sig->u.ialltoallw.sendtypes[i], typesize); + count = coll_sig->u.ialltoallw.sendcounts[i]; total_bytes += (count * typesize); } break; case MPIR_CSEL_COLL_TYPE__IALLGATHER: - MPIR_Datatype_get_size_macro(coll_sig.u.iallgather.recvtype, total_bytes); - total_bytes *= coll_sig.u.iallgather.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.iallgather.recvtype, total_bytes); + total_bytes *= coll_sig->u.iallgather.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IALLGATHERV: - MPIR_Datatype_get_size_macro(coll_sig.u.iallgatherv.recvtype, total_bytes); + MPIR_Datatype_get_size_macro(coll_sig->u.iallgatherv.recvtype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_sig.u.iallgatherv.recvcounts[i]; + count += coll_sig->u.iallgatherv.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - MPIR_Datatype_get_size_macro(coll_sig.u.ireduce_scatter.datatype, 
total_bytes); + MPIR_Datatype_get_size_macro(coll_sig->u.ireduce_scatter.datatype, total_bytes); count = 0; for (i = 0; i < comm_size; i++) - count += coll_sig.u.ireduce_scatter.recvcounts[i]; + count += coll_sig->u.ireduce_scatter.recvcounts[i]; total_bytes *= count; break; case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - MPIR_Datatype_get_size_macro(coll_sig.u.ireduce_scatter_block.datatype, total_bytes); - total_bytes = coll_sig.u.ireduce_scatter_block.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.ireduce_scatter_block.datatype, total_bytes); + total_bytes = coll_sig->u.ireduce_scatter_block.recvcount * comm_size; break; case MPIR_CSEL_COLL_TYPE__IGATHER: - if (coll_sig.u.igather.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig.u.igather.recvtype, total_bytes); + if (coll_sig->u.igather.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig->u.igather.recvtype, total_bytes); /* use remote size for intercomm root */ - if (coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig.u.igather.recvcount * (coll_sig.comm_ptr->remote_size); + if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig->u.igather.recvcount * (coll_sig->comm_ptr->remote_size); else - total_bytes = coll_sig.u.igather.recvcount * comm_size; + total_bytes = coll_sig->u.igather.recvcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_sig.u.igather.sendtype, total_bytes); - total_bytes = coll_sig.u.igather.sendcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.igather.sendtype, total_bytes); + total_bytes = coll_sig->u.igather.sendcount * comm_size; } break; case MPIR_CSEL_COLL_TYPE__ISCATTER: - if (coll_sig.u.iscatter.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig.u.iscatter.sendtype, total_bytes); + if (coll_sig->u.iscatter.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig->u.iscatter.sendtype, total_bytes); /* use remote size for intercomm root */ - if 
(coll_sig.comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig.u.iscatter.sendcount * (coll_sig.comm_ptr->remote_size); + if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = + coll_sig->u.iscatter.sendcount * (coll_sig->comm_ptr->remote_size); else - total_bytes = coll_sig.u.iscatter.sendcount * comm_size; + total_bytes = coll_sig->u.iscatter.sendcount * comm_size; } else { - MPIR_Datatype_get_size_macro(coll_sig.u.iscatter.recvtype, total_bytes); - total_bytes = coll_sig.u.iscatter.recvcount * comm_size; + MPIR_Datatype_get_size_macro(coll_sig->u.iscatter.recvtype, total_bytes); + total_bytes = coll_sig->u.iscatter.recvcount * comm_size; } break; @@ -1171,11 +1172,11 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s coll_sig) return total_bytes; } -void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_sig) +void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s * coll_sig) { csel_s *csel = (csel_s *) csel_; csel_node_s *node = NULL; - MPIR_Comm *comm_ptr = coll_sig.comm_ptr; + MPIR_Comm *comm_ptr = coll_sig->comm_ptr; MPIR_Assert(csel_); @@ -1183,7 +1184,7 @@ void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_sig) if (csel->type == CSEL_TYPE__ROOT) root = csel->u.root.tree; else - root = csel->u.pruned.coll_trees[coll_sig.coll_type]; + root = csel->u.pruned.coll_trees[coll_sig->coll_type]; for (node = root; node;) { switch (node->type) { @@ -1237,7 +1238,7 @@ void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_sig) break; case CSEL_NODE_TYPE__OPERATOR__COLLECTIVE: - if (node->u.collective.coll_type == coll_sig.coll_type) + if (node->u.collective.coll_type == coll_sig->coll_type) node = node->success; else node = node->failure; @@ -1279,7 +1280,7 @@ void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_sig) break; case CSEL_NODE_TYPE__OPERATOR__COUNT_LT_POW2: - if (get_count(coll_sig) < MPL_pof2(coll_sig.comm_ptr->local_size)) + if (get_count(coll_sig) < 
MPL_pof2(coll_sig->comm_ptr->local_size)) node = node->success; else node = node->failure; @@ -1314,7 +1315,7 @@ void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s coll_sig) break; case CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE: - if (MPII_Comm_is_node_consecutive(coll_sig.comm_ptr) == + if (MPII_Comm_is_node_consecutive(coll_sig->comm_ptr) == node->u.is_node_consecutive.val) node = node->success; else From ffb3a91acdea0ad4bb7e6c04ac5cbcfc8ed9d530 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 21 Aug 2025 15:27:13 -0500 Subject: [PATCH 18/47] coll/csel: merge csel_container.h into mpir_csel.h MPIR_Csel_coll_sig_s contains input arguments to an MPI collective. MPII_Csel_container_s contains extra parameters to an MPI algorithm. We define a unified collective algo function interface using both MPIR_Csel_coll_sig_s and MPII_Csel_container_s. Both structures will need MPID extensions to support MPID-specific algorithms. Defining both in the same header for easier management. --- src/include/mpiimpl.h | 2 +- src/include/mpir_csel.h | 403 +++++++++++++++++++++++++ src/mpi/coll/include/coll_impl.h | 2 - src/mpi/coll/include/csel_container.h | 412 -------------------------- src/mpi/coll/src/csel_container.c | 1 - 5 files changed, 404 insertions(+), 416 deletions(-) delete mode 100644 src/mpi/coll/include/csel_container.h diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h index 09f9079d934..07e5b7fd242 100644 --- a/src/include/mpiimpl.h +++ b/src/include/mpiimpl.h @@ -205,8 +205,8 @@ typedef struct MPIR_Stream MPIR_Stream; #include "mpir_request.h" #include "mpir_progress_hook.h" #include "mpir_win.h" -#include "mpir_coll.h" #include "mpir_csel.h" +#include "mpir_coll.h" #include "mpir_func.h" #include "mpir_nbc.h" #include "mpir_bsend.h" diff --git a/src/include/mpir_csel.h b/src/include/mpir_csel.h index ebf01c5e2ec..544e194ed0d 100644 --- a/src/include/mpir_csel.h +++ b/src/include/mpir_csel.h @@ -57,6 +57,203 @@ typedef enum { 
MPIR_CSEL_COLL_TYPE__END, } MPIR_Csel_coll_type_e; +typedef enum { + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_inter_local_gather_remote_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_inter_remote_gather_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_reduce_scatter_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_inter_reduce_exchange_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise_sendrecv_replace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_inter_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_pairwise_sendrecv_replace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_inter_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_pairwise_sendrecv_replace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_inter_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_recursive_doubling_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_ring_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_inter_remote_send_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_intra_recursive_doubling, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_local_gather_remote_send, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_inter_sched_local_gather_remote_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_naive, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_recursive_doubling, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_reduce_scatter_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_permuted_sendrecv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_inter_sched_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_inter_sched_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_blocked, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_inter_sched_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_inter_sched_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_ring_allgatherv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_ring_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_inter_sched_flat, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iexscan_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_long, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_short, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_tsp_linear, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_reduce_scatter_gather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_inter_sched_local_reduce_remote_send, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_tsp_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_tsp_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_tsp_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_remote_send_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgather_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgatherv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoall_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallw_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_reduce_scatter_gather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_inter_local_reduce_remote_send, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_halving, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_inter_remote_reduce_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_remote_send_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count, +} MPII_Csel_container_type_e; + typedef struct { MPIR_Csel_coll_type_e coll_type; MPIR_Comm *comm_ptr; @@ -201,6 +398,208 @@ typedef struct { } u; } MPIR_Csel_coll_sig_s; +typedef struct { + MPII_Csel_container_type_e id; + + union { + struct { + struct { + int k; + } intra_tsp_brucks; + struct { + int k; + } intra_tsp_recexch_doubling; + struct { + int k; + } intra_tsp_recexch_halving; + } iallgather; + struct { + struct { + int k; + } intra_tsp_brucks; + struct { + int k; + } intra_tsp_recexch_doubling; + struct { + int k; + } intra_tsp_recexch_halving; + } iallgatherv; + struct { + struct { + int k; + } 
intra_tsp_recexch_single_buffer; + struct { + int k; + } intra_tsp_recexch_multiple_buffer; + struct { + int tree_type; + int k; + int chunk_size; + int buffer_per_child; + } intra_tsp_tree; + struct { + int k; + } intra_tsp_recexch_reduce_scatter_recexch_allgatherv; + } iallreduce; + struct { + struct { + int k; + int buffer_per_phase; + } intra_tsp_brucks; + struct { + int batch_size; + int bblock; + } intra_tsp_scattered; + } ialltoall; + struct { + struct { + int batch_size; + int bblock; + } intra_tsp_scattered; + struct { + int bblock; + } intra_tsp_blocked; + } ialltoallv; + struct { + struct { + int bblock; + } intra_tsp_blocked; + } ialltoallw; + struct { + struct { + int k; + } intra_k_dissemination; + struct { + int k; + bool single_phase_recv; + } intra_recexch; + } barrier; + struct { + struct { + int k; + } intra_tsp_recexch; + struct { + int k; + } intra_tsp_k_dissemination; + } ibarrier; + struct { + struct { + int tree_type; + int k; + int chunk_size; + } intra_tsp_tree; + struct { + int chunk_size; + } intra_tsp_ring; + struct { + int scatterv_k; + int allgatherv_k; + } intra_tsp_scatterv_recexch_allgatherv; + struct { + int scatterv_k; + } intra_tsp_scatterv_ring_allgatherv; + } ibcast; + struct { + struct { + int tree_type; + int k; + int is_non_blocking; + int topo_overhead; + int topo_diff_groups; + int topo_diff_switches; + int topo_same_switches; + } intra_tree; + struct { + int tree_type; + int k; + int is_non_blocking; + int chunk_size; + int recv_pre_posted; + } intra_pipelined_tree; + } bcast; + struct { + struct { + int k; + } intra_k_brucks; + struct { + int k; + bool single_phase_recv; + } intra_recexch_doubling; + struct { + int k; + bool single_phase_recv; + } intra_recexch_halving; + } allgather; + struct { + struct { + int k; + } intra_k_brucks; + } alltoall; + struct { + struct { + int k; + } intra_tsp_tree; + } igather; + struct { + struct { + int tree_type; + int k; + int chunk_size; + int buffer_per_child; + int topo_overhead; 
+ int topo_diff_groups; + int topo_diff_switches; + int topo_same_switches; + } intra_tsp_tree; + struct { + int chunk_size; + int buffer_per_child; + } intra_tsp_ring; + } ireduce; + struct { + struct { + int k; + } intra_tsp_recexch; + } ireduce_scatter; + struct { + struct { + int k; + } intra_tsp_recexch; + } ireduce_scatter_block; + struct { + struct { + int k; + } intra_recursive_multiplying; + struct { + int tree_type; + int k; + int chunk_size; + int buffer_per_child; + int topo_overhead; + int topo_diff_groups; + int topo_diff_switches; + int topo_same_switches; + } intra_tree; + struct { + int k; + bool single_phase_recv; + } intra_recexch; + struct { + int k; + bool single_phase_recv; + } intra_k_reduce_scatter_allgather; + struct { + int ccl; + } intra_ccl; + } allreduce; + struct { + struct { + int k; + } intra_tsp_tree; + } iscatter; + } u; +} MPII_Csel_container_s; + int MPIR_Csel_create_from_file(const char *json_file, void *(*create_container) (struct json_object *), void **csel); int MPIR_Csel_create_from_buf(const char *json, @@ -209,4 +608,8 @@ int MPIR_Csel_free(void *csel); int MPIR_Csel_prune(void *root_csel, MPIR_Comm * comm_ptr, void **comm_csel); void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); +void *MPII_Create_container(struct json_object *obj); + +typedef int (*MPIR_Coll_algo_fn) (MPII_Csel_container_s * cnt, MPIR_Csel_coll_sig_s * coll_sig); + #endif /* MPIR_CSEL_H_INCLUDED */ diff --git a/src/mpi/coll/include/coll_impl.h b/src/mpi/coll/include/coll_impl.h index 75eaeab6e94..7d67479f0af 100644 --- a/src/mpi/coll/include/coll_impl.h +++ b/src/mpi/coll/include/coll_impl.h @@ -14,8 +14,6 @@ #include "../algorithms/treealgo/treealgo.h" #include "../algorithms/recexchalgo/recexchalgo.h" -#include "csel_container.h" - #define MPII_COLLECTIVE_FALLBACK_CHECK(rank, check, mpi_errno, ...) 
\ do { \ if ((check) == 0) { \ diff --git a/src/mpi/coll/include/csel_container.h b/src/mpi/coll/include/csel_container.h deleted file mode 100644 index fae6ea479e4..00000000000 --- a/src/mpi/coll/include/csel_container.h +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#ifndef MPIR_CSEL_CONTAINER_H_INCLUDED -#define MPIR_CSEL_CONTAINER_H_INCLUDED - -#include - -void *MPII_Create_container(struct json_object *obj); - -typedef enum { - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_inter_local_gather_remote_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_inter_remote_gather_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_inter_reduce_exchange_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_inter_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_recursive_doubling_allgather, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_ring_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_inter_remote_send_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_local_gather_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_inter_sched_local_gather_remote_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_doubling, 
- MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_naive, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_permuted_sendrecv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_scattered, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_inter_sched_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_ring_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_ring_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_inter_sched_flat, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iexscan_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_long, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_short, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_reduce_scatter_gather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_inter_sched_local_reduce_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_tsp_recexch, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_tsp_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_remote_send_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoall_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallw_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_reduce_scatter_gather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_smp, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_inter_local_reduce_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_inter_remote_reduce_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_remote_send_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count, -} MPII_Csel_container_type_e; - -typedef struct { - MPII_Csel_container_type_e id; - - union { - struct { - struct { - int k; - } intra_tsp_brucks; - struct { - int k; - 
} intra_tsp_recexch_doubling; - struct { - int k; - } intra_tsp_recexch_halving; - } iallgather; - struct { - struct { - int k; - } intra_tsp_brucks; - struct { - int k; - } intra_tsp_recexch_doubling; - struct { - int k; - } intra_tsp_recexch_halving; - } iallgatherv; - struct { - struct { - int k; - } intra_tsp_recexch_single_buffer; - struct { - int k; - } intra_tsp_recexch_multiple_buffer; - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - } intra_tsp_tree; - struct { - int k; - } intra_tsp_recexch_reduce_scatter_recexch_allgatherv; - } iallreduce; - struct { - struct { - int k; - int buffer_per_phase; - } intra_tsp_brucks; - struct { - int batch_size; - int bblock; - } intra_tsp_scattered; - } ialltoall; - struct { - struct { - int batch_size; - int bblock; - } intra_tsp_scattered; - struct { - int bblock; - } intra_tsp_blocked; - } ialltoallv; - struct { - struct { - int bblock; - } intra_tsp_blocked; - } ialltoallw; - struct { - struct { - int k; - } intra_k_dissemination; - struct { - int k; - bool single_phase_recv; - } intra_recexch; - } barrier; - struct { - struct { - int k; - } intra_tsp_recexch; - struct { - int k; - } intra_tsp_k_dissemination; - } ibarrier; - struct { - struct { - int tree_type; - int k; - int chunk_size; - } intra_tsp_tree; - struct { - int chunk_size; - } intra_tsp_ring; - struct { - int scatterv_k; - int allgatherv_k; - } intra_tsp_scatterv_recexch_allgatherv; - struct { - int scatterv_k; - } intra_tsp_scatterv_ring_allgatherv; - } ibcast; - struct { - struct { - int tree_type; - int k; - int is_non_blocking; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tree; - struct { - int tree_type; - int k; - int is_non_blocking; - int chunk_size; - int recv_pre_posted; - } intra_pipelined_tree; - } bcast; - struct { - struct { - int k; - } intra_k_brucks; - struct { - int k; - bool single_phase_recv; - } intra_recexch_doubling; - struct { - int k; - 
bool single_phase_recv; - } intra_recexch_halving; - } allgather; - struct { - struct { - int k; - } intra_k_brucks; - } alltoall; - struct { - struct { - int k; - } intra_tsp_tree; - } igather; - struct { - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tsp_tree; - struct { - int chunk_size; - int buffer_per_child; - } intra_tsp_ring; - } ireduce; - struct { - struct { - int k; - } intra_tsp_recexch; - } ireduce_scatter; - struct { - struct { - int k; - } intra_tsp_recexch; - } ireduce_scatter_block; - struct { - struct { - int k; - } intra_recursive_multiplying; - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tree; - struct { - int k; - bool single_phase_recv; - } intra_recexch; - struct { - int k; - bool single_phase_recv; - } intra_k_reduce_scatter_allgather; - struct { - int ccl; - } intra_ccl; - } allreduce; - struct { - struct { - int k; - } intra_tsp_tree; - } iscatter; - } u; -} MPII_Csel_container_s; - -#endif /* MPIR_CSEL_CONTAINER_H_INCLUDED */ diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c index 5b9c0aeb739..e966a9162f3 100644 --- a/src/mpi/coll/src/csel_container.c +++ b/src/mpi/coll/src/csel_container.c @@ -5,7 +5,6 @@ #include "mpiimpl.h" #include "coll_impl.h" -#include "csel_container.h" #include "mpl.h" static void parse_container_params(struct json_object *obj, MPII_Csel_container_s * cnt) From a86f92b223a6ecd064bdd6b740895d9561fbfa5e Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 21 Aug 2025 11:25:23 -0500 Subject: [PATCH 19/47] coll: add coll_composition.json and coll_selection.json Two main JSON tuning files. "coll_composition.json" selects compositional algorithms, and "coll_selection.json" selects basic algorithms. 
Basic algorithm does not call another collectives. --- maint/json_gen.sh | 3 + src/mpi/coll/coll_composition.json | 3 + src/mpi/coll/coll_selection.json | 809 +++++++++++++++++++++++++++++ src/mpi/coll/include/coll_impl.h | 2 + src/mpi/coll/src/coll_impl.c | 40 ++ 5 files changed, 857 insertions(+) create mode 100644 src/mpi/coll/coll_composition.json create mode 100644 src/mpi/coll/coll_selection.json diff --git a/maint/json_gen.sh b/maint/json_gen.sh index 5ed8dfc55f9..5e879614839 100755 --- a/maint/json_gen.sh +++ b/maint/json_gen.sh @@ -34,3 +34,6 @@ EOF # create specific json buffers create_json_buf maint/tuning/coll/mpir/generic.json MPII_coll_generic_json + +create_json_buf src/mpi/coll/coll_composition.json MPII_coll_composition_json +create_json_buf src/mpi/coll/coll_selection.json MPII_coll_selection_json diff --git a/src/mpi/coll/coll_composition.json b/src/mpi/coll/coll_composition.json new file mode 100644 index 00000000000..9b67d5f131e --- /dev/null +++ b/src/mpi/coll/coll_composition.json @@ -0,0 +1,3 @@ +{ + "algorithm=MPIR_Coll_auto":{} +} diff --git a/src/mpi/coll/coll_selection.json b/src/mpi/coll/coll_selection.json new file mode 100644 index 00000000000..a099c857f18 --- /dev/null +++ b/src/mpi/coll/coll_selection.json @@ -0,0 +1,809 @@ +{ + "collective=bcast": + { + "comm_type=intra": + { + "comm_size<8": + { + "algorithm=MPIR_Bcast_intra_binomial":{} + }, + "comm_size=pow2": + { + "avg_msg_size<=12288": + { + "algorithm=MPIR_Bcast_intra_binomial":{} + }, + "avg_msg_size<=524288": + { + "algorithm=MPIR_Bcast_intra_scatter_recursive_doubling_allgather":{} + }, + "avg_msg_size=any": + { + "algorithm=MPIR_Bcast_intra_scatter_ring_allgather":{} + } + }, + "comm_size=any": + { + "avg_msg_size<=12288": + { + "algorithm=MPIR_Bcast_intra_binomial":{} + }, + "avg_msg_size=any": + { + "algorithm=MPIR_Bcast_intra_scatter_ring_allgather":{} + } + } + }, + "comm_type=inter": + { + "algorithm=MPIR_Bcast_inter_remote_send_local_bcast":{} + } + }, + 
"collective=allreduce": + { + "comm_type=intra": + { + "avg_msg_size<=8": + { + "algorithm=MPIR_Allreduce_intra_recursive_doubling":{} + }, + "avg_msg_size=any": + { + "is_op_built_in=no": + { + "algorithm=MPIR_Allreduce_intra_recursive_doubling":{} + }, + "is_op_built_in=yes": + { + "count- Defines the location of tuning file. + - name : MPIR_CVAR_COLL_SELECTION_JSON_FILE + category : COLLECTIVE + type : string + default : "" + class : none + verbosity : MPI_T_VERBOSITY_USER_BASIC + scope : MPI_T_SCOPE_ALL_EQ + description : >- + Defines the location of tuning file that selects basic collective algorithms. + + - name : MPIR_CVAR_COLL_COMPOSITION_JSON_FILE + category : COLLECTIVE + type : string + default : "" + class : none + verbosity : MPI_T_VERBOSITY_USER_BASIC + scope : MPI_T_SCOPE_ALL_EQ + description : >- + Defines the location of tuning file that selects composition collective algorithms. + - name : MPIR_CVAR_HIERARCHY_DUMP category : COLLECTIVE type : boolean @@ -95,6 +115,10 @@ MPIR_Tree_type_t MPIR_Ireduce_tree_type = MPIR_TREE_TYPE_KARY; void *MPIR_Csel_root = NULL; const char *MPIR_Csel_source; +/* TODO: remove the old MPIR_Csel_root etc. 
*/ +void *MPIR_Csel_composition = NULL; +void *MPIR_Csel_selection = NULL; + MPIR_Tree_type_t get_tree_type_from_string(const char *tree_str) { MPIR_Tree_type_t tree_type = MPIR_TREE_TYPE_KARY; @@ -141,6 +165,16 @@ int get_ccl_from_string(const char *ccl_str) return ccl; } +#define LOAD_CSEL_JSON(csel_var, cvar_name, builtin_str) \ + do { \ + if (!strcmp(cvar_name, "")) { \ + mpi_errno = MPIR_Csel_create_from_buf(builtin_str, MPII_Create_container, &csel_var); \ + } else { \ + mpi_errno = MPIR_Csel_create_from_file(cvar_name, MPII_Create_container, &csel_var); \ + } \ + MPIR_ERR_CHECK(mpi_errno); \ + } while (0) + int MPII_Coll_init(void) { int mpi_errno = MPI_SUCCESS; @@ -187,6 +221,12 @@ int MPII_Coll_init(void) MPIR_Csel_source = MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE; } MPIR_ERR_CHECK(mpi_errno); + /* TODO: remove the old MPIR_Csel_root etc. */ + + LOAD_CSEL_JSON(MPIR_Csel_composition, + MPIR_CVAR_COLL_COMPOSITION_JSON_FILE, MPII_coll_composition_json); + LOAD_CSEL_JSON(MPIR_Csel_selection, + MPIR_CVAR_COLL_SELECTION_JSON_FILE, MPII_coll_selection_json); fn_exit: return mpi_errno; From ac28fb313ada29cb0803e9ce8d254af0e5ffeb4d Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 21 Aug 2025 12:10:43 -0500 Subject: [PATCH 20/47] coll: add MPIR_Coll_auto and MPIR_Composition_auto Add the two auto functions that executes CSEL search. 
--- src/include/mpir_csel.h | 10 +++++++++- src/mpi/coll/src/coll_impl.c | 14 ++++++++++++++ src/mpi/coll/src/csel_container.c | 2 ++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/include/mpir_csel.h b/src/include/mpir_csel.h index 544e194ed0d..e946e6aab62 100644 --- a/src/include/mpir_csel.h +++ b/src/include/mpir_csel.h @@ -251,6 +251,9 @@ typedef enum { MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb, MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear, MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb, + /* composition algorithms */ + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto, + /* end */ MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count, } MPII_Csel_container_type_e; @@ -610,6 +613,11 @@ void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); void *MPII_Create_container(struct json_object *obj); -typedef int (*MPIR_Coll_algo_fn) (MPII_Csel_container_s * cnt, MPIR_Csel_coll_sig_s * coll_sig); +typedef int (*MPIR_Coll_algo_fn) (MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); +void MPIR_Coll_algo_init(void); +/* NOTE: MPIR_Coll_auto is one of the composition container functions. However, + * MPIR_Coll_composition_auto is a gate function, thus does not take "cnt" parameter. 
 */
+int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig);
+int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt);
 
 #endif /* MPIR_CSEL_H_INCLUDED */
diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c
index 7f0d24e90c6..4dcf8c35763 100644
--- a/src/mpi/coll/src/coll_impl.c
+++ b/src/mpi/coll/src/coll_impl.c
@@ -398,3 +398,17 @@ void MPIR_Coll_host_buffer_persist_set(void *host_sendbuf, void *host_recvbuf, v
         MPIR_Datatype_add_ref_if_not_builtin(datatype);
     }
 }
+
+int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    return mpi_errno;
+}
+
+int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    return mpi_errno;
+}
diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c
index e966a9162f3..99357d4cac9 100644
--- a/src/mpi/coll/src/csel_container.c
+++ b/src/mpi/coll/src/csel_container.c
@@ -840,6 +840,8 @@ void *MPII_Create_container(struct json_object *obj)
             cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear;
         else if (!strcmp(ckey, "algorithm=MPIR_Scatterv_allcomm_nb"))
             cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb;
+        else if (!strcmp(ckey, "algorithm=MPIR_Coll_auto"))
+            cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto;
         else {
             fprintf(stderr, "unrecognized key %s\n", key);
             MPIR_Assert(0);

From b286fdc1692c097abd003e001f0f08e4ae89fc02 Mon Sep 17 00:00:00 2001
From: Hui Zhou
Date: Sun, 24 Aug 2025 18:19:21 -0500
Subject: [PATCH 21/47] coll: add MPIR_Coll_nb

A universal nb algorithm for blocking collectives.
--- src/mpi/coll/src/coll_impl.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 7f0d24e90c6..4dcf8c35763 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -412,3 +412,32 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig) return mpi_errno; } + +/* blocking collectives by calling its nonblocking forms */ +int MPIR_Coll_nb(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me) +{ + int mpi_errno = MPI_SUCCESS; + + /* Trick: blocking coll_type is even. Its nonblocking type is +1 */ + MPIR_Assert(coll_sig->coll_type % 2 == 0); + coll_sig->coll_type += 1; + + mpi_errno = MPIR_Coll_auto(coll_sig, NULL); + MPIR_ERR_CHECK(mpi_errno); + + MPIR_Request *req; + MPII_SCHED_START(coll_sig->sched_type, coll_sig->sched, coll_sig->comm_ptr, &req); + + mpi_errno = MPIC_Wait(req); + MPIR_ERR_CHECK(mpi_errno); + MPIR_Request_free(req); + + /* clean up coll_sig just in case */ + coll_sig->coll_type -= 1; + coll_sig->sched = NULL; + + fn_exit: + return mpi_errno; + fn_fail: + goto fn_exit; +} From 387fbe706b05d0126d74cfd92c421d7db29c697a Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 4 Sep 2025 10:44:00 -0500 Subject: [PATCH 22/47] coll: remove all allcomm_nb algorithms They are replaced by MPIR_Coll_nb. 
--- src/mpi/coll/allgather/Makefile.mk | 1 - src/mpi/coll/allgather/allgather_allcomm_nb.c | 29 ----------------- src/mpi/coll/allgatherv/Makefile.mk | 1 - .../coll/allgatherv/allgatherv_allcomm_nb.c | 29 ----------------- src/mpi/coll/allreduce/Makefile.mk | 1 - src/mpi/coll/allreduce/allreduce_allcomm_nb.c | 26 ---------------- src/mpi/coll/alltoall/Makefile.mk | 1 - src/mpi/coll/alltoall/alltoall_allcomm_nb.c | 29 ----------------- src/mpi/coll/alltoallv/Makefile.mk | 1 - src/mpi/coll/alltoallv/alltoallv_allcomm_nb.c | 30 ------------------ src/mpi/coll/alltoallw/Makefile.mk | 1 - src/mpi/coll/alltoallw/alltoallw_allcomm_nb.c | 30 ------------------ src/mpi/coll/barrier/Makefile.mk | 1 - src/mpi/coll/barrier/barrier_allcomm_nb.c | 25 --------------- src/mpi/coll/bcast/Makefile.mk | 1 - src/mpi/coll/bcast/bcast_allcomm_nb.c | 26 ---------------- src/mpi/coll/exscan/Makefile.mk | 1 - src/mpi/coll/exscan/exscan_allcomm_nb.c | 26 ---------------- src/mpi/coll/gather/Makefile.mk | 1 - src/mpi/coll/gather/gather_allcomm_nb.c | 28 ----------------- src/mpi/coll/gatherv/Makefile.mk | 1 - src/mpi/coll/gatherv/gatherv_allcomm_nb.c | 29 ----------------- src/mpi/coll/neighbor_allgather/Makefile.mk | 3 +- .../neighbor_allgather_allcomm_nb.c | 29 ----------------- src/mpi/coll/neighbor_allgatherv/Makefile.mk | 3 +- .../neighbor_allgatherv_allcomm_nb.c | 30 ------------------ src/mpi/coll/neighbor_alltoall/Makefile.mk | 3 +- .../neighbor_alltoall_allcomm_nb.c | 28 ----------------- src/mpi/coll/neighbor_alltoallv/Makefile.mk | 3 +- .../neighbor_alltoallv_allcomm_nb.c | 31 ------------------- src/mpi/coll/neighbor_alltoallw/Makefile.mk | 3 +- .../neighbor_alltoallw_allcomm_nb.c | 31 ------------------- src/mpi/coll/reduce/Makefile.mk | 1 - src/mpi/coll/reduce/reduce_allcomm_nb.c | 27 ---------------- src/mpi/coll/reduce_scatter/Makefile.mk | 1 - .../reduce_scatter_allcomm_nb.c | 28 ----------------- src/mpi/coll/reduce_scatter_block/Makefile.mk | 3 +- 
.../reduce_scatter_block_allcomm_nb.c | 28 ----------------- src/mpi/coll/scan/Makefile.mk | 1 - src/mpi/coll/scan/scan_allcomm_nb.c | 26 ---------------- src/mpi/coll/scatter/Makefile.mk | 1 - src/mpi/coll/scatter/scatter_allcomm_nb.c | 29 ----------------- src/mpi/coll/scatterv/Makefile.mk | 1 - src/mpi/coll/scatterv/scatterv_allcomm_nb.c | 30 ------------------ 44 files changed, 6 insertions(+), 652 deletions(-) delete mode 100644 src/mpi/coll/allgather/allgather_allcomm_nb.c delete mode 100644 src/mpi/coll/allgatherv/allgatherv_allcomm_nb.c delete mode 100644 src/mpi/coll/allreduce/allreduce_allcomm_nb.c delete mode 100644 src/mpi/coll/alltoall/alltoall_allcomm_nb.c delete mode 100644 src/mpi/coll/alltoallv/alltoallv_allcomm_nb.c delete mode 100644 src/mpi/coll/alltoallw/alltoallw_allcomm_nb.c delete mode 100644 src/mpi/coll/barrier/barrier_allcomm_nb.c delete mode 100644 src/mpi/coll/bcast/bcast_allcomm_nb.c delete mode 100644 src/mpi/coll/exscan/exscan_allcomm_nb.c delete mode 100644 src/mpi/coll/gather/gather_allcomm_nb.c delete mode 100644 src/mpi/coll/gatherv/gatherv_allcomm_nb.c delete mode 100644 src/mpi/coll/neighbor_allgather/neighbor_allgather_allcomm_nb.c delete mode 100644 src/mpi/coll/neighbor_allgatherv/neighbor_allgatherv_allcomm_nb.c delete mode 100644 src/mpi/coll/neighbor_alltoall/neighbor_alltoall_allcomm_nb.c delete mode 100644 src/mpi/coll/neighbor_alltoallv/neighbor_alltoallv_allcomm_nb.c delete mode 100644 src/mpi/coll/neighbor_alltoallw/neighbor_alltoallw_allcomm_nb.c delete mode 100644 src/mpi/coll/reduce/reduce_allcomm_nb.c delete mode 100644 src/mpi/coll/reduce_scatter/reduce_scatter_allcomm_nb.c delete mode 100644 src/mpi/coll/reduce_scatter_block/reduce_scatter_block_allcomm_nb.c delete mode 100644 src/mpi/coll/scan/scan_allcomm_nb.c delete mode 100644 src/mpi/coll/scatter/scatter_allcomm_nb.c delete mode 100644 src/mpi/coll/scatterv/scatterv_allcomm_nb.c diff --git a/src/mpi/coll/allgather/Makefile.mk 
b/src/mpi/coll/allgather/Makefile.mk index 0085e0965df..2ef57a17846 100644 --- a/src/mpi/coll/allgather/Makefile.mk +++ b/src/mpi/coll/allgather/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/allgather/allgather_allcomm_nb.c \ src/mpi/coll/allgather/allgather_intra_smp.c \ src/mpi/coll/allgather/allgather_intra_recursive_doubling.c \ src/mpi/coll/allgather/allgather_intra_brucks.c \ diff --git a/src/mpi/coll/allgather/allgather_allcomm_nb.c b/src/mpi/coll/allgather/allgather_allcomm_nb.c deleted file mode 100644 index 4e4f09ff6c7..00000000000 --- a/src/mpi/coll/allgather/allgather_allcomm_nb.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Allgather_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, - &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/allgatherv/Makefile.mk b/src/mpi/coll/allgatherv/Makefile.mk index cf96245a97c..d69ad2daa20 100644 --- a/src/mpi/coll/allgatherv/Makefile.mk +++ b/src/mpi/coll/allgatherv/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/allgatherv/allgatherv_allcomm_nb.c \ src/mpi/coll/allgatherv/allgatherv_intra_recursive_doubling.c \ src/mpi/coll/allgatherv/allgatherv_intra_brucks.c \ src/mpi/coll/allgatherv/allgatherv_intra_ring.c \ diff --git a/src/mpi/coll/allgatherv/allgatherv_allcomm_nb.c b/src/mpi/coll/allgatherv/allgatherv_allcomm_nb.c 
deleted file mode 100644 index fd2aa393b73..00000000000 --- a/src/mpi/coll/allgatherv/allgatherv_allcomm_nb.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Allgatherv_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, - comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/allreduce/Makefile.mk b/src/mpi/coll/allreduce/Makefile.mk index 5db420bd059..73ca4b901eb 100644 --- a/src/mpi/coll/allreduce/Makefile.mk +++ b/src/mpi/coll/allreduce/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/allreduce/allreduce_allcomm_nb.c \ src/mpi/coll/allreduce/allreduce_intra_recursive_doubling.c \ src/mpi/coll/allreduce/allreduce_intra_recursive_multiplying.c \ src/mpi/coll/allreduce/allreduce_intra_reduce_scatter_allgather.c \ diff --git a/src/mpi/coll/allreduce/allreduce_allcomm_nb.c b/src/mpi/coll/allreduce/allreduce_allcomm_nb.c deleted file mode 100644 index 04e4c366c2c..00000000000 --- a/src/mpi/coll/allreduce/allreduce_allcomm_nb.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Allreduce_allcomm_nb(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - 
MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/alltoall/Makefile.mk b/src/mpi/coll/alltoall/Makefile.mk index 4b13038dcce..b06a301fcba 100644 --- a/src/mpi/coll/alltoall/Makefile.mk +++ b/src/mpi/coll/alltoall/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/alltoall/alltoall_allcomm_nb.c \ src/mpi/coll/alltoall/alltoall_intra_pairwise_sendrecv_replace.c \ src/mpi/coll/alltoall/alltoall_intra_brucks.c \ src/mpi/coll/alltoall/alltoall_intra_k_brucks.c \ diff --git a/src/mpi/coll/alltoall/alltoall_allcomm_nb.c b/src/mpi/coll/alltoall/alltoall_allcomm_nb.c deleted file mode 100644 index 22d4aaa9f5f..00000000000 --- a/src/mpi/coll/alltoall/alltoall_allcomm_nb.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Alltoall_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, - &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/alltoallv/Makefile.mk b/src/mpi/coll/alltoallv/Makefile.mk index 932b9507751..8e63bb01407 100644 --- a/src/mpi/coll/alltoallv/Makefile.mk +++ 
b/src/mpi/coll/alltoallv/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/alltoallv/alltoallv_allcomm_nb.c \ src/mpi/coll/alltoallv/alltoallv_intra_pairwise_sendrecv_replace.c \ src/mpi/coll/alltoallv/alltoallv_intra_scattered.c \ src/mpi/coll/alltoallv/alltoallv_inter_pairwise_exchange.c diff --git a/src/mpi/coll/alltoallv/alltoallv_allcomm_nb.c b/src/mpi/coll/alltoallv/alltoallv_allcomm_nb.c deleted file mode 100644 index 434e9c116b1..00000000000 --- a/src/mpi/coll/alltoallv/alltoallv_allcomm_nb.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Alltoallv_allcomm_nb(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * sdispls, MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint * recvcounts, const MPI_Aint * rdispls, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ialltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, - recvtype, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/alltoallw/Makefile.mk b/src/mpi/coll/alltoallw/Makefile.mk index 7b9df5f239b..d1eec01c212 100644 --- a/src/mpi/coll/alltoallw/Makefile.mk +++ b/src/mpi/coll/alltoallw/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/alltoallw/alltoallw_allcomm_nb.c \ src/mpi/coll/alltoallw/alltoallw_intra_pairwise_sendrecv_replace.c \ src/mpi/coll/alltoallw/alltoallw_intra_scattered.c \ src/mpi/coll/alltoallw/alltoallw_inter_pairwise_exchange.c diff --git a/src/mpi/coll/alltoallw/alltoallw_allcomm_nb.c b/src/mpi/coll/alltoallw/alltoallw_allcomm_nb.c 
deleted file mode 100644 index 669f8517c0a..00000000000 --- a/src/mpi/coll/alltoallw/alltoallw_allcomm_nb.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Alltoallw_allcomm_nb(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], const MPI_Aint rdispls[], - const MPI_Datatype recvtypes[], MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ialltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, - recvtypes, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/barrier/Makefile.mk b/src/mpi/coll/barrier/Makefile.mk index 082d1f0fac4..c1e7aecbec3 100644 --- a/src/mpi/coll/barrier/Makefile.mk +++ b/src/mpi/coll/barrier/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/barrier/barrier_allcomm_nb.c \ src/mpi/coll/barrier/barrier_intra_k_dissemination.c \ src/mpi/coll/barrier/barrier_intra_recexch.c \ src/mpi/coll/barrier/barrier_intra_smp.c \ diff --git a/src/mpi/coll/barrier/barrier_allcomm_nb.c b/src/mpi/coll/barrier/barrier_allcomm_nb.c deleted file mode 100644 index f1f38075135..00000000000 --- a/src/mpi/coll/barrier/barrier_allcomm_nb.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Barrier_allcomm_nb(MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno 
= MPIR_Ibarrier(comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/bcast/Makefile.mk b/src/mpi/coll/bcast/Makefile.mk index 32fb8bbe2be..989a3ea2bee 100644 --- a/src/mpi/coll/bcast/Makefile.mk +++ b/src/mpi/coll/bcast/Makefile.mk @@ -9,7 +9,6 @@ mpi_core_sources += \ src/mpi/coll/bcast/bcast_utils.c \ - src/mpi/coll/bcast/bcast_allcomm_nb.c \ src/mpi/coll/bcast/bcast_intra_binomial.c \ src/mpi/coll/bcast/bcast_intra_scatter_recursive_doubling_allgather.c \ src/mpi/coll/bcast/bcast_intra_scatter_ring_allgather.c \ diff --git a/src/mpi/coll/bcast/bcast_allcomm_nb.c b/src/mpi/coll/bcast/bcast_allcomm_nb.c deleted file mode 100644 index 6879d59886d..00000000000 --- a/src/mpi/coll/bcast/bcast_allcomm_nb.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Bcast_allcomm_nb(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Ibcast(buffer, count, datatype, root, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/exscan/Makefile.mk b/src/mpi/coll/exscan/Makefile.mk index a2ee7989ecf..5efd175be22 100644 --- a/src/mpi/coll/exscan/Makefile.mk +++ b/src/mpi/coll/exscan/Makefile.mk @@ -8,5 +8,4 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/exscan/exscan_allcomm_nb.c \ src/mpi/coll/exscan/exscan_intra_recursive_doubling.c diff --git a/src/mpi/coll/exscan/exscan_allcomm_nb.c 
b/src/mpi/coll/exscan/exscan_allcomm_nb.c deleted file mode 100644 index c015af7b101..00000000000 --- a/src/mpi/coll/exscan/exscan_allcomm_nb.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Exscan_allcomm_nb(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Iexscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/gather/Makefile.mk b/src/mpi/coll/gather/Makefile.mk index f6cd7ea1ac5..3846c425629 100644 --- a/src/mpi/coll/gather/Makefile.mk +++ b/src/mpi/coll/gather/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/gather/gather_allcomm_nb.c \ src/mpi/coll/gather/gather_intra_binomial.c \ src/mpi/coll/gather/gather_inter_linear.c \ src/mpi/coll/gather/gather_inter_local_gather_remote_send.c diff --git a/src/mpi/coll/gather/gather_allcomm_nb.c b/src/mpi/coll/gather/gather_allcomm_nb.c deleted file mode 100644 index 9817b26f5b7..00000000000 --- a/src/mpi/coll/gather/gather_allcomm_nb.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Gather_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - 
MPIR_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, - &req_ptr); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/gatherv/Makefile.mk b/src/mpi/coll/gatherv/Makefile.mk index 6d7221fa917..bf2e48dddf3 100644 --- a/src/mpi/coll/gatherv/Makefile.mk +++ b/src/mpi/coll/gatherv/Makefile.mk @@ -8,5 +8,4 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/gatherv/gatherv_allcomm_nb.c \ src/mpi/coll/gatherv/gatherv_allcomm_linear.c diff --git a/src/mpi/coll/gatherv/gatherv_allcomm_nb.c b/src/mpi/coll/gatherv/gatherv_allcomm_nb.c deleted file mode 100644 index c8a90e5fa3f..00000000000 --- a/src/mpi/coll/gatherv/gatherv_allcomm_nb.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Gatherv_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint * recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, int root, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, - comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/neighbor_allgather/Makefile.mk b/src/mpi/coll/neighbor_allgather/Makefile.mk index 30c4f43957d..a8003424c82 100644 --- a/src/mpi/coll/neighbor_allgather/Makefile.mk +++ b/src/mpi/coll/neighbor_allgather/Makefile.mk @@ -7,5 +7,4 @@ # The code for the MPI operations (e.g., MPI_SUM) is not included in # mpi_sources -mpi_core_sources += \ 
- src/mpi/coll/neighbor_allgather/neighbor_allgather_allcomm_nb.c +# Currently the only algorithm for neighbor_allgather is MPIR_Coll_nb. diff --git a/src/mpi/coll/neighbor_allgather/neighbor_allgather_allcomm_nb.c b/src/mpi/coll/neighbor_allgather/neighbor_allgather_allcomm_nb.c deleted file mode 100644 index 30df508c89d..00000000000 --- a/src/mpi/coll/neighbor_allgather/neighbor_allgather_allcomm_nb.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Neighbor_allgather_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ineighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, - comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/neighbor_allgatherv/Makefile.mk b/src/mpi/coll/neighbor_allgatherv/Makefile.mk index b2b92fb8c4b..241af00feb2 100644 --- a/src/mpi/coll/neighbor_allgatherv/Makefile.mk +++ b/src/mpi/coll/neighbor_allgatherv/Makefile.mk @@ -7,5 +7,4 @@ # The code for the MPI operations (e.g., MPI_SUM) is not included in # mpi_sources -mpi_core_sources += \ - src/mpi/coll/neighbor_allgatherv/neighbor_allgatherv_allcomm_nb.c +# Currently the only algorithm for neighbor_allgatherv is MPIR_Coll_nb. 
diff --git a/src/mpi/coll/neighbor_allgatherv/neighbor_allgatherv_allcomm_nb.c b/src/mpi/coll/neighbor_allgatherv/neighbor_allgatherv_allcomm_nb.c deleted file mode 100644 index 471bd766f50..00000000000 --- a/src/mpi/coll/neighbor_allgatherv/neighbor_allgatherv_allcomm_nb.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Neighbor_allgatherv_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint recvcounts[], const MPI_Aint displs[], - MPI_Datatype recvtype, MPIR_Comm * comm_ptr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Ineighbor_allgatherv(sendbuf, sendcount, sendtype, - recvbuf, recvcounts, displs, recvtype, comm_ptr, - &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/neighbor_alltoall/Makefile.mk b/src/mpi/coll/neighbor_alltoall/Makefile.mk index adacd6c806f..a086126e18c 100644 --- a/src/mpi/coll/neighbor_alltoall/Makefile.mk +++ b/src/mpi/coll/neighbor_alltoall/Makefile.mk @@ -7,5 +7,4 @@ # The code for the MPI operations (e.g., MPI_SUM) is not included in # mpi_sources -mpi_core_sources += \ - src/mpi/coll/neighbor_alltoall/neighbor_alltoall_allcomm_nb.c +# Currently the only algorithm for neighbor_alltoall is MPIR_Coll_nb. 
diff --git a/src/mpi/coll/neighbor_alltoall/neighbor_alltoall_allcomm_nb.c b/src/mpi/coll/neighbor_alltoall/neighbor_alltoall_allcomm_nb.c deleted file mode 100644 index 4fda947004f..00000000000 --- a/src/mpi/coll/neighbor_alltoall/neighbor_alltoall_allcomm_nb.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Neighbor_alltoall_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, MPIR_Comm * comm_ptr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Ineighbor_alltoall(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/neighbor_alltoallv/Makefile.mk b/src/mpi/coll/neighbor_alltoallv/Makefile.mk index 2a70776f2cb..e50aeecbb8b 100644 --- a/src/mpi/coll/neighbor_alltoallv/Makefile.mk +++ b/src/mpi/coll/neighbor_alltoallv/Makefile.mk @@ -7,5 +7,4 @@ # The code for the MPI operations (e.g., MPI_SUM) is not included in # mpi_sources -mpi_core_sources += \ - src/mpi/coll/neighbor_alltoallv/neighbor_alltoallv_allcomm_nb.c +# Currently the only algorithm for neighbor_alltoallv is MPIR_Coll_nb. 
diff --git a/src/mpi/coll/neighbor_alltoallv/neighbor_alltoallv_allcomm_nb.c b/src/mpi/coll/neighbor_alltoallv/neighbor_alltoallv_allcomm_nb.c deleted file mode 100644 index af010b378c7..00000000000 --- a/src/mpi/coll/neighbor_alltoallv/neighbor_alltoallv_allcomm_nb.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Neighbor_alltoallv_allcomm_nb(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], MPI_Datatype sendtype, - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], MPI_Datatype recvtype, - MPIR_Comm * comm_ptr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ineighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, - rdispls, recvtype, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/neighbor_alltoallw/Makefile.mk b/src/mpi/coll/neighbor_alltoallw/Makefile.mk index addd9aa0751..463dad0fe31 100644 --- a/src/mpi/coll/neighbor_alltoallw/Makefile.mk +++ b/src/mpi/coll/neighbor_alltoallw/Makefile.mk @@ -7,5 +7,4 @@ # The code for the MPI operations (e.g., MPI_SUM) is not included in # mpi_sources -mpi_core_sources += \ - src/mpi/coll/neighbor_alltoallw/neighbor_alltoallw_allcomm_nb.c +# Currently the only algorithm for neighbor_alltoallw is MPIR_Coll_nb. 
diff --git a/src/mpi/coll/neighbor_alltoallw/neighbor_alltoallw_allcomm_nb.c b/src/mpi/coll/neighbor_alltoallw/neighbor_alltoallw_allcomm_nb.c deleted file mode 100644 index e98e9cd7bd6..00000000000 --- a/src/mpi/coll/neighbor_alltoallw/neighbor_alltoallw_allcomm_nb.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Neighbor_alltoallw_allcomm_nb(const void *sendbuf, const MPI_Aint sendcounts[], - const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], - void *recvbuf, const MPI_Aint recvcounts[], - const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], - MPIR_Comm * comm_ptr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, - rdispls, recvtypes, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/reduce/Makefile.mk b/src/mpi/coll/reduce/Makefile.mk index e686543e43a..6be89ca43d3 100644 --- a/src/mpi/coll/reduce/Makefile.mk +++ b/src/mpi/coll/reduce/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/reduce/reduce_allcomm_nb.c \ src/mpi/coll/reduce/reduce_intra_binomial.c \ src/mpi/coll/reduce/reduce_intra_reduce_scatter_gather.c \ src/mpi/coll/reduce/reduce_intra_smp.c \ diff --git a/src/mpi/coll/reduce/reduce_allcomm_nb.c b/src/mpi/coll/reduce/reduce_allcomm_nb.c deleted file mode 100644 index 5bfbdbb973e..00000000000 --- a/src/mpi/coll/reduce/reduce_allcomm_nb.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Reduce_allcomm_nb(const void *sendbuf, 
void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Ireduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/reduce_scatter/Makefile.mk b/src/mpi/coll/reduce_scatter/Makefile.mk index 96c9263b6a8..2bb1b69ef73 100644 --- a/src/mpi/coll/reduce_scatter/Makefile.mk +++ b/src/mpi/coll/reduce_scatter/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/reduce_scatter/reduce_scatter_allcomm_nb.c \ src/mpi/coll/reduce_scatter/reduce_scatter_intra_recursive_halving.c \ src/mpi/coll/reduce_scatter/reduce_scatter_intra_pairwise.c \ src/mpi/coll/reduce_scatter/reduce_scatter_intra_recursive_doubling.c \ diff --git a/src/mpi/coll/reduce_scatter/reduce_scatter_allcomm_nb.c b/src/mpi/coll/reduce_scatter/reduce_scatter_allcomm_nb.c deleted file mode 100644 index 674babdc285..00000000000 --- a/src/mpi/coll/reduce_scatter/reduce_scatter_allcomm_nb.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Reduce_scatter_allcomm_nb(const void *sendbuf, void *recvbuf, const MPI_Aint recvcounts[], - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - 
fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/reduce_scatter_block/Makefile.mk b/src/mpi/coll/reduce_scatter_block/Makefile.mk index 86f19e28c64..4d493454707 100644 --- a/src/mpi/coll/reduce_scatter_block/Makefile.mk +++ b/src/mpi/coll/reduce_scatter_block/Makefile.mk @@ -7,8 +7,7 @@ # The code for the MPI operations (e.g., MPI_SUM) is not included in # mpi_sources -mpi_core_sources += \ - src/mpi/coll/reduce_scatter_block/reduce_scatter_block_allcomm_nb.c \ +mpi_core_sources += \ src/mpi/coll/reduce_scatter_block/reduce_scatter_block_intra_recursive_halving.c \ src/mpi/coll/reduce_scatter_block/reduce_scatter_block_intra_pairwise.c \ src/mpi/coll/reduce_scatter_block/reduce_scatter_block_intra_recursive_doubling.c \ diff --git a/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_allcomm_nb.c b/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_allcomm_nb.c deleted file mode 100644 index a64b7d949d8..00000000000 --- a/src/mpi/coll/reduce_scatter_block/reduce_scatter_block_allcomm_nb.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Reduce_scatter_block_allcomm_nb(const void *sendbuf, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm_ptr, - int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/scan/Makefile.mk b/src/mpi/coll/scan/Makefile.mk index bd3326bb471..d559778ed7d 100644 --- a/src/mpi/coll/scan/Makefile.mk +++ b/src/mpi/coll/scan/Makefile.mk @@ -8,6 +8,5 @@ # 
mpi_sources mpi_core_sources += \ - src/mpi/coll/scan/scan_allcomm_nb.c \ src/mpi/coll/scan/scan_intra_recursive_doubling.c \ src/mpi/coll/scan/scan_intra_smp.c diff --git a/src/mpi/coll/scan/scan_allcomm_nb.c b/src/mpi/coll/scan/scan_allcomm_nb.c deleted file mode 100644 index 314335ffbdc..00000000000 --- a/src/mpi/coll/scan/scan_allcomm_nb.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Scan_allcomm_nb(const void *sendbuf, void *recvbuf, MPI_Aint count, MPI_Datatype datatype, - MPI_Op op, MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = MPIR_Iscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/scatter/Makefile.mk b/src/mpi/coll/scatter/Makefile.mk index 9ba53c1a05a..474b4adfb9b 100644 --- a/src/mpi/coll/scatter/Makefile.mk +++ b/src/mpi/coll/scatter/Makefile.mk @@ -8,7 +8,6 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/scatter/scatter_allcomm_nb.c \ src/mpi/coll/scatter/scatter_intra_binomial.c \ src/mpi/coll/scatter/scatter_inter_linear.c \ src/mpi/coll/scatter/scatter_inter_remote_send_local_scatter.c diff --git a/src/mpi/coll/scatter/scatter_allcomm_nb.c b/src/mpi/coll/scatter/scatter_allcomm_nb.c deleted file mode 100644 index 0ae69e48206..00000000000 --- a/src/mpi/coll/scatter/scatter_allcomm_nb.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Scatter_allcomm_nb(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, - void *recvbuf, MPI_Aint recvcount, 
MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, - &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} diff --git a/src/mpi/coll/scatterv/Makefile.mk b/src/mpi/coll/scatterv/Makefile.mk index 89ddc847dc6..06a3d1d7ff5 100644 --- a/src/mpi/coll/scatterv/Makefile.mk +++ b/src/mpi/coll/scatterv/Makefile.mk @@ -8,5 +8,4 @@ # mpi_sources mpi_core_sources += \ - src/mpi/coll/scatterv/scatterv_allcomm_nb.c \ src/mpi/coll/scatterv/scatterv_allcomm_linear.c diff --git a/src/mpi/coll/scatterv/scatterv_allcomm_nb.c b/src/mpi/coll/scatterv/scatterv_allcomm_nb.c deleted file mode 100644 index bcbb8424878..00000000000 --- a/src/mpi/coll/scatterv/scatterv_allcomm_nb.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" - -int MPIR_Scatterv_allcomm_nb(const void *sendbuf, const MPI_Aint * sendcounts, - const MPI_Aint * displs, MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, int root, - MPIR_Comm * comm_ptr, int coll_attr) -{ - int mpi_errno = MPI_SUCCESS; - MPIR_Request *req_ptr = NULL; - - /* just call the nonblocking version and wait on it */ - mpi_errno = - MPIR_Iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, - comm_ptr, &req_ptr); - MPIR_ERR_CHECK(mpi_errno); - - mpi_errno = MPIC_Wait(req_ptr); - MPIR_ERR_CHECK(mpi_errno); - MPIR_Request_free(req_ptr); - - fn_exit: - return mpi_errno; - fn_fail: - goto fn_exit; -} From b498b79b57455f08f5ee012bf20511a757f23204 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 21 
Aug 2025 14:55:48 -0500 Subject: [PATCH 23/47] coll: add abstract coll algo interface and auto functions Define an abstract collective algorithm function interface that uses MPII_Csel_container_s and MPIR_Csel_coll_sig_s. Both structure will have mechanism for device layer to extend with its own fields. All collective algorithms will be populated in a global MPIR_Coll_algo_table. Device layer can fillin its device-specific entries in MPID_Init. MPIR_Coll_auto and MPIR_Coll_composition_auto serve as auto collective functions that runs Csel search then call the selected algorithm by looking up the entries from MPIR_Coll_algo_table. --- src/mpi/coll/src/coll_impl.c | 39 +++++++++++++++++++++++++++++++++++- src/mpi/errhan/errnames.txt | 3 +++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 4dcf8c35763..b5c0f876d95 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -119,6 +119,9 @@ const char *MPIR_Csel_source; void *MPIR_Csel_composition = NULL; void *MPIR_Csel_selection = NULL; +/* table of all collective algorithms */ +MPIR_Coll_algo_fn *MPIR_Coll_algo_table; + MPIR_Tree_type_t get_tree_type_from_string(const char *tree_str) { MPIR_Tree_type_t tree_type = MPIR_TREE_TYPE_KARY; @@ -228,6 +231,10 @@ int MPII_Coll_init(void) LOAD_CSEL_JSON(MPIR_Csel_selection, MPIR_CVAR_COLL_SELECTION_JSON_FILE, MPII_coll_selection_json); + MPIR_Coll_algo_table = MPL_malloc(MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count * + sizeof(MPIR_Coll_algo_fn), MPL_MEM_COLL); + MPIR_Coll_algo_init(); + fn_exit: return mpi_errno; fn_fail: @@ -247,6 +254,8 @@ int MPII_Coll_finalize(void) mpi_errno = MPIR_Csel_free(MPIR_Csel_root); MPIR_ERR_CHECK(mpi_errno); + MPL_free(MPIR_Coll_algo_table); + fn_exit: return mpi_errno; fn_fail: @@ -399,18 +408,46 @@ void MPIR_Coll_host_buffer_persist_set(void *host_sendbuf, void *host_recvbuf, v } } +void MPIR_Coll_algo_init(void) +{ + /* manual entries 
now, but we will replace it with autogen later */ + MPIR_Coll_algo_table[MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto] = MPIR_Coll_auto; +} + int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig) { int mpi_errno = MPI_SUCCESS; + /* TODO: need a mechanism in coll_sig so we can assert and prevent a dead recursion loop */ + + MPII_Csel_container_s *cnt = MPIR_Csel_search(MPIR_Csel_composition, coll_sig); + MPIR_ERR_CHKANDJUMP(!cnt, mpi_errno, MPI_ERR_OTHER, "**csel_noresult"); + + mpi_errno = MPIR_Coll_algo_table[cnt->id] (coll_sig, cnt); + MPIR_ERR_CHECK(mpi_errno); + + fn_exit: return mpi_errno; + fn_fail: + goto fn_exit; } -int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig) +int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me) { int mpi_errno = MPI_SUCCESS; + MPII_Csel_container_s *cnt = MPIR_Csel_search(MPIR_Csel_selection, coll_sig); + MPIR_ERR_CHKANDJUMP(!cnt, mpi_errno, MPI_ERR_OTHER, "**csel_noresult"); + + /* TODO: assert the selected algorithm is not a composition algorithm */ + + mpi_errno = MPIR_Coll_algo_table[cnt->id] (coll_sig, cnt); + MPIR_ERR_CHECK(mpi_errno); + + fn_exit: return mpi_errno; + fn_fail: + goto fn_exit; } /* blocking collectives by calling its nonblocking forms */ diff --git a/src/mpi/errhan/errnames.txt b/src/mpi/errhan/errnames.txt index 3783948d523..3c6e780b2c8 100644 --- a/src/mpi/errhan/errnames.txt +++ b/src/mpi/errhan/errnames.txt @@ -944,6 +944,9 @@ is too big (> MPIU_SHMW_GHND_SZ) **mpir_wingetattr:MPII_Win_get_attr failed **mpir_wingetattr %W %d %p %p:MPII_Win_get_attr(%W, win_keyval=%d, attribute_val=%p, flag=%p) failed +## Collective selection +**csel_noresult: Collective selection failed to find an algorithm + ## Gentran related error messages **nofence: invalid Gentran fence **nullvertex: Gentran vertex is NULL From c71941fad05e6c1d4300b57505aeecba8d728717 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 10:16:39 -0500 Subject: [PATCH 24/47] coll/gen_coll: dump algo 
functions Dump a wrapper function for each algorithm that takes (cont, coll_sig). Separately Declare algorithm prototypes. Separately Decleare sched_auto prototypes. --- maint/gen_coll.py | 167 +++++++++++++++++++++++++++++-- src/include/mpir_csel.h | 3 + src/mpi/coll/include/coll_impl.h | 31 ++++++ 3 files changed, 192 insertions(+), 9 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 80a430d31f3..4c1d6d0ce88 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -25,9 +25,155 @@ def main(): dump_coll(a, "blocking") dump_coll(a, "nonblocking") dump_coll(a, "persistent") + # dump the container version of the algorithms + dump_algo_cnt_fns() + add_algo_prototypes() + for a in coll_names: + add_sched_auto_prototypes(a) + dump_c_file("src/mpi/coll/mpir_coll.c", G.out) dump_prototypes("src/mpi/coll/include/coll_algos.h", G.prototypes) +def dump_algo_cnt_fns(): + def get_coll_args(func, func_name): + args = [] + for p in func['parameters']: + if p['name'] == 'comm': + args.append("coll_sig->comm_ptr") + else: + args.append("coll_sig->u.%s.%s" % (func_name, p['name'])) + return ', '.join(args) + + def get_algo_args(func, func_name, algo): + args = get_coll_args(func, func_name) + if 'extra_params' in algo: + args += ", " + get_algo_extra_args(algo, "csel") + + if func_name.startswith('i'): + args += ", coll_sig->sched" + elif func_name.startswith('neighbor_'): + pass + else: + args += ", 0" # coll_attr + + return args + + def dump_algo_prep(func_name, algo): + if func_name.startswith('i'): + if algo['name'].startswith('tsp_'): + G.out.append("MPII_CSEL_CREATE_TSP_SCHED(coll_sig);") + else: + G.out.append("MPII_CSEL_CREATE_SCHED(coll_sig);") + + algo_funcname_hash = {} + for func_commkind in sorted(G.algos): + func_name, commkind = func_commkind.split("-") + if func_name.startswith('i'): + # use blocking func for base parameters + func = G.FUNCS["mpi_" + func_name[1:]] + else: + func = G.FUNCS["mpi_" + func_name] + for algo in 
G.algos[func_commkind]: + if "allcomm" in algo and commkind == "inter": + continue + algo_funcname = get_algo_funcname(func_name, commkind, algo) + if algo_funcname in algo_funcname_hash: + # skip alias algorithms + continue + else: + algo_funcname_hash[algo_funcname] = 1 + algo_args = get_algo_args(func, func_name, algo) + decl = "int %s_cnt(%s)" % (algo_funcname, "MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt") + add_prototype(decl) + dump_split(0, decl) + dump_open('{') + G.out.append("int mpi_errno = MPI_SUCCESS;") + G.out.append("") + dump_algo_prep(func_name, algo) + dump_split(1, "mpi_errno = %s(%s);" % (algo_funcname, algo_args)) + G.out.append("MPIR_ERR_CHECK(mpi_errno);") + G.out.append("") + G.out.append("fn_exit:") + G.out.append("return mpi_errno;") + G.out.append("fn_fail:") + G.out.append("goto fn_exit;") + dump_close('}') + G.out.append("") + +def add_algo_prototypes(): + def get_coll_params(func): + mapping = G.MAPS['SMALL_C_KIND_MAP'] + params = [] + for p in func['parameters']: + if p['name'] == 'comm': + params.append("MPIR_Comm * comm_ptr") + else: + s = get_C_param(p, func, mapping) + if p['kind'].startswith('POLY'): + s = re.sub(r'\bint ', 'MPI_Aint ', s) + params.append(s) + return ', '.join(params) + + def get_algo_params(func, func_name, algo): + params = get_coll_params(func) + if 'extra_params' in algo: + params += ", " + get_algo_extra_params(algo) + + if func_name.startswith('i'): + if algo['name'].startswith('tsp_'): + params += ", MPIR_TSP_sched_t s" + else: + params += ", MPIR_Sched_t s" + elif func_name.startswith('neighbor_'): + pass + else: + params += ", int coll_attr" # coll_attr + + return params + + for func_commkind in sorted(G.algos): + func_name, commkind = func_commkind.split("-") + if func_name.startswith('i'): + # use blocking func for base parameters + func = G.FUNCS["mpi_" + func_name[1:]] + else: + func = G.FUNCS["mpi_" + func_name] + + algo_funcname_hash = {} + for algo in G.algos[func_commkind]: + if 
"allcomm" in algo and commkind == "inter": + continue + algo_funcname = get_algo_funcname(func_name, commkind, algo) + if algo_funcname in algo_funcname_hash: + # skip alias algorithms + continue + else: + algo_funcname_hash[algo_funcname] = 1 + algo_params = get_algo_params(func, func_name, algo) + decl = "int %s(%s)" % (algo_funcname, algo_params) + add_prototype(decl) + +def add_sched_auto_prototypes(name): + def get_coll_params(func): + mapping = G.MAPS['SMALL_C_KIND_MAP'] + params = [] + for p in func['parameters']: + if p['name'] == 'comm': + params.append("MPIR_Comm * comm_ptr") + else: + s = get_C_param(p, func, mapping) + if p['kind'].startswith('POLY'): + s = re.sub(r'\bint ', 'MPI_Aint ', s) + params.append(s) + return ', '.join(params) + + func = G.FUNCS["mpi_" + name] + params = get_coll_params(func) + params += ", MPIR_Sched_t s" + add_prototype("int MPIR_I%s_intra_sched_auto(%s)" % (name, params)) + if not re.match(r'(scan|exscan|neighbor_)', name): + add_prototype("int MPIR_I%s_inter_sched_auto(%s)" % (name, params)) + def add_prototype(l): if RE.match(r'int\s+(\w+)\(', l): func_name = RE.m.group(1) @@ -110,7 +256,6 @@ def dump_cnt_algo_blocking(algo, commkind): algo_name = get_algo_name(algo) algo_args = get_algo_args(args, algo, "csel") algo_params = get_algo_params(params, algo) - add_prototype("int MPIR_%s_%s_%s(%s)" % (Name, commkind, algo_name, algo_params)) dump_split(3, "mpi_errno = MPIR_%s_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) dump_open("switch (cnt->id) {") @@ -178,19 +323,12 @@ def dump_allcomm_sched_auto(name): G.out.append("MPIR_Assert(cnt);") G.out.append("") - # -- add shced_auto prototypes - sched_auto_params = get_func_params(params, name, "sched_auto") - add_prototype("int MPIR_%s_intra_sched_auto(%s)" % (Name, sched_auto_params)) - if not re.match(r'(scan|exscan|neighbor_)', name): - add_prototype("int MPIR_%s_inter_sched_auto(%s)" % (Name, sched_auto_params)) - # -- switch def dump_cnt_algo_tsp(algo, commkind): 
G.out.append("MPII_GENTRAN_CREATE_SCHED_P();") algo_name = get_algo_name(algo) algo_args = get_algo_args(args, algo, "csel") algo_params = get_algo_params(params, algo) - add_prototype("int MPIR_TSP_%s_sched_%s_%s(%s)" % (Name, commkind, algo_name, algo_params)) dump_split(3, "mpi_errno = MPIR_TSP_%s_sched_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) def dump_cnt_algo_sched(algo, commkind): @@ -198,7 +336,6 @@ def dump_cnt_algo_sched(algo, commkind): algo_name = get_algo_name(algo) algo_args = get_algo_args(args, algo, "csel") algo_params = get_algo_params(params, algo) - add_prototype("int MPIR_%s_%s_%s(%s)" % (Name, commkind, algo_name, algo_params)) dump_split(3, "mpi_errno = MPIR_%s_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) dump_open("switch (cnt->id) {") @@ -578,6 +715,18 @@ def get_func_name(name, blocking_type): elif blocking_type == "persistent": return name + "_init" +def get_algo_funcname(func_name, commkind, algo): + if 'allcomm' in algo: + commkind = 'allcomm' + Name = func_name.capitalize() + if func_name.startswith('i'): + if algo['name'].startswith('tsp_'): + return "MPIR_TSP_%s_sched_%s_%s" % (Name, commkind, get_algo_name(algo)) + else: + return "MPIR_%s_%s_%s" % (Name, commkind, get_algo_name(algo)) + else: + return "MPIR_%s_%s_%s" % (Name, commkind, get_algo_name(algo)) + def get_params_and_args(func): mapping = G.MAPS['SMALL_C_KIND_MAP'] diff --git a/src/include/mpir_csel.h b/src/include/mpir_csel.h index e946e6aab62..30c9953263c 100644 --- a/src/include/mpir_csel.h +++ b/src/include/mpir_csel.h @@ -260,6 +260,9 @@ typedef enum { typedef struct { MPIR_Csel_coll_type_e coll_type; MPIR_Comm *comm_ptr; + void *sched; + enum MPIR_sched_type sched_type; + bool is_persistent; union { struct { diff --git a/src/mpi/coll/include/coll_impl.h b/src/mpi/coll/include/coll_impl.h index 0f64378397b..0f626c13f3b 100644 --- a/src/mpi/coll/include/coll_impl.h +++ b/src/mpi/coll/include/coll_impl.h @@ -84,6 +84,37 @@ int 
MPII_Coll_finalize(void); *sched_p = s; \ } while (0) +#define MPII_CSEL_CREATE_TSP_SCHED(coll_sig) \ + do { \ + if (coll_sig->sched == NULL) { \ + coll_sig->sched_type = MPIR_SCHED_GENTRAN; \ + MPIR_TSP_sched_create(&coll_sig->sched, coll_sig->is_persistent); \ + } else { \ + MPIR_Assert(coll_sig->sched_type = MPIR_SCHED_GENTRAN); \ + } \ + } while (0) + +#define MPII_CSEL_CREATE_SCHED(coll_sig) \ + do { \ + if (coll_sig->sched == NULL) { \ + MPIR_Sched_t s = MPIR_SCHED_NULL; \ + enum MPIR_Sched_kind sched_kind = MPIR_SCHED_KIND_REGULAR; \ + if (coll_sig->is_persistent) { \ + sched_kind = MPIR_SCHED_KIND_PERSISTENT; \ + } \ + mpi_errno = MPIR_Sched_create(&s, sched_kind); \ + MPIR_ERR_CHECK(mpi_errno); \ + int tag = -1; \ + mpi_errno = MPIR_Sched_next_tag(coll_sig->comm_ptr, &tag); \ + MPIR_ERR_CHECK(mpi_errno); \ + MPIR_Sched_set_tag(s, tag); \ + coll_sig->sched_type = MPIR_SCHED_NORMAL; \ + coll_sig->sched = s; \ + } else { \ + MPIR_Assert(coll_sig->sched_type = MPIR_SCHED_NORMAL); \ + } \ + } while (0) + #define MPII_SCHED_START(sched_type, sched, comm_ptr, request) \ do { \ if (sched_type == MPIR_SCHED_NORMAL) { \ From 613c8462e17f6a87ce3ece5374e9e7ad53d520c2 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 22 Aug 2025 12:27:29 -0500 Subject: [PATCH 25/47] coll/python: generate impl functions Generate collective implement functions that assemble coll_sig and call MPIR_Coll_composition_auto. 
--- maint/gen_coll.py | 631 ++++---------------------------- maint/local_python/binding_c.py | 33 -- 2 files changed, 65 insertions(+), 599 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 4c1d6d0ce88..cdd3487ecfa 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -21,10 +21,12 @@ def main(): G.prototypes = [] G.out.append("#include \"mpiimpl.h\"") G.out.append("#include \"iallgatherv/iallgatherv.h\"") + + # dump impl functions for a in coll_names: - dump_coll(a, "blocking") - dump_coll(a, "nonblocking") - dump_coll(a, "persistent") + dump_coll_impl(a, "blocking") + dump_coll_impl(a, "nonblocking") + dump_coll_impl(a, "persistent") # dump the container version of the algorithms dump_algo_cnt_fns() add_algo_prototypes() @@ -200,225 +202,13 @@ def load_coll_algos(algo_txt): algo[key] = value return All -def dump_coll(name, blocking_type): - if blocking_type == "blocking": - dump_allcomm_auto_blocking(name) - dump_mpir_impl_blocking(name) - elif blocking_type == "nonblocking": - dump_allcomm_sched_auto(name) - dump_sched_impl(name) - dump_mpir_impl_nonblocking(name) - elif blocking_type == "persistent": - dump_mpir_impl_persistent(name) - else: - raise Exception("Wrong blocking_type") - dump_mpir(name, blocking_type) - -def dump_allcomm_auto_blocking(name): - """ MPIR_Xxx_allcomm_auto - use Csel selections """ - blocking_type = "blocking" +def dump_coll_impl(name, blocking_type): func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, "blocking") - func_args = get_func_args(args, name, "blocking") + func_params = get_func_params(func, name, blocking_type) - # e.g. 
ibcast, Ibcast, IBCAST func_name = get_func_name(name, blocking_type) Name = func_name.capitalize() - NAME = func_name.upper() - - G.out.append("") - G.out.append("/* ---- %s ---- */" % func_name) - G.out.append("") - add_prototype("int MPIR_%s_allcomm_auto(%s)" % (Name, func_params)) - dump_split(0, "int MPIR_%s_allcomm_auto(%s)" % (Name, func_params)) - dump_open('{') - G.out.append("int mpi_errno = MPI_SUCCESS;") - G.out.append("") - - # -- Csel_search - dump_open("MPIR_Csel_coll_sig_s coll_sig = {") - G.out.append(".coll_type = MPIR_CSEL_COLL_TYPE__%s," % NAME) - G.out.append(".comm_ptr = comm_ptr,") - for p in func['parameters']: - if not re.match(r'comm$', p['name']): - G.out.append(".u.%s.%s = %s," % (func_name, p['name'], p['name'])) - dump_close("};") - G.out.append("") - G.out.append("MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, &coll_sig);") - G.out.append("MPIR_Assert(cnt);") - G.out.append("") - - # -- switch - def dump_cnt_algo_blocking(algo, commkind): - if "allcomm" in algo: - commkind = "allcomm" - algo_name = get_algo_name(algo) - algo_args = get_algo_args(args, algo, "csel") - algo_params = get_algo_params(params, algo) - dump_split(3, "mpi_errno = MPIR_%s_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) - - dump_open("switch (cnt->id) {") - for commkind in ("intra", "inter"): - if commkind == "inter" and re.match(r'(scan|exscan|neighbor_)', name): - continue - for algo in G.algos[func_name + "-" + commkind]: - if "allcomm" in algo: - if commkind == "intra": - commkind = "allcomm" - else: - # skip inter since it is covered already - continue - G.out.append("case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_%s_%s_%s:" % (Name, commkind, algo['name'])) - G.out.append("INDENT") - dump_cnt_algo_blocking(algo, commkind) - G.out.append("break;"); - G.out.append("DEDENT") - G.out.append("") - G.out.append("case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_%s_allcomm_nb:" % Name) - add_prototype("int MPIR_%s_allcomm_nb(%s);" % (Name, 
func_params)) - dump_split(2, " mpi_errno = MPIR_%s_allcomm_nb(%s);" % (Name, func_args)) - G.out.append(" break;"); - G.out.append("") - G.out.append("default:") - G.out.append(" MPIR_Assert(0);") - dump_close("}") - - # -- return - G.out.append("MPIR_ERR_CHECK(mpi_errno);") - dump_fn_exit() - dump_close("}") - -def dump_allcomm_sched_auto(name): - """ MPIR_Xxx_allcomm_sched_auto - use Csel selections """ - blocking_type = "nonblocking" - func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, "allcomm_sched_auto") - - # e.g. ibcast, Ibcast, IBCAST - func_name = get_func_name(name, blocking_type) - Name = func_name.capitalize() - NAME = func_name.upper() - - G.out.append("") - G.out.append("/* ---- %s ---- */" % func_name) - G.out.append("") - add_prototype("int MPIR_%s_allcomm_sched_auto(%s)" % (Name, func_params)) - dump_split(0, "int MPIR_%s_allcomm_sched_auto(%s)" % (Name, func_params)) - dump_open('{') - G.out.append("int mpi_errno = MPI_SUCCESS;") - G.out.append("") - - # -- Csel_search - dump_open("MPIR_Csel_coll_sig_s coll_sig = {") - G.out.append(".coll_type = MPIR_CSEL_COLL_TYPE__%s," % NAME) - G.out.append(".comm_ptr = comm_ptr,") - for p in func['parameters']: - if not re.match(r'comm$', p['name']): - G.out.append(".u.%s.%s = %s," % (func_name, p['name'], p['name'])) - dump_close("};") - G.out.append("") - G.out.append("MPII_Csel_container_s *cnt = MPIR_Csel_search(comm_ptr->csel_comm, &coll_sig);") - G.out.append("MPIR_Assert(cnt);") - G.out.append("") - - # -- switch - def dump_cnt_algo_tsp(algo, commkind): - G.out.append("MPII_GENTRAN_CREATE_SCHED_P();") - algo_name = get_algo_name(algo) - algo_args = get_algo_args(args, algo, "csel") - algo_params = get_algo_params(params, algo) - dump_split(3, "mpi_errno = MPIR_TSP_%s_sched_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) - - def dump_cnt_algo_sched(algo, commkind): - G.out.append("MPII_SCHED_CREATE_SCHED_P();") - algo_name = 
get_algo_name(algo) - algo_args = get_algo_args(args, algo, "csel") - algo_params = get_algo_params(params, algo) - dump_split(3, "mpi_errno = MPIR_%s_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) - - dump_open("switch (cnt->id) {") - for commkind in ("intra", "inter"): - if commkind == "inter" and re.match(r'(scan|exscan|neighbor_)', name): - continue - for algo in G.algos[func_name + "-" + commkind]: - use_commkind = commkind - if "allcomm" in algo: - if commkind == "intra": - use_commkind = "allcomm" - else: - # skip inter since it is covered already - continue - G.out.append("case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_%s_%s_%s:" % (Name, use_commkind, algo['name'])) - G.out.append("INDENT") - if algo['name'].startswith('tsp_'): - dump_cnt_algo_tsp(algo, use_commkind) - else: - dump_cnt_algo_sched(algo, use_commkind) - G.out.append("break;"); - G.out.append("DEDENT") - G.out.append("") - G.out.append("default:") - G.out.append(" MPIR_Assert(0);") - dump_close("}") - - # -- return - G.out.append("MPIR_ERR_CHECK(mpi_errno);") - dump_fn_exit() - dump_close("}") - -def dump_mpir_impl_blocking(name): - """ MPIR_Xxx_impl - """ - blocking_type = "blocking" - func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, "blocking") - func_args = get_func_args(args, name, "blocking") - func_name = get_func_name(name, blocking_type) - Name = func_name.capitalize() - NAME = func_name.upper() - - need_fallback = False - - def dump_algo(algo, commkind): - if "allcomm" in algo: - commkind = "allcomm" - algo_name = get_algo_name(algo) - algo_args = get_algo_args(args, algo, "cvar") - dump_split(3, "mpi_errno = MPIR_%s_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) - - def dump_cases(commkind): - nonlocal need_fallback - CVAR_PREFIX = "MPIR_CVAR_%s_%s_ALGORITHM" % (NAME, commkind.upper()) - for algo in G.algos[func_name + '-' + commkind]: - if algo['name'] != "auto" and algo['name'] != "nb": - 
G.out.append("case %s_%s:" % (CVAR_PREFIX, algo['name'])) - G.out.append("INDENT") - if 'restrictions' in algo: - dump_fallback(algo) - need_fallback = True - dump_algo(algo, commkind) - G.out.append("break;"); - G.out.append("DEDENT") - G.out.append("case %s_nb:" % CVAR_PREFIX) - dump_split(3, " mpi_errno = MPIR_%s_allcomm_nb(%s);" % (Name, func_args)) - G.out.append(" break;"); - G.out.append("case %s_auto:" % CVAR_PREFIX) - if commkind == "intra": - G.out.append("#ifdef MPIR_%s_fallback" % (Name)) - G.out.append(" if (!comm_ptr->csel_comm) {") - G.out.append(" mpi_errno = MPIR_%s_fallback(%s);" % (Name, func_args)) - G.out.append(" break;") - G.out.append(" }") - G.out.append("#endif"); - dump_split(3, " mpi_errno = MPIR_%s_allcomm_auto(%s);" % (Name, func_args)) - G.out.append(" break;"); - G.out.append("default:") - G.out.append(" MPIR_Assert(0);") - - # ---------------- G.out.append("") add_prototype("int MPIR_%s_impl(%s)" % (Name, func_params)) dump_split(0, "int MPIR_%s_impl(%s)" % (Name, func_params)) @@ -426,285 +216,55 @@ def dump_cases(commkind): G.out.append("int mpi_errno = MPI_SUCCESS;") G.out.append("") - dump_open("if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {") - dump_open("switch (MPIR_CVAR_%s_INTRA_ALGORITHM) {" % NAME) - dump_cases("intra") - dump_close("}") - dump_else() - if re.match(r'(scan|exscan|neighbor_)', name): - G.out.append("MPIR_Assert_error(\"Only intra-communicator allowed\");") + # Initialize coll_sig + G.out.append("MPIR_Csel_coll_sig_s coll_sig;") + if blocking_type == "blocking": + G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__%s;" % name.upper()) else: - dump_open("switch (MPIR_CVAR_%s_INTER_ALGORITHM) {" % NAME) - dump_cases("inter") - dump_close("}") - dump_close("}") - - G.out.append("MPIR_ERR_CHECK(mpi_errno);") - if need_fallback: - G.out.append("goto fn_exit;") - G.out.append("") - G.out.append("fallback:") - dump_split(1, "mpi_errno = MPIR_%s_allcomm_auto(%s);" % (Name, func_args)) - 
G.out.append("") - dump_fn_exit() - dump_close("}") - -def dump_sched_impl(name): - """ MPIR_Xxx_impl - """ - blocking_type = "nonblocking" - func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, "sched_impl") - - func_name = get_func_name(name, blocking_type) - Name = func_name.capitalize() - NAME = func_name.upper() - - need_fallback = False - - def dump_algo_tsp(algo, commkind): - G.out.append("MPII_GENTRAN_CREATE_SCHED_P();") - algo_name = get_algo_name(algo) - algo_args = get_algo_args(args, algo, "cvar") - if "allcomm" in algo: - commkind = "allcomm" - dump_split(3, "mpi_errno = MPIR_TSP_%s_sched_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) - - def dump_algo_sched(algo, commkind): - G.out.append("MPII_SCHED_CREATE_SCHED_P();") - algo_name = get_algo_name(algo) - algo_args = get_algo_args(args, algo, "cvar") - if "allcomm" in algo: - commkind = "allcomm" - dump_split(3, "mpi_errno = MPIR_%s_%s_%s(%s);" % (Name, commkind, algo_name, algo_args)) - - def dump_cases(commkind): - nonlocal need_fallback - CVAR_PREFIX = "MPIR_CVAR_%s_%s_ALGORITHM" % (NAME, commkind.upper()) - for algo in G.algos[func_name + '-' + commkind]: - if algo['name'] != "auto" and algo['name'] != "nb": - G.out.append("case %s_%s:" % (CVAR_PREFIX, algo['name'])) - G.out.append("INDENT") - if 'restrictions' in algo: - dump_fallback(algo) - need_fallback = True - if algo['name'].startswith('tsp_'): - dump_algo_tsp(algo, commkind) - else: - dump_algo_sched(algo, commkind) - G.out.append("break;"); - G.out.append("DEDENT") - G.out.append("case %s_auto:" % CVAR_PREFIX) - func_args = get_func_args(args, name, "allcomm_sched_auto") - dump_split(3, " mpi_errno = MPIR_%s_allcomm_sched_auto(%s);" % (Name, func_args)) - G.out.append(" break;"); - G.out.append("default:") - G.out.append(" MPIR_Assert(0);") - - # ---------------- - G.out.append("") - add_prototype("int MPIR_%s_sched_impl(%s)" % (Name, func_params)) - dump_split(0, 
"int MPIR_%s_sched_impl(%s)" % (Name, func_params)) - dump_open('{') - G.out.append("int mpi_errno = MPI_SUCCESS;") - G.out.append("") - - dump_open("if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {") - dump_open("switch (MPIR_CVAR_%s_INTRA_ALGORITHM) {" % NAME) - dump_cases("intra") - dump_close("}") - dump_else() - if re.match(r'(scan|exscan|neighbor_)', name): - G.out.append("MPIR_Assert_error(\"Only intra-communicator allowed\");") + # nonblocking and persistent + G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__I%s;" % name.upper()) + G.out.append("coll_sig.comm_ptr = comm_ptr;") + if blocking_type == "persistent": + G.out.append("coll_sig.is_persistent = true;") else: - dump_open("switch (MPIR_CVAR_%s_INTER_ALGORITHM) {" % NAME) - dump_cases("inter") - dump_close("}") - dump_close("}") - - G.out.append("MPIR_ERR_CHECK(mpi_errno);") - if need_fallback: - G.out.append("goto fn_exit;") - G.out.append("") - G.out.append("fallback:") - func_args = get_func_args(args, name, "allcomm_sched_auto") - dump_split(1, "mpi_errno = MPIR_%s_allcomm_sched_auto(%s);" % (Name, func_args)) - G.out.append("") - dump_fn_exit() - dump_close("}") + G.out.append("coll_sig.is_persistent = false;") + G.out.append("coll_sig.sched = NULL;") -def dump_mpir_impl_nonblocking(name): - blocking_type = "nonblocking" - func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, "nonblocking") - - func_name = get_func_name(name, blocking_type) - Name = func_name.capitalize() - NAME = func_name.upper() + for p in func['parameters']: + if p['name'] == 'comm': + pass + else: + G.out.append("coll_sig.u.%s.%s = %s;" % (name, p['name'], p['name'])) + # Call csel G.out.append("") - add_prototype("int MPIR_%s_impl(%s)" % (Name, func_params)) - dump_split(0, "int MPIR_%s_impl(%s)" % (Name, func_params)) - dump_open('{') - G.out.append("int mpi_errno = MPI_SUCCESS;") - G.out.append("enum MPIR_sched_type sched_type;") - 
G.out.append("void *sched;") - G.out.append("") - G.out.append("*request = NULL;") - func_args = get_func_args(args, name, "mpir_impl_nonblocking") - dump_split(1, "mpi_errno = MPIR_%s_sched_impl(%s);" % (Name, func_args)) + G.out.append("mpi_errno = MPIR_Coll_composition_auto(&coll_sig);") G.out.append("MPIR_ERR_CHECK(mpi_errno);") - G.out.append("MPII_SCHED_START(sched_type, sched, comm_ptr, request);") G.out.append("") - G.out.append("fn_exit:") - G.out.append("return mpi_errno;") - G.out.append("fn_fail:") - G.out.append("goto fn_exit;") - dump_close("}") - -def dump_mpir_impl_persistent(name): - blocking_type = "persistent" - func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, "persistent") - func_name = get_func_name(name, blocking_type) - Name = func_name.capitalize() - NAME = func_name.upper() + # Set request if nonblocking or persistent + if blocking_type == "blocking": + pass + elif blocking_type == "nonblocking": + G.out.append("MPII_SCHED_START(coll_sig.sched_type, coll_sig.sched, comm_ptr, request);") + G.out.append("") + elif blocking_type == "persistent": + G.out.append("MPIR_Request *req = MPIR_Request_create(MPIR_REQUEST_KIND__PREQUEST_COLL);") + G.out.append("MPIR_ERR_CHKANDJUMP(!req, mpi_errno, MPI_ERR_OTHER, \"**nomem\");") + G.out.append("MPIR_Comm_add_ref(comm_ptr);") + G.out.append("MPIR_Comm_save_inactive_request(comm_ptr, req);") + G.out.append("req->u.persist_coll.sched_type = coll_sig.sched_type;") + G.out.append("req->u.persist_coll.sched = coll_sig.sched;") + G.out.append("*request = req;") + G.out.append("") + else: + raise Exception("Wrong blocking_type") - G.out.append("") - add_prototype("int MPIR_%s_impl(%s)" % (Name, func_params)) - dump_split(0, "int MPIR_%s_impl(%s)" % (Name, func_params)) - dump_open('{') - G.out.append("int mpi_errno = MPI_SUCCESS;") - G.out.append("") - G.out.append("MPIR_Request *req = MPIR_Request_create(MPIR_REQUEST_KIND__PREQUEST_COLL);") 
- G.out.append("MPIR_ERR_CHKANDJUMP(!req, mpi_errno, MPI_ERR_OTHER, \"**nomem\");") - G.out.append("MPIR_Comm_add_ref(comm_ptr);") - G.out.append("req->comm = comm_ptr;") - G.out.append("MPIR_Comm_save_inactive_request(comm_ptr, req);") - G.out.append("req->u.persist_coll.sched_type = MPIR_SCHED_INVALID;") - G.out.append("req->u.persist_coll.real_request = NULL;") - - func_args = get_func_args(args, name, "mpir_impl_persistent") - dump_split(1, "mpi_errno = MPIR_I%s_sched_impl(%s);" % (name, func_args)) - G.out.append("MPIR_ERR_CHECK(mpi_errno);") - G.out.append("") - G.out.append("*request = req;") - G.out.append("") G.out.append("fn_exit:") G.out.append("return mpi_errno;") G.out.append("fn_fail:") G.out.append("goto fn_exit;") - dump_close("}") - -def dump_mpir(name, blocking_type): - """ MPIR_Xxx - """ - func = G.FUNCS["mpi_" + name] - params, args = get_params_and_args(func) - func_params = get_func_params(params, name, blocking_type) - func_args = get_func_args(args, name, blocking_type) - - func_name = get_func_name(name, blocking_type) - Name = func_name.capitalize() - NAME = func_name.upper() - - def dump_buffer_swap_pre(): - G.out.append("void *in_recvbuf = recvbuf;") - G.out.append("void *host_sendbuf = NULL;") - G.out.append("void *host_recvbuf = NULL;") - G.out.append("") - if name == "reduce_scatter": - G.out.append("MPI_Aint count = 0;") - G.out.append("for (int i = 0; i < MPIR_Comm_size(comm_ptr); i++) {") - G.out.append(" count += recvcounts[i];") - G.out.append("}") - G.out.append("") - elif name == "reduce_scatter_block": - G.out.append("MPI_Aint count = MPIR_Comm_size(comm_ptr) * recvcount;") - - if name == "reduce": - use_recvbuf = "(comm_ptr->rank == root || root == MPI_ROOT) ? 
recvbuf : NULL" - else: - use_recvbuf = "recvbuf" - - G.out.append("if(!MPIR_Typerep_reduce_is_supported(op, count, datatype))") - G.out.append(" MPIR_Coll_host_buffer_alloc(sendbuf, %s, count, datatype, &host_sendbuf, &host_recvbuf);" % use_recvbuf) - G.out.append("") - - for buf in ("sendbuf", "recvbuf"): - G.out.append("if (host_%s) {" % buf); - G.out.append(" %s = host_%s;" % (buf, buf)); - G.out.append("}") - G.out.append("") - - def dump_buffer_swap_post(): - count = "count" - if name == "reduce_scatter": - count = "recvcounts[comm_ptr->rank]" - elif name == "reduce_scatter_block": - count = "recvcount" - - if blocking_type == "blocking": - G.out.append("if (host_recvbuf) {") - G.out.append(" recvbuf = in_recvbuf;") - G.out.append(" MPIR_Localcopy(host_recvbuf, count, datatype, recvbuf, count, datatype);") - G.out.append("}") - G.out.append("MPIR_Coll_host_buffer_free(host_sendbuf, host_recvbuf);") - elif blocking_type == "nonblocking": - G.out.append("MPIR_Coll_host_buffer_swap_back(host_sendbuf, host_recvbuf, in_recvbuf, %s, datatype, *request);" % count) - elif blocking_type == "persistent": - G.out.append("MPIR_Coll_host_buffer_persist_set(host_sendbuf, host_recvbuf, in_recvbuf, %s, datatype, *request);" % count) - - G.out.append("") - add_prototype("int MPIR_%s(%s)" % (Name, func_params)) - dump_split(0, "int MPIR_%s(%s)" % (Name, func_params)) - dump_open('{') - G.out.append("int mpi_errno = MPI_SUCCESS;") - G.out.append("") - - need_buffer_swap = False - if re.match(r'(reduce|allreduce|scan|exscan|reduce_scatter)', name): - need_buffer_swap = True - if need_buffer_swap: - dump_buffer_swap_pre() - - dump_split(1, "mpi_errno = MPIR_%s_impl(%s);" % (Name, func_args)) - if need_buffer_swap: - dump_buffer_swap_post() - G.out.append("") - G.out.append("return mpi_errno;") - dump_close("}") - -# ---- -def dump_fallback(algo): - cond_list = [] - for a in algo['restrictions'].replace(" ","").split(','): - if a == "inplace": - cond_list.append("sendbuf == 
MPI_IN_PLACE") - elif a == "noinplace": - cond_list.append("sendbuf != MPI_IN_PLACE") - elif a == "power-of-two": - cond_list.append("MPL_is_pof2(comm_ptr->local_size)") - elif a == "size-ge-pof2": - cond_list.append("count >= MPL_pof2(comm_ptr->local_size)") - elif a == "commutative": - cond_list.append("MPIR_Op_is_commutative(op)") - elif a== "builtin-op": - cond_list.append("HANDLE_IS_BUILTIN(op)") - elif a == "parent-comm": - cond_list.append("MPIR_Comm_is_parent_comm(comm_ptr)") - elif a == "node-consecutive": - cond_list.append("MPII_Comm_is_node_consecutive(comm_ptr)") - elif a == "displs-ordered": - # assume it's allgatherv - cond_list.append("MPII_Iallgatherv_is_displs_ordered(comm_ptr->local_size, recvcounts, displs)") - else: - raise Exception("Unsupported restrictions - %s" % a) - (func_name, commkind) = algo['func-commkind'].split('-') - G.out.append("MPII_COLLECTIVE_FALLBACK_CHECK(comm_ptr->rank, %s, mpi_errno," % ' && '.join(cond_list)) - G.out.append(" \"%s %s cannot be applied.\\n\");" % (func_name.capitalize(), algo['name'])) + dump_close('}') # ---- def get_func_name(name, blocking_type): @@ -727,24 +287,6 @@ def get_algo_funcname(func_name, commkind, algo): else: return "MPIR_%s_%s_%s" % (Name, commkind, get_algo_name(algo)) -def get_params_and_args(func): - mapping = G.MAPS['SMALL_C_KIND_MAP'] - - params = [] - args = [] - for p in func['parameters']: - if p['name'] == 'comm': - params.append("MPIR_Comm * comm_ptr") - args.append("comm_ptr") - else: - s = get_C_param(p, func, mapping) - if p['kind'].startswith('POLY'): - s = re.sub(r'\bint ', 'MPI_Aint ', s) - params.append(s) - args.append(p['name']) - - return (', '.join(params), ', '.join(args)) - def get_algo_extra_args(algo, kind): (func_name, commkind) = algo['func-commkind'].split('-') extra_params = algo['extra_params'].replace(' ', '').split(',') @@ -788,34 +330,6 @@ def get_algo_extra_params(algo): return ', '.join(out_list) # additional wrappers -def get_algo_args(args, algo, kind): 
- algo_args = args - if 'extra_params' in algo: - algo_args += ", " + get_algo_extra_args(algo, kind) - - if algo['name'].startswith('tsp_'): - algo_args += ", *sched_p" - elif algo['func-commkind'].startswith('i'): - algo_args += ", *sched_p" - elif not algo['func-commkind'].startswith('neighbor_'): - algo_args += ", coll_attr" - - return algo_args - -def get_algo_params(params, algo): - algo_params = params - if 'extra_params' in algo: - algo_params += ", " + get_algo_extra_params(algo) - - if algo['name'].startswith('tsp_'): - algo_params += ", MPIR_TSP_sched_t sched" - elif algo['func-commkind'].startswith('i'): - algo_params += ", MPIR_Sched_t s" - elif not algo['func-commkind'].startswith('neighbor_'): - algo_params += ", int coll_attr" - - return algo_params - def get_algo_name(algo): # the name used in algo function name if "func_name" in algo: @@ -825,45 +339,30 @@ def get_algo_name(algo): else: return algo['name'] -def get_func_params(params, name, kind): - func_params = params - if kind == "blocking": - if not name.startswith('neighbor_'): - func_params += ", int coll_attr" - elif kind == "nonblocking": - func_params += ", MPIR_Request ** request" - elif kind == "persistent": - func_params += ", MPIR_Info * info_ptr, MPIR_Request ** request" - elif kind == "sched_auto": - func_params += ", MPIR_Sched_t s" - elif kind == "allcomm_sched_auto": - func_params += ", bool is_persistent, void **sched_p, enum MPIR_sched_type *sched_type_p" - elif kind == "sched_impl": - func_params += ", bool is_persistent, void **sched_p, enum MPIR_sched_type *sched_type_p" - else: - raise Exception("get_func_params - unexpected kind = %s" % kind) - - return func_params - -def get_func_args(args, name, kind): - func_args = args - if kind == "blocking": - if not name.startswith('neighbor_'): - func_args += ", coll_attr" - elif kind == "nonblocking": - func_args += ", request" - elif kind == "persistent": - func_args += ", info_ptr, request" - elif kind == "allcomm_sched_auto": - 
func_args += ", is_persistent, sched_p, sched_type_p" - elif kind == "mpir_impl_nonblocking": - func_args += ", false, &sched, &sched_type" - elif kind == "mpir_impl_persistent": - func_args += ", true, &req->u.persist_coll.sched, &req->u.persist_coll.sched_type" +def get_func_params(func, name, blocking_type): + mapping = G.MAPS['SMALL_C_KIND_MAP'] + + params = [] + for p in func['parameters']: + if p['name'] == 'comm': + params.append("MPIR_Comm * comm_ptr") + else: + s = get_C_param(p, func, mapping) + if p['kind'].startswith('POLY'): + s = re.sub(r'\bint ', 'MPI_Aint ', s) + params.append(s) + + if blocking_type == "blocking": + pass + elif blocking_type == "nonblocking": + params.append("MPIR_Request ** request") + elif blocking_type == "persistent": + params.append("MPIR_Info * info_ptr") + params.append("MPIR_Request ** request") else: - raise Exception("get_func_args - unexpected kind = %s" % kind) + raise Exception("get_func_params - unexpected blocking_type = %s" % blocking_type) - return func_args + return ', '.join(params) # ---------------------- def dump_c_file(f, lines): diff --git a/maint/local_python/binding_c.py b/maint/local_python/binding_c.py index 1faf8e311b8..5b89d7814eb 100644 --- a/maint/local_python/binding_c.py +++ b/maint/local_python/binding_c.py @@ -1782,17 +1782,9 @@ def push_impl_decl(func, impl_name=None): if func['_impl_param_list']: params = ', '.join(func['_impl_param_list']) - if func['dir'] == 'coll': - # block collective use an extra coll_attr - if not RE.match(r'MPI_(I.*|Neighbor.*|.*_init)$', func['name']): - params = params + ", int coll_attr" else: params="void" - if func['dir'] == 'coll': - # collective also dump MPIR_Xxx(...) - mpir_name = re.sub(r'^MPIX?_', 'MPIR_', func['name']) - G.impl_declares.append("int %s(%s);" % (mpir_name, params)) # dump MPIR_Xxx_impl(...) 
if func['dir'] == 'io': G.io_impl_declares.append("int %s(%s);" % (impl_name, params)) @@ -1821,29 +1813,6 @@ def dump_CHECKENUM(var, errname, t, type="ENUM"): G.out.append("goto fn_fail;") dump_if_close() -def dump_body_coll(func): - # collectives call MPIR_Xxx - mpir_name = re.sub(r'^MPIX?_', 'MPIR_', func['name']) - - args = ", ".join(func['_impl_arg_list']) - - if RE.match(r'MPI_(I.*|.*_init)$', func['name'], re.IGNORECASE): - # non-blocking collectives - G.out.append("MPIR_Request *request_ptr = NULL;") - dump_line_with_break("mpi_errno = %s(%s);" % (mpir_name, args)) - dump_error_check("") - G.out.append("if (!request_ptr) {") - G.out.append(" request_ptr = MPIR_Request_create_complete(MPIR_REQUEST_KIND__COLL);") - G.out.append("}") - G.out.append("*request = request_ptr->handle;") - elif RE.match(r'mpi_neighbor_', func['name'], re.IGNORECASE): - dump_line_with_break("mpi_errno = %s(%s);" % (mpir_name, args)) - dump_error_check("") - else: - # blocking collectives - dump_line_with_break("mpi_errno = %s(%s, 0);" % (mpir_name, args)) - dump_error_check("") - def dump_coll_v_swap(func): # -- wrappers to make code cleaner def replace_arg(old, new): @@ -2064,8 +2033,6 @@ def dump_body_normal(): dump_body_topo_fns(func, RE.m.group(1)) else: print("Error: unhandled special impl: [%s]" % func['impl']) - elif func['dir'] == 'coll': - dump_body_coll(func) else: dump_body_impl(func, "mpir") From d2b447f0b1f611e628d06f971191b52bd2628edd Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 23 Aug 2025 11:25:33 -0500 Subject: [PATCH 26/47] coll/gen_coll: temporarily generate MPIR collectives Current compositional algorithms call MPIR collectives. We will refactor them later. But for now, generate a wrapper MPIR functions that calls _impl functions. 
--- maint/gen_coll.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index cdd3487ecfa..2823e986c62 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -27,6 +27,14 @@ def main(): dump_coll_impl(a, "blocking") dump_coll_impl(a, "nonblocking") dump_coll_impl(a, "persistent") + + # TEMP: dump mpir functions. + # Current code base call MPIR_ functions in copositinal algorithms. Create a wrapper that call _impl + # for now. We will refactor the compositional algorithms later. + for a in coll_names: + dump_coll_mpir(a, "blocking") + dump_coll_mpir(a, "nonblocking") + # dump the container version of the algorithms dump_algo_cnt_fns() add_algo_prototypes() @@ -266,6 +274,34 @@ def dump_coll_impl(name, blocking_type): G.out.append("goto fn_exit;") dump_close('}') +def dump_coll_mpir(name, blocking_type): + def get_func_args(func): + args = [] + for p in func['parameters']: + if p['name'] == 'comm': + args.append('comm_ptr') + else: + args.append(p['name']) + return ', '.join(args) + + func = G.FUNCS["mpi_" + name] + func_params = get_func_params(func, name, blocking_type) + func_args = get_func_args(func) + if blocking_type == "blocking": + func_params += ", int coll_attr" + else: + func_args += ", request" + + func_name = get_func_name(name, blocking_type) + Name = func_name.capitalize() + + G.out.append("") + add_prototype("int MPIR_%s(%s)" % (Name, func_params)) + dump_split(0, "int MPIR_%s(%s)" % (Name, func_params)) + dump_open('{') + G.out.append("return MPIR_%s_impl(%s);" % (Name, func_args)) + dump_close('}') + # ---- def get_func_name(name, blocking_type): if blocking_type == "blocking": From a3aaf2e2ae7b016be2a15ddbc40824776afc1f01 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 23 Aug 2025 20:51:13 -0500 Subject: [PATCH 27/47] misc: only include json.h where needed --- src/include/mpir_csel.h | 1 - src/mpi/coll/src/csel_container.c | 1 + 2 files changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/include/mpir_csel.h b/src/include/mpir_csel.h index 30c9953263c..8487b736170 100644 --- a/src/include/mpir_csel.h +++ b/src/include/mpir_csel.h @@ -6,7 +6,6 @@ #ifndef MPIR_CSEL_H_INCLUDED #define MPIR_CSEL_H_INCLUDED -#include "json.h" #include "coll_impl.h" typedef enum { diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c index 99357d4cac9..9ffe341eb76 100644 --- a/src/mpi/coll/src/csel_container.c +++ b/src/mpi/coll/src/csel_container.c @@ -6,6 +6,7 @@ #include "mpiimpl.h" #include "coll_impl.h" #include "mpl.h" +#include "json.h" static void parse_container_params(struct json_object *obj, MPII_Csel_container_s * cnt) { From f4bf49dfeafe41e504ac95b2a9631e40d59dbba7 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 23 Aug 2025 21:27:57 -0500 Subject: [PATCH 28/47] coll: reorganize csel headers Move MPIR_Csel_coll_sig_s and MPII_Csel_container_s definitions to mpir_coll.h since they are now common interface to all collective algorithms. Move the rest of the csel header to coll_csel.h and only include it where needed. 
--- src/include/mpiimpl.h | 2 - src/include/mpir_coll.h | 602 +++++++++++++++++ src/include/mpir_csel.h | 625 ------------------ src/include/mpir_request.h | 2 +- src/mpi/coll/allreduce/allreduce_intra_tree.c | 1 + src/mpi/coll/bcast/bcast_intra_tree.c | 1 + src/mpi/coll/iallreduce/iallreduce_tsp_auto.c | 1 + src/mpi/coll/ibarrier/ibarrier_tsp_auto.c | 1 + src/mpi/coll/ibcast/ibcast_tsp_auto.c | 1 + src/mpi/coll/include/coll_csel.h | 21 + src/mpi/coll/include/coll_impl.h | 6 + src/mpi/coll/ireduce/ireduce_tsp_auto.c | 8 +- src/mpi/coll/ireduce/ireduce_tsp_tree.c | 1 + src/mpi/coll/src/coll_impl.c | 1 + src/mpi/coll/src/csel.c | 2 +- src/mpi/coll/src/csel_container.c | 2 +- 16 files changed, 643 insertions(+), 634 deletions(-) delete mode 100644 src/include/mpir_csel.h create mode 100644 src/mpi/coll/include/coll_csel.h diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h index 07e5b7fd242..5e546e90664 100644 --- a/src/include/mpiimpl.h +++ b/src/include/mpiimpl.h @@ -182,7 +182,6 @@ typedef struct MPIR_Stream MPIR_Stream; #endif #include "coll_types.h" -#include "coll_impl.h" /*****************************************************************************/ /********************** PART 4: DEVICE PRE DECLARATION ***********************/ @@ -205,7 +204,6 @@ typedef struct MPIR_Stream MPIR_Stream; #include "mpir_request.h" #include "mpir_progress_hook.h" #include "mpir_win.h" -#include "mpir_csel.h" #include "mpir_coll.h" #include "mpir_func.h" #include "mpir_nbc.h" diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index 3a6b8fb8014..e36a394690a 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -6,9 +6,611 @@ #ifndef MPIR_COLL_H_INCLUDED #define MPIR_COLL_H_INCLUDED +typedef struct MPIR_Csel_coll_sig MPIR_Csel_coll_sig_s; +typedef struct MPII_Csel_container MPII_Csel_container_s; + #include "coll_impl.h" #include "coll_algos.h" +typedef enum { + MPIR_CSEL_COLL_TYPE__ALLGATHER = 0, + MPIR_CSEL_COLL_TYPE__ALLGATHERV, + 
MPIR_CSEL_COLL_TYPE__ALLREDUCE, + MPIR_CSEL_COLL_TYPE__ALLTOALL, + MPIR_CSEL_COLL_TYPE__ALLTOALLV, + MPIR_CSEL_COLL_TYPE__ALLTOALLW, + MPIR_CSEL_COLL_TYPE__BARRIER, + MPIR_CSEL_COLL_TYPE__BCAST, + MPIR_CSEL_COLL_TYPE__EXSCAN, + MPIR_CSEL_COLL_TYPE__GATHER, + MPIR_CSEL_COLL_TYPE__GATHERV, + MPIR_CSEL_COLL_TYPE__IALLGATHER, + MPIR_CSEL_COLL_TYPE__IALLGATHERV, + MPIR_CSEL_COLL_TYPE__IALLREDUCE, + MPIR_CSEL_COLL_TYPE__IALLTOALL, + MPIR_CSEL_COLL_TYPE__IALLTOALLV, + MPIR_CSEL_COLL_TYPE__IALLTOALLW, + MPIR_CSEL_COLL_TYPE__IBARRIER, + MPIR_CSEL_COLL_TYPE__IBCAST, + MPIR_CSEL_COLL_TYPE__IEXSCAN, + MPIR_CSEL_COLL_TYPE__IGATHER, + MPIR_CSEL_COLL_TYPE__IGATHERV, + MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHER, + MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHERV, + MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALL, + MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLV, + MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLW, + MPIR_CSEL_COLL_TYPE__IREDUCE, + MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER, + MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK, + MPIR_CSEL_COLL_TYPE__ISCAN, + MPIR_CSEL_COLL_TYPE__ISCATTER, + MPIR_CSEL_COLL_TYPE__ISCATTERV, + MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHER, + MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHERV, + MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALL, + MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLV, + MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLW, + MPIR_CSEL_COLL_TYPE__REDUCE, + MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER, + MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK, + MPIR_CSEL_COLL_TYPE__SCAN, + MPIR_CSEL_COLL_TYPE__SCATTER, + MPIR_CSEL_COLL_TYPE__SCATTERV, + MPIR_CSEL_COLL_TYPE__END, +} MPIR_Csel_coll_type_e; + +typedef enum { + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_inter_local_gather_remote_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_inter_remote_gather_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_reduce_scatter_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_inter_reduce_exchange_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise_sendrecv_replace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_inter_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_allcomm_nb, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_pairwise_sendrecv_replace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_inter_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_pairwise_sendrecv_replace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_inter_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_recursive_doubling_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_ring_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_inter_remote_send_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_local_gather_remote_send, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_allcomm_nb, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_inter_sched_local_gather_remote_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_naive, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_reduce_scatter_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_brucks, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_permuted_sendrecv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_inter_sched_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_scattered, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_inter_sched_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_blocked, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_inplace, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_inter_sched_pairwise_exchange, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_inter_sched_bcast, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_ring_allgatherv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_ring_allgather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_inter_sched_flat, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iexscan_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_long, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_short, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_sched_linear, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_reduce_scatter_gather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_inter_sched_local_reduce_remote_send, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_tsp_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_tsp_recexch, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_tsp_recursive_doubling, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_tsp_tree, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_sched_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_remote_send_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_tsp_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_sched_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgather_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgatherv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoall_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallv_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallw_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_reduce_scatter_gather, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_inter_local_reduce_remote_send, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_inter_remote_reduce_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_noncommutative, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_pairwise, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_doubling, + 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_halving, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_recursive_doubling, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_smp, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_intra_binomial, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_remote_send_local_scatter, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear, + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb, + /* composition algorithms */ + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto, + /* end */ + MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count, +} MPII_Csel_container_type_e; + +struct MPIR_Csel_coll_sig { + MPIR_Csel_coll_type_e coll_type; + MPIR_Comm *comm_ptr; + void *sched; + enum MPIR_sched_type sched_type; + bool is_persistent; + + union { + struct { + const void *sendbuf; + MPI_Aint sendcount; + MPI_Datatype sendtype; + void *recvbuf; + MPI_Aint recvcount; + MPI_Datatype recvtype; + } allgather, iallgather, neighbor_allgather, ineighbor_allgather; + struct { + const void *sendbuf; + MPI_Aint sendcount; + MPI_Datatype sendtype; + void *recvbuf; + const MPI_Aint *recvcounts; + const MPI_Aint *displs; + MPI_Datatype recvtype; + } allgatherv, iallgatherv, neighbor_allgatherv, ineighbor_allgatherv; + struct { + const void *sendbuf; + void *recvbuf; + MPI_Aint count; + MPI_Datatype datatype; + MPI_Op op; + } allreduce, iallreduce; + struct { + const void *sendbuf; + MPI_Aint sendcount; + MPI_Datatype sendtype; + MPI_Aint recvcount; + void *recvbuf; + MPI_Datatype recvtype; + } alltoall, 
ialltoall, neighbor_alltoall, ineighbor_alltoall; + struct { + const void *sendbuf; + const MPI_Aint *sendcounts; + const MPI_Aint *sdispls; + MPI_Datatype sendtype; + void *recvbuf; + const MPI_Aint *recvcounts; + const MPI_Aint *rdispls; + MPI_Datatype recvtype; + } alltoallv, ialltoallv, neighbor_alltoallv, ineighbor_alltoallv; + struct { + const void *sendbuf; + const MPI_Aint *sendcounts; + const MPI_Aint *sdispls; + const MPI_Datatype *sendtypes; + void *recvbuf; + const MPI_Aint *recvcounts; + const MPI_Aint *rdispls; + const MPI_Datatype *recvtypes; + } alltoallw, ialltoallw; + struct { + const void *sendbuf; + const MPI_Aint *sendcounts; + const MPI_Aint *sdispls; + const MPI_Datatype *sendtypes; + void *recvbuf; + const MPI_Aint *recvcounts; + const MPI_Aint *rdispls; + const MPI_Datatype *recvtypes; + } neighbor_alltoallw, ineighbor_alltoallw; + struct { + int dummy; /* some compiler (suncc) doesn't like empty struct */ + } barrier, ibarrier; + struct { + void *buffer; + MPI_Aint count; + MPI_Datatype datatype; + int root; + } bcast, ibcast; + struct { + const void *sendbuf; + void *recvbuf; + MPI_Aint count; + MPI_Datatype datatype; + MPI_Op op; + } exscan, iexscan; + struct { + const void *sendbuf; + MPI_Aint sendcount; + MPI_Datatype sendtype; + MPI_Aint recvcount; + void *recvbuf; + MPI_Datatype recvtype; + int root; + } gather, igather, scatter, iscatter; + struct { + const void *sendbuf; + MPI_Aint sendcount; + MPI_Datatype sendtype; + void *recvbuf; + const MPI_Aint *recvcounts; + const MPI_Aint *displs; + MPI_Datatype recvtype; + int root; + } gatherv, igatherv; + struct { + const void *sendbuf; + void *recvbuf; + MPI_Aint count; + MPI_Datatype datatype; + MPI_Op op; + int root; + } reduce, ireduce; + struct { + const void *sendbuf; + void *recvbuf; + const MPI_Aint *recvcounts; + MPI_Datatype datatype; + MPI_Op op; + } reduce_scatter, ireduce_scatter; + struct { + const void *sendbuf; + void *recvbuf; + MPI_Aint recvcount; + MPI_Datatype 
datatype; + MPI_Op op; + } reduce_scatter_block, ireduce_scatter_block; + struct { + const void *sendbuf; + void *recvbuf; + MPI_Aint count; + MPI_Datatype datatype; + MPI_Op op; + } scan, iscan; + struct { + const void *sendbuf; + const MPI_Aint *sendcounts; + const MPI_Aint *displs; + MPI_Datatype sendtype; + MPI_Aint recvcount; + void *recvbuf; + MPI_Datatype recvtype; + int root; + } scatterv, iscatterv; + } u; +}; + +struct MPII_Csel_container { + MPII_Csel_container_type_e id; + + union { + struct { + struct { + int k; + } intra_tsp_brucks; + struct { + int k; + } intra_tsp_recexch_doubling; + struct { + int k; + } intra_tsp_recexch_halving; + } iallgather; + struct { + struct { + int k; + } intra_tsp_brucks; + struct { + int k; + } intra_tsp_recexch_doubling; + struct { + int k; + } intra_tsp_recexch_halving; + } iallgatherv; + struct { + struct { + int k; + } intra_tsp_recexch_single_buffer; + struct { + int k; + } intra_tsp_recexch_multiple_buffer; + struct { + int tree_type; + int k; + int chunk_size; + int buffer_per_child; + } intra_tsp_tree; + struct { + int k; + } intra_tsp_recexch_reduce_scatter_recexch_allgatherv; + } iallreduce; + struct { + struct { + int k; + int buffer_per_phase; + } intra_tsp_brucks; + struct { + int batch_size; + int bblock; + } intra_tsp_scattered; + } ialltoall; + struct { + struct { + int batch_size; + int bblock; + } intra_tsp_scattered; + struct { + int bblock; + } intra_tsp_blocked; + } ialltoallv; + struct { + struct { + int bblock; + } intra_tsp_blocked; + } ialltoallw; + struct { + struct { + int k; + } intra_k_dissemination; + struct { + int k; + bool single_phase_recv; + } intra_recexch; + } barrier; + struct { + struct { + int k; + } intra_tsp_recexch; + struct { + int k; + } intra_tsp_k_dissemination; + } ibarrier; + struct { + struct { + int tree_type; + int k; + int chunk_size; + } intra_tsp_tree; + struct { + int chunk_size; + } intra_tsp_ring; + struct { + int scatterv_k; + int allgatherv_k; + } 
intra_tsp_scatterv_recexch_allgatherv; + struct { + int scatterv_k; + } intra_tsp_scatterv_ring_allgatherv; + } ibcast; + struct { + struct { + int tree_type; + int k; + int is_non_blocking; + int topo_overhead; + int topo_diff_groups; + int topo_diff_switches; + int topo_same_switches; + } intra_tree; + struct { + int tree_type; + int k; + int is_non_blocking; + int chunk_size; + int recv_pre_posted; + } intra_pipelined_tree; + } bcast; + struct { + struct { + int k; + } intra_k_brucks; + struct { + int k; + bool single_phase_recv; + } intra_recexch_doubling; + struct { + int k; + bool single_phase_recv; + } intra_recexch_halving; + } allgather; + struct { + struct { + int k; + } intra_k_brucks; + } alltoall; + struct { + struct { + int k; + } intra_tsp_tree; + } igather; + struct { + struct { + int tree_type; + int k; + int chunk_size; + int buffer_per_child; + int topo_overhead; + int topo_diff_groups; + int topo_diff_switches; + int topo_same_switches; + } intra_tsp_tree; + struct { + int chunk_size; + int buffer_per_child; + } intra_tsp_ring; + } ireduce; + struct { + struct { + int k; + } intra_tsp_recexch; + } ireduce_scatter; + struct { + struct { + int k; + } intra_tsp_recexch; + } ireduce_scatter_block; + struct { + struct { + int k; + } intra_recursive_multiplying; + struct { + int tree_type; + int k; + int chunk_size; + int buffer_per_child; + int topo_overhead; + int topo_diff_groups; + int topo_diff_switches; + int topo_same_switches; + } intra_tree; + struct { + int k; + bool single_phase_recv; + } intra_recexch; + struct { + int k; + bool single_phase_recv; + } intra_k_reduce_scatter_allgather; + struct { + int ccl; + } intra_ccl; + } allreduce; + struct { + struct { + int k; + } intra_tsp_tree; + } iscatter; + } u; +}; + +typedef int (*MPIR_Coll_algo_fn) (MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); + /* During init, not all algorithms are safe to use. For example, the csel * may not have been initialized. 
We define a set of fallback routines that * are safe to use during init. They are all intra algorithms. diff --git a/src/include/mpir_csel.h b/src/include/mpir_csel.h deleted file mode 100644 index 8487b736170..00000000000 --- a/src/include/mpir_csel.h +++ /dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#ifndef MPIR_CSEL_H_INCLUDED -#define MPIR_CSEL_H_INCLUDED - -#include "coll_impl.h" - -typedef enum { - MPIR_CSEL_COLL_TYPE__ALLGATHER = 0, - MPIR_CSEL_COLL_TYPE__ALLGATHERV, - MPIR_CSEL_COLL_TYPE__ALLREDUCE, - MPIR_CSEL_COLL_TYPE__ALLTOALL, - MPIR_CSEL_COLL_TYPE__ALLTOALLV, - MPIR_CSEL_COLL_TYPE__ALLTOALLW, - MPIR_CSEL_COLL_TYPE__BARRIER, - MPIR_CSEL_COLL_TYPE__BCAST, - MPIR_CSEL_COLL_TYPE__EXSCAN, - MPIR_CSEL_COLL_TYPE__GATHER, - MPIR_CSEL_COLL_TYPE__GATHERV, - MPIR_CSEL_COLL_TYPE__IALLGATHER, - MPIR_CSEL_COLL_TYPE__IALLGATHERV, - MPIR_CSEL_COLL_TYPE__IALLREDUCE, - MPIR_CSEL_COLL_TYPE__IALLTOALL, - MPIR_CSEL_COLL_TYPE__IALLTOALLV, - MPIR_CSEL_COLL_TYPE__IALLTOALLW, - MPIR_CSEL_COLL_TYPE__IBARRIER, - MPIR_CSEL_COLL_TYPE__IBCAST, - MPIR_CSEL_COLL_TYPE__IEXSCAN, - MPIR_CSEL_COLL_TYPE__IGATHER, - MPIR_CSEL_COLL_TYPE__IGATHERV, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHER, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHERV, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALL, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLV, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLW, - MPIR_CSEL_COLL_TYPE__IREDUCE, - MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER, - MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK, - MPIR_CSEL_COLL_TYPE__ISCAN, - MPIR_CSEL_COLL_TYPE__ISCATTER, - MPIR_CSEL_COLL_TYPE__ISCATTERV, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHER, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHERV, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALL, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLV, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLW, - MPIR_CSEL_COLL_TYPE__REDUCE, - MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER, - MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK, - 
MPIR_CSEL_COLL_TYPE__SCAN, - MPIR_CSEL_COLL_TYPE__SCATTER, - MPIR_CSEL_COLL_TYPE__SCATTERV, - MPIR_CSEL_COLL_TYPE__END, -} MPIR_Csel_coll_type_e; - -typedef enum { - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_inter_local_gather_remote_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_inter_remote_gather_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_inter_reduce_exchange_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_allcomm_nb, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_inter_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_recursive_doubling_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_ring_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_inter_remote_send_local_bcast, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_local_gather_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_inter_sched_local_gather_remote_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_naive, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_permuted_sendrecv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_inplace, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_inter_sched_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_ring_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_ring_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_inter_sched_flat, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iexscan_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_long, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_short, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_sched_linear, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_reduce_scatter_gather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_inter_sched_local_reduce_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_tsp_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_remote_send_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoall_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallw_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_reduce_scatter_gather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_inter_local_reduce_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_doubling, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_inter_remote_reduce_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_remote_send_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb, - /* composition algorithms */ - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto, - /* end */ - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count, -} MPII_Csel_container_type_e; - -typedef struct { - MPIR_Csel_coll_type_e coll_type; - MPIR_Comm *comm_ptr; - void *sched; - enum MPIR_sched_type sched_type; - bool is_persistent; - - union { - struct { - const void *sendbuf; - MPI_Aint sendcount; - MPI_Datatype sendtype; - void *recvbuf; - MPI_Aint recvcount; - MPI_Datatype recvtype; - } allgather, iallgather, neighbor_allgather, ineighbor_allgather; - struct { - const void *sendbuf; - MPI_Aint 
sendcount; - MPI_Datatype sendtype; - void *recvbuf; - const MPI_Aint *recvcounts; - const MPI_Aint *displs; - MPI_Datatype recvtype; - } allgatherv, iallgatherv, neighbor_allgatherv, ineighbor_allgatherv; - struct { - const void *sendbuf; - void *recvbuf; - MPI_Aint count; - MPI_Datatype datatype; - MPI_Op op; - } allreduce, iallreduce; - struct { - const void *sendbuf; - MPI_Aint sendcount; - MPI_Datatype sendtype; - MPI_Aint recvcount; - void *recvbuf; - MPI_Datatype recvtype; - } alltoall, ialltoall, neighbor_alltoall, ineighbor_alltoall; - struct { - const void *sendbuf; - const MPI_Aint *sendcounts; - const MPI_Aint *sdispls; - MPI_Datatype sendtype; - void *recvbuf; - const MPI_Aint *recvcounts; - const MPI_Aint *rdispls; - MPI_Datatype recvtype; - } alltoallv, ialltoallv, neighbor_alltoallv, ineighbor_alltoallv; - struct { - const void *sendbuf; - const MPI_Aint *sendcounts; - const MPI_Aint *sdispls; - const MPI_Datatype *sendtypes; - void *recvbuf; - const MPI_Aint *recvcounts; - const MPI_Aint *rdispls; - const MPI_Datatype *recvtypes; - } alltoallw, ialltoallw; - struct { - const void *sendbuf; - const MPI_Aint *sendcounts; - const MPI_Aint *sdispls; - const MPI_Datatype *sendtypes; - void *recvbuf; - const MPI_Aint *recvcounts; - const MPI_Aint *rdispls; - const MPI_Datatype *recvtypes; - } neighbor_alltoallw, ineighbor_alltoallw; - struct { - int dummy; /* some compiler (suncc) doesn't like empty struct */ - } barrier, ibarrier; - struct { - void *buffer; - MPI_Aint count; - MPI_Datatype datatype; - int root; - } bcast, ibcast; - struct { - const void *sendbuf; - void *recvbuf; - MPI_Aint count; - MPI_Datatype datatype; - MPI_Op op; - } exscan, iexscan; - struct { - const void *sendbuf; - MPI_Aint sendcount; - MPI_Datatype sendtype; - MPI_Aint recvcount; - void *recvbuf; - MPI_Datatype recvtype; - int root; - } gather, igather, scatter, iscatter; - struct { - const void *sendbuf; - MPI_Aint sendcount; - MPI_Datatype sendtype; - void *recvbuf; - const 
MPI_Aint *recvcounts; - const MPI_Aint *displs; - MPI_Datatype recvtype; - int root; - } gatherv, igatherv; - struct { - const void *sendbuf; - void *recvbuf; - MPI_Aint count; - MPI_Datatype datatype; - MPI_Op op; - int root; - } reduce, ireduce; - struct { - const void *sendbuf; - void *recvbuf; - const MPI_Aint *recvcounts; - MPI_Datatype datatype; - MPI_Op op; - } reduce_scatter, ireduce_scatter; - struct { - const void *sendbuf; - void *recvbuf; - MPI_Aint recvcount; - MPI_Datatype datatype; - MPI_Op op; - } reduce_scatter_block, ireduce_scatter_block; - struct { - const void *sendbuf; - void *recvbuf; - MPI_Aint count; - MPI_Datatype datatype; - MPI_Op op; - } scan, iscan; - struct { - const void *sendbuf; - const MPI_Aint *sendcounts; - const MPI_Aint *displs; - MPI_Datatype sendtype; - MPI_Aint recvcount; - void *recvbuf; - MPI_Datatype recvtype; - int root; - } scatterv, iscatterv; - } u; -} MPIR_Csel_coll_sig_s; - -typedef struct { - MPII_Csel_container_type_e id; - - union { - struct { - struct { - int k; - } intra_tsp_brucks; - struct { - int k; - } intra_tsp_recexch_doubling; - struct { - int k; - } intra_tsp_recexch_halving; - } iallgather; - struct { - struct { - int k; - } intra_tsp_brucks; - struct { - int k; - } intra_tsp_recexch_doubling; - struct { - int k; - } intra_tsp_recexch_halving; - } iallgatherv; - struct { - struct { - int k; - } intra_tsp_recexch_single_buffer; - struct { - int k; - } intra_tsp_recexch_multiple_buffer; - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - } intra_tsp_tree; - struct { - int k; - } intra_tsp_recexch_reduce_scatter_recexch_allgatherv; - } iallreduce; - struct { - struct { - int k; - int buffer_per_phase; - } intra_tsp_brucks; - struct { - int batch_size; - int bblock; - } intra_tsp_scattered; - } ialltoall; - struct { - struct { - int batch_size; - int bblock; - } intra_tsp_scattered; - struct { - int bblock; - } intra_tsp_blocked; - } ialltoallv; - struct { - struct { - int 
bblock; - } intra_tsp_blocked; - } ialltoallw; - struct { - struct { - int k; - } intra_k_dissemination; - struct { - int k; - bool single_phase_recv; - } intra_recexch; - } barrier; - struct { - struct { - int k; - } intra_tsp_recexch; - struct { - int k; - } intra_tsp_k_dissemination; - } ibarrier; - struct { - struct { - int tree_type; - int k; - int chunk_size; - } intra_tsp_tree; - struct { - int chunk_size; - } intra_tsp_ring; - struct { - int scatterv_k; - int allgatherv_k; - } intra_tsp_scatterv_recexch_allgatherv; - struct { - int scatterv_k; - } intra_tsp_scatterv_ring_allgatherv; - } ibcast; - struct { - struct { - int tree_type; - int k; - int is_non_blocking; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tree; - struct { - int tree_type; - int k; - int is_non_blocking; - int chunk_size; - int recv_pre_posted; - } intra_pipelined_tree; - } bcast; - struct { - struct { - int k; - } intra_k_brucks; - struct { - int k; - bool single_phase_recv; - } intra_recexch_doubling; - struct { - int k; - bool single_phase_recv; - } intra_recexch_halving; - } allgather; - struct { - struct { - int k; - } intra_k_brucks; - } alltoall; - struct { - struct { - int k; - } intra_tsp_tree; - } igather; - struct { - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tsp_tree; - struct { - int chunk_size; - int buffer_per_child; - } intra_tsp_ring; - } ireduce; - struct { - struct { - int k; - } intra_tsp_recexch; - } ireduce_scatter; - struct { - struct { - int k; - } intra_tsp_recexch; - } ireduce_scatter_block; - struct { - struct { - int k; - } intra_recursive_multiplying; - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tree; - struct { - int k; 
- bool single_phase_recv; - } intra_recexch; - struct { - int k; - bool single_phase_recv; - } intra_k_reduce_scatter_allgather; - struct { - int ccl; - } intra_ccl; - } allreduce; - struct { - struct { - int k; - } intra_tsp_tree; - } iscatter; - } u; -} MPII_Csel_container_s; - -int MPIR_Csel_create_from_file(const char *json_file, - void *(*create_container) (struct json_object *), void **csel); -int MPIR_Csel_create_from_buf(const char *json, - void *(*create_container) (struct json_object *), void **csel); -int MPIR_Csel_free(void *csel); -int MPIR_Csel_prune(void *root_csel, MPIR_Comm * comm_ptr, void **comm_csel); -void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); - -void *MPII_Create_container(struct json_object *obj); - -typedef int (*MPIR_Coll_algo_fn) (MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); -void MPIR_Coll_algo_init(void); -/* NOTE: MPIR_Coll_auto is one of the composition container functions. However, - * MPIR_Coll_composition_auto is a gate function, thus does not take "cnt" parameter. 
*/ -int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig); -int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); - -#endif /* MPIR_CSEL_H_INCLUDED */ diff --git a/src/include/mpir_request.h b/src/include/mpir_request.h index fee9e9efa5c..ad82142e91f 100644 --- a/src/include/mpir_request.h +++ b/src/include/mpir_request.h @@ -204,7 +204,7 @@ struct MPIR_Request { struct { /* Persistent requests have their own "real" requests */ struct MPIR_Request *real_request; - MPIR_TSP_sched_t sched; + void *sched; } persist; /* kind : MPIR_REQUEST_KIND__PREQUEST_SEND or MPIR_REQUEST_KIND__PREQUEST_RECV */ struct { struct MPIR_Request *real_request; diff --git a/src/mpi/coll/allreduce/allreduce_intra_tree.c b/src/mpi/coll/allreduce/allreduce_intra_tree.c index 272dc15d371..9a25943995d 100644 --- a/src/mpi/coll/allreduce/allreduce_intra_tree.c +++ b/src/mpi/coll/allreduce/allreduce_intra_tree.c @@ -9,6 +9,7 @@ #include "mpiimpl.h" #include "algo_common.h" #include "treealgo.h" +#include "coll_csel.h" int MPIR_Allreduce_intra_tree(const void *sendbuf, void *recvbuf, diff --git a/src/mpi/coll/bcast/bcast_intra_tree.c b/src/mpi/coll/bcast/bcast_intra_tree.c index cd504c2a793..3f0d9636244 100644 --- a/src/mpi/coll/bcast/bcast_intra_tree.c +++ b/src/mpi/coll/bcast/bcast_intra_tree.c @@ -4,6 +4,7 @@ */ #include "mpiimpl.h" +#include "coll_csel.h" /* Algorithm: Tree-based bcast * For short messages, we use a kary/knomial tree-based algorithm. 
* Cost = lgp.alpha + n.lgp.beta diff --git a/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c b/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c index 9ad63c8d6d1..20149af8dc7 100644 --- a/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c +++ b/src/mpi/coll/iallreduce/iallreduce_tsp_auto.c @@ -6,6 +6,7 @@ #include "mpiimpl.h" #include "algo_common.h" #include "treealgo.h" +#include "coll_csel.h" /* Routine to schedule a pipelined tree based allreduce */ int MPIR_TSP_Iallreduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MPI_Aint count, diff --git a/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c b/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c index 094230a278c..01f9927dc4f 100644 --- a/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c +++ b/src/mpi/coll/ibarrier/ibarrier_tsp_auto.c @@ -4,6 +4,7 @@ */ #include "mpiimpl.h" +#include "coll_csel.h" /* sched version of CVAR and json based collective selection. Meant only for gentran scheduler */ int MPIR_TSP_Ibarrier_sched_intra_tsp_auto(MPIR_Comm * comm, MPIR_TSP_sched_t sched) diff --git a/src/mpi/coll/ibcast/ibcast_tsp_auto.c b/src/mpi/coll/ibcast/ibcast_tsp_auto.c index f74dc33bfb2..3965500adb7 100644 --- a/src/mpi/coll/ibcast/ibcast_tsp_auto.c +++ b/src/mpi/coll/ibcast/ibcast_tsp_auto.c @@ -6,6 +6,7 @@ #include "mpiimpl.h" #include "algo_common.h" #include "treealgo.h" +#include "coll_csel.h" /* Provides a "flat" broadcast that doesn't know anything about * hierarchy. 
It will choose between several different algorithms diff --git a/src/mpi/coll/include/coll_csel.h b/src/mpi/coll/include/coll_csel.h new file mode 100644 index 00000000000..68673eedb4d --- /dev/null +++ b/src/mpi/coll/include/coll_csel.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) by Argonne National Laboratory + * See COPYRIGHT in top-level directory + */ + +#ifndef COLL_CSEL_H_INCLUDED +#define COLL_CSEL_H_INCLUDED + +#include "json.h" + +int MPIR_Csel_create_from_file(const char *json_file, + void *(*create_container) (struct json_object *), void **csel); +int MPIR_Csel_create_from_buf(const char *json, + void *(*create_container) (struct json_object *), void **csel); +int MPIR_Csel_free(void *csel); +int MPIR_Csel_prune(void *root_csel, MPIR_Comm * comm_ptr, void **comm_csel); +void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); + +void *MPII_Create_container(struct json_object *obj); + +#endif /* COLL_CSEL_H_INCLUDED */ diff --git a/src/mpi/coll/include/coll_impl.h b/src/mpi/coll/include/coll_impl.h index 0f626c13f3b..8534a0c8df3 100644 --- a/src/mpi/coll/include/coll_impl.h +++ b/src/mpi/coll/include/coll_impl.h @@ -61,6 +61,12 @@ int MPIR_Coll_safe_to_block(void); int MPII_Coll_finalize(void); +void MPIR_Coll_algo_init(void); +/* NOTE: MPIR_Coll_auto is one of the composition container functions. However, + * MPIR_Coll_composition_auto is a gate function, thus does not take "cnt" parameter. 
*/ +int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig); +int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); + #define MPII_GENTRAN_CREATE_SCHED_P() \ do { \ *sched_type_p = MPIR_SCHED_GENTRAN; \ diff --git a/src/mpi/coll/ireduce/ireduce_tsp_auto.c b/src/mpi/coll/ireduce/ireduce_tsp_auto.c index 5f13cee8a04..81634f552a4 100644 --- a/src/mpi/coll/ireduce/ireduce_tsp_auto.c +++ b/src/mpi/coll/ireduce/ireduce_tsp_auto.c @@ -4,15 +4,15 @@ */ #include "mpiimpl.h" +#include "coll_csel.h" /* Provides a "flat" reduce that doesn't know anything about * hierarchy. It will choose between several different algorithms * based on the given parameters. */ /* Remove this function when gentran algos are in json file */ -static int MPIR_Ireduce_sched_intra_tsp_flat_auto(const void *sendbuf, void *recvbuf, - MPI_Aint count, MPI_Datatype datatype, MPI_Op op, - int root, MPIR_Comm * comm_ptr, - MPIR_TSP_sched_t sched) +int MPIR_Ireduce_sched_intra_tsp_flat_auto(const void *sendbuf, void *recvbuf, + MPI_Aint count, MPI_Datatype datatype, MPI_Op op, + int root, MPIR_Comm * comm_ptr, MPIR_TSP_sched_t sched) { int mpi_errno = MPI_SUCCESS; int tree_type = MPIR_TREE_TYPE_KNOMIAL_1; diff --git a/src/mpi/coll/ireduce/ireduce_tsp_tree.c b/src/mpi/coll/ireduce/ireduce_tsp_tree.c index 47c8123dff0..0c0160eb117 100644 --- a/src/mpi/coll/ireduce/ireduce_tsp_tree.c +++ b/src/mpi/coll/ireduce/ireduce_tsp_tree.c @@ -6,6 +6,7 @@ #include "mpiimpl.h" #include "algo_common.h" #include "treealgo.h" +#include "coll_csel.h" /* Routine to schedule a pipelined tree based reduce */ int MPIR_TSP_Ireduce_sched_intra_tree(const void *sendbuf, void *recvbuf, MPI_Aint count, diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index b5c0f876d95..38690aba45d 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -5,6 +5,7 @@ #include "mpiimpl.h" #include "coll_impl.h" +#include "coll_csel.h" /* === BEGIN_MPI_T_CVAR_INFO_BLOCK === 
diff --git a/src/mpi/coll/src/csel.c b/src/mpi/coll/src/csel.c index 3b9bed141d1..390f60166f9 100644 --- a/src/mpi/coll/src/csel.c +++ b/src/mpi/coll/src/csel.c @@ -5,7 +5,7 @@ #include "mpiimpl.h" #include "mpl.h" -#include "mpir_csel.h" +#include "coll_csel.h" #include <fcntl.h> /* open */ #include <sys/mman.h> /* mmap */ #include <unistd.h> diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c index 9ffe341eb76..3fdc2af12f8 100644 --- a/src/mpi/coll/src/csel_container.c +++ b/src/mpi/coll/src/csel_container.c @@ -6,7 +6,7 @@ #include "mpiimpl.h" #include "coll_impl.h" #include "mpl.h" -#include "json.h" +#include "coll_csel.h" static void parse_container_params(struct json_object *obj, MPII_Csel_container_s * cnt) { From 0d711b0b20884c5c49bfa112341cb8a1a5bb5085 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Tue, 2 Sep 2025 13:50:16 -0500 Subject: [PATCH 29/47] coll/csel: remove MPIR_Csel_prune We can add a separate caching mechanism to expedite search later. For now, simplify by directly using csel_node_s. 
--- src/include/mpir_comm.h | 2 - src/mpi/coll/include/coll_csel.h | 1 - src/mpi/coll/src/coll_impl.c | 3 -- src/mpi/coll/src/csel.c | 93 +++++--------------------------- 4 files changed, 13 insertions(+), 86 deletions(-) diff --git a/src/include/mpir_comm.h b/src/include/mpir_comm.h index 52a5b42cbab..f02a579ecbf 100644 --- a/src/include/mpir_comm.h +++ b/src/include/mpir_comm.h @@ -225,8 +225,6 @@ struct MPIR_Comm { MPIR_Treealgo_tree_t *topo_wave_tree; } coll; - void *csel_comm; /* collective selector handle */ - #if defined HAVE_HCOLL hcoll_comm_priv_t hcoll_priv; #endif /* HAVE_HCOLL */ diff --git a/src/mpi/coll/include/coll_csel.h b/src/mpi/coll/include/coll_csel.h index 68673eedb4d..b11c6279a31 100644 --- a/src/mpi/coll/include/coll_csel.h +++ b/src/mpi/coll/include/coll_csel.h @@ -13,7 +13,6 @@ int MPIR_Csel_create_from_file(const char *json_file, int MPIR_Csel_create_from_buf(const char *json, void *(*create_container) (struct json_object *), void **csel); int MPIR_Csel_free(void *csel); -int MPIR_Csel_prune(void *root_csel, MPIR_Comm * comm_ptr, void **comm_csel); void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); void *MPII_Create_container(struct json_object *obj); diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 38690aba45d..878b65d302b 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -291,9 +291,6 @@ int MPIR_Coll_comm_init(MPIR_Comm * comm) if (mpi_errno) MPIR_ERR_POP(mpi_errno); - mpi_errno = MPIR_Csel_prune(MPIR_Csel_root, comm, &comm->csel_comm); - MPIR_ERR_CHECK(mpi_errno); - fn_exit: return mpi_errno; fn_fail: diff --git a/src/mpi/coll/src/csel.c b/src/mpi/coll/src/csel.c index 390f60166f9..3804535d165 100644 --- a/src/mpi/coll/src/csel.c +++ b/src/mpi/coll/src/csel.c @@ -125,25 +125,6 @@ typedef struct csel_node { struct csel_node *failure; } csel_node_s; -typedef enum { - CSEL_TYPE__ROOT, - CSEL_TYPE__PRUNED, -} csel_type_e; - -typedef struct { - csel_type_e type; - 
- union { - struct { - csel_node_s *tree; - } root; - struct { - /* one tree for each collective */ - csel_node_s *coll_trees[MPIR_CSEL_COLL_TYPE__END]; - } pruned; - } u; -} csel_s; - static int nesting = -1; #define nprintf(...) \ do { \ @@ -547,23 +528,22 @@ static csel_node_s *parse_json_tree(struct json_object *obj, int MPIR_Csel_create_from_buf(const char *json, void *(*create_container) (struct json_object *), void **csel_) { - csel_s *csel = NULL; struct json_object *tree; - - csel = (csel_s *) MPL_malloc(sizeof(csel_s), MPL_MEM_COLL); - csel->type = CSEL_TYPE__ROOT; tree = json_tokener_parse(json); if (tree == NULL) goto fn_exit; - csel->u.root.tree = parse_json_tree(tree, create_container); - if (csel->u.root.tree) - validate_tree(csel->u.root.tree); + csel_node_s *csel_root = parse_json_tree(tree, create_container); + if (csel_root) { + validate_tree(csel_root); + } else { + MPIR_Assert(0); + } json_object_put(tree); fn_exit: - *csel_ = csel; + *csel_ = csel_root; return 0; } @@ -678,46 +658,6 @@ static csel_node_s *prune_tree(csel_node_s * root, MPIR_Comm * comm_ptr) return root; } -/* The prune function allows us to simplify the tree for specific - * communicators using comm-specific information (such as size and - * intra/inter comm type. 
*/ -int MPIR_Csel_prune(void *root_csel, MPIR_Comm * comm_ptr, void **comm_csel_) -{ - int mpi_errno = MPI_SUCCESS; - csel_s *csel = (csel_s *) root_csel; - csel_s *comm_csel = NULL; - - MPIR_Assert(root_csel); - MPIR_Assert(comm_ptr); - - comm_csel = (csel_s *) MPL_malloc(sizeof(csel_s), MPL_MEM_COLL); - MPIR_Assert(comm_csel); - - comm_csel->type = CSEL_TYPE__PRUNED; - for (int i = 0; i < MPIR_CSEL_COLL_TYPE__END; i++) - comm_csel->u.pruned.coll_trees[i] = NULL; - - /* prune the tree as far as possible */ - csel_node_s *node = prune_tree(csel->u.root.tree, comm_ptr); - - /* if the tree is not NULL, we should be at a collective branch at - * this point */ - if (node) { - MPIR_Assert(node->type == CSEL_NODE_TYPE__OPERATOR__COLLECTIVE); - } - - while (node) { - /* see if any additional pruning is possible once the - * collective type is removed from the tree */ - comm_csel->u.pruned.coll_trees[node->u.collective.coll_type] = - prune_tree(node->success, comm_ptr); - node = node->failure; - } - - *comm_csel_ = comm_csel; - return mpi_errno; -} - static void free_tree(csel_node_s * node) { if (node->type == CSEL_NODE_TYPE__CONTAINER) { @@ -732,15 +672,14 @@ static void free_tree(csel_node_s * node) } } -int MPIR_Csel_free(void *csel_) +int MPIR_Csel_free(void *csel_root) { int mpi_errno = MPI_SUCCESS; - csel_s *csel = (csel_s *) csel_; - if (csel->type == CSEL_TYPE__ROOT && csel->u.root.tree) - free_tree(csel->u.root.tree); + if (csel_root) { + free_tree(csel_root); + } - MPL_free(csel); return mpi_errno; } @@ -1174,19 +1113,13 @@ static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s * coll_sig) void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s * coll_sig) { - csel_s *csel = (csel_s *) csel_; - csel_node_s *node = NULL; MPIR_Comm *comm_ptr = coll_sig->comm_ptr; MPIR_Assert(csel_); - csel_node_s *root; - if (csel->type == CSEL_TYPE__ROOT) - root = csel->u.root.tree; - else - root = csel->u.pruned.coll_trees[coll_sig->coll_type]; + csel_node_s *root = 
csel_; - for (node = root; node;) { + for (csel_node_s * node = root; node;) { switch (node->type) { case CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED: if (MPIR_IS_THREADED == node->u.is_multi_threaded.val) From 95a323b8d6969cf7ef999f1cd7a9e22de63f1ea1 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sat, 23 Aug 2025 22:50:10 -0500 Subject: [PATCH 30/47] coll/gen_coll: generate algorithm container ids Generate those IDs, table entries, and json parsing from coll_algorithms.txt. --- maint/gen_coll.py | 42 ++- src/include/mpir_coll.h | 194 +------------ src/mpi/coll/src/coll_impl.c | 2 +- src/mpi/coll/src/csel_container.c | 468 +----------------------------- 4 files changed, 43 insertions(+), 663 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 2823e986c62..a80537322dc 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -19,6 +19,7 @@ def main(): G.out = [] G.prototypes_hash = {} G.prototypes = [] + G.algo_list = [] G.out.append("#include \"mpiimpl.h\"") G.out.append("#include \"iallgatherv/iallgatherv.h\"") @@ -42,7 +43,7 @@ def main(): add_sched_auto_prototypes(a) dump_c_file("src/mpi/coll/mpir_coll.c", G.out) - dump_prototypes("src/mpi/coll/include/coll_algos.h", G.prototypes) + dump_coll_algos_h("src/mpi/coll/include/coll_algos.h", G.algo_list, G.prototypes) def dump_algo_cnt_fns(): def get_coll_args(func, func_name): @@ -95,6 +96,8 @@ def dump_algo_prep(func_name, algo): algo_args = get_algo_args(func, func_name, algo) decl = "int %s_cnt(%s)" % (algo_funcname, "MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt") add_prototype(decl) + G.algo_list.append(algo_funcname) + dump_split(0, decl) dump_open('{') G.out.append("int mpi_errno = MPI_SUCCESS;") @@ -424,7 +427,15 @@ def dump_c_file(f, lines): print(" " * indent, end='', file=Out) print(l, file=Out) -def dump_prototypes(f, prototypes): +def dump_coll_algos_h(f, algolist, prototypes): + def algo_id(a): + prefix = "MPII_CSEL_CONTAINER_TYPE__ALGORITHM" + # TODO: fix the tsp 
function name + if RE.match(r'MPIR_TSP_(\w+)_sched_intra_(\w+)', a): + return "%s__MPIR_%s_intra_tsp_%s" % (prefix, RE.m.group(1), RE.m.group(2)) + else: + return "%s__%s" % (prefix, a) + print(" --> [%s]" % f) with open(f, "w") as Out: for l in G.copyright_c: @@ -432,6 +443,33 @@ def dump_prototypes(f, prototypes): print("#ifndef COLL_ALGOS_H_INCLUDED", file=Out) print("#define COLL_ALGOS_H_INCLUDED", file=Out) print("", file=Out) + + print("#define MPIR_COLL_ALGORITHM_IDS() \\", file=Out) + for a in algolist[:-1]: + print(" %s, \\" % algo_id(a), file=Out) + print(" %s" % algo_id(algolist[-1]), file=Out) + print("", file=Out) + + print("#define MPIR_COLL_SET_ALGO_TABLE() \\", file=Out) + print(" do { \\", file=Out) + for a in algolist: + print(" MPIR_Coll_algo_table[%s] = %s_cnt; \\" % (algo_id(a), a), file=Out) + print(" } while (0)", file=Out) + print("", file=Out) + + print("#define MPIR_COLL_SET_CONTAINER_ID() \\", file=Out) + print(" do { \\", file=Out) + print(" if (!strcmp(ckey, \"algorithm=%s\")) { \\" % algolist[0], file=Out) + print(" cnt->id = %s; \\" % algo_id(algolist[0]), file=Out) + for a in algolist[1:]: + print(" } else if (!strcmp(ckey, \"algorithm=%s\")) { \\" % a, file=Out) + print(" cnt->id = %s; \\" % algo_id(a), file=Out) + print(" } else { \\", file=Out) + print(" fprintf(stderr, \"unrecognized key \%s\\n\", key); \\", file=Out) + print(" } \\", file=Out) + print(" } while (0)", file=Out) + print("", file=Out) + for l in prototypes: lines = split_line_with_break(l + ';', '', 80) for l2 in lines: diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index e36a394690a..9096520d61d 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -61,199 +61,7 @@ typedef enum { } MPIR_Csel_coll_type_e; typedef enum { - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recursive_doubling, 
- MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_inter_local_gather_remote_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_inter_remote_gather_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_inter_reduce_exchange_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_scattered, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_pairwise_sendrecv_replace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_inter_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_recursive_doubling_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_ring_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_inter_remote_send_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_linear, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_local_gather_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_inter_sched_local_gather_remote_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_naive, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_reduce_scatter_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_brucks, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_permuted_sendrecv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_scattered, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_blocked, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_inplace, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_inter_sched_pairwise_exchange, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_sched_recursive_doubling, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_inter_sched_bcast, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_ring_allgatherv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_ring_allgather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_inter_sched_flat, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iexscan_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_long, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_short, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_sched_linear, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_reduce_scatter_gather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_inter_sched_local_reduce_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_tsp_recexch, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_recursive_doubling, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_tsp_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_tsp_tree, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_sched_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_remote_send_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_tsp_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_sched_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgather_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgatherv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoall_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallv_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallw_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_reduce_scatter_gather, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_inter_local_reduce_remote_send, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_pairwise, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_inter_remote_reduce_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_noncommutative, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_pairwise, - 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_halving, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_recursive_doubling, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_smp, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_intra_binomial, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_remote_send_local_scatter, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear, - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb, + MPIR_COLL_ALGORITHM_IDS(), /* composition algorithms */ MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto, /* end */ diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 878b65d302b..c2b232a3790 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -408,7 +408,7 @@ void MPIR_Coll_host_buffer_persist_set(void *host_sendbuf, void *host_recvbuf, v void MPIR_Coll_algo_init(void) { - /* manual entries now, but we will replace it with autogen later */ + MPIR_COLL_SET_ALGO_TABLE(); MPIR_Coll_algo_table[MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto] = MPIR_Coll_auto; } diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c index 3fdc2af12f8..3e6f220fab0 100644 --- a/src/mpi/coll/src/csel_container.c +++ b/src/mpi/coll/src/csel_container.c @@ -380,473 +380,7 @@ void *MPII_Create_container(struct json_object *obj) json_object_object_foreach(obj, key, val) { char *ckey = MPL_strdup_no_spaces(key); - if (!strcmp(ckey, 
"algorithm=MPIR_Allgather_intra_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_intra_k_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_intra_recursive_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_intra_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_intra_recexch_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_intra_recexch_halving")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_inter_local_gather_remote_bcast")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_inter_local_gather_remote_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Allgather_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Allgatherv_intra_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Allgatherv_intra_recursive_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Allgatherv_intra_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_intra_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Allgatherv_inter_remote_gather_local_bcast")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_inter_remote_gather_local_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Allgatherv_allcomm_nb")) - cnt->id = 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgatherv_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_recursive_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_recursive_multiplying")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_reduce_scatter_allgather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_reduce_scatter_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_recexch")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_k_reduce_scatter_allgather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_intra_ccl")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_inter_reduce_exchange_bcast")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_inter_reduce_exchange_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Allreduce_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoall_intra_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_brucks; - else if (!strcmp(ckey, 
"algorithm=MPIR_Alltoall_intra_k_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoall_intra_pairwise")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoall_intra_pairwise_sendrecv_replace")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_pairwise_sendrecv_replace; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoall_intra_scattered")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_scattered; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoall_inter_pairwise_exchange")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_inter_pairwise_exchange; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoall_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallv_intra_pairwise_sendrecv_replace")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_pairwise_sendrecv_replace; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallv_intra_scattered")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_intra_scattered; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallv_inter_pairwise_exchange")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_inter_pairwise_exchange; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallv_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallv_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallw_intra_pairwise_sendrecv_replace")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_pairwise_sendrecv_replace; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallw_intra_scattered")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallw_inter_pairwise_exchange")) - cnt->id = 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange; - else if (!strcmp(ckey, "algorithm=MPIR_Alltoallw_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Barrier_intra_k_dissemination")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination; - else if (!strcmp(ckey, "algorithm=MPIR_Barrier_intra_recexch")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch; - else if (!strcmp(ckey, "algorithm=MPIR_Barrier_intra_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Barrier_inter_bcast")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_inter_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Barrier_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_intra_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_intra_scatter_recursive_doubling_allgather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_recursive_doubling_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_intra_scatter_ring_allgather")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_scatter_ring_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_intra_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_intra_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_intra_pipelined_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_inter_remote_send_local_bcast")) - cnt->id = 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_inter_remote_send_local_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Bcast_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Exscan_intra_recursive_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Exscan_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Exscan_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Gather_intra_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_intra_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Gather_inter_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Gather_inter_local_gather_remote_send")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_inter_local_gather_remote_send; - else if (!strcmp(ckey, "algorithm=MPIR_Gather_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gather_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Gatherv_allcomm_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Gatherv_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Gatherv_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_tsp_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_sched_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_sched_ring")) - cnt->id = 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_sched_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_tsp_recexch_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_tsp_recexch_halving")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_recexch_halving; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_intra_tsp_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_intra_tsp_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgather_inter_sched_local_gather_remote_bcast")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgather_inter_sched_local_gather_remote_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_tsp_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_sched_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_sched_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_sched_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_tsp_recexch_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_tsp_recexch_halving")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_recexch_halving; - else if (!strcmp(ckey, "algorithm=MPIR_Iallgatherv_intra_tsp_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_intra_tsp_ring; - else if (!strcmp(ckey, 
"algorithm=MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallgatherv_inter_sched_remote_gather_local_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_sched_naive")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_naive; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_sched_reduce_scatter_allgather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_reduce_scatter_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_tsp_recexch_single_buffer")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_tsp_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_tsp_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_ring; - else if (!strcmp - (ckey, - "algorithm=MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_intra_sched_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_sched_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_inter_sched_remote_reduce_local_bcast; - else if 
(!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_tsp_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_tsp_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_tsp_scattered")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_tsp_scattered; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_sched_brucks")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_brucks; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_sched_inplace")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_inplace; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_sched_pairwise")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_pairwise; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_intra_sched_permuted_sendrecv")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_intra_sched_permuted_sendrecv; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoall_inter_sched_pairwise_exchange")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoall_inter_sched_pairwise_exchange; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallv_intra_sched_blocked")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_blocked; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallv_intra_sched_inplace")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_sched_inplace; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallv_intra_tsp_scattered")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_scattered; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallv_intra_tsp_blocked")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_blocked; - else if (!strcmp(ckey, 
"algorithm=MPIR_Ialltoallv_intra_tsp_inplace")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_intra_tsp_inplace; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallv_inter_sched_pairwise_exchange")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallv_inter_sched_pairwise_exchange; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallw_intra_tsp_blocked")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_blocked; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallw_intra_tsp_inplace")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_tsp_inplace; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallw_intra_sched_blocked")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_blocked; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallw_intra_sched_inplace")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_intra_sched_inplace; - else if (!strcmp(ckey, "algorithm=MPIR_Ialltoallw_inter_sched_pairwise_exchange")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ialltoallw_inter_sched_pairwise_exchange; - else if (!strcmp(ckey, "algorithm=MPIR_Ibarrier_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Ibarrier_intra_tsp_recexch")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_recexch; - else if (!strcmp(ckey, "algorithm=MPIR_Ibarrier_intra_tsp_k_dissem")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_intra_tsp_k_dissemination; - else if (!strcmp(ckey, "algorithm=MPIR_Ibarrier_inter_sched_bcast")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibarrier_inter_sched_bcast; - else if (!strcmp(ckey, "algorithm=MPIR_Ibcast_intra_tsp_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree; - else if (!strcmp(ckey, 
"algorithm=MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv; - else if (!strcmp(ckey, "algorithm=MPIR_Ibcast_intra_tsp_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Ibcast_intra_sched_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_binomial; - else if (!strcmp - (ckey, "algorithm=MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_recursive_doubling_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Ibcast_intra_sched_scatter_ring_allgather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_scatter_ring_allgather; - else if (!strcmp(ckey, "algorithm=MPIR_Ibcast_intra_sched_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_sched_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Ibcast_inter_sched_flat")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_inter_sched_flat; - else if (!strcmp(ckey, "algorithm=MPIR_Iexscan_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iexscan_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Igather_intra_tsp_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_tsp_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Igather_intra_sched_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_intra_sched_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Igather_inter_sched_long")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_long; - else if (!strcmp(ckey, "algorithm=MPIR_Igather_inter_sched_short")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igather_inter_sched_short; - else if (!strcmp(ckey, 
"algorithm=MPIR_Igatherv_allcomm_tsp_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_tsp_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Igatherv_allcomm_sched_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Igatherv_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_allgather_allcomm_tsp_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_tsp_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_allgather_allcomm_sched_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgather_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_allgatherv_allcomm_tsp_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_tsp_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_allgatherv_allcomm_sched_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_allgatherv_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_alltoall_allcomm_tsp_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_tsp_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_alltoall_allcomm_sched_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoall_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_alltoallv_allcomm_tsp_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_tsp_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_alltoallv_allcomm_sched_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallv_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ineighbor_alltoallw_allcomm_tsp_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_tsp_linear; - else if (!strcmp(ckey, 
"algorithm=MPIR_Ineighbor_alltoallw_allcomm_sched_linear")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ineighbor_alltoallw_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_intra_tsp_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_intra_tsp_ring")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_intra_sched_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_intra_sched_reduce_scatter_gather")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_reduce_scatter_gather; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_intra_sched_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_sched_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_inter_sched_local_reduce_remote_send")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_inter_sched_local_reduce_remote_send; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_intra_sched_noncommutative")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_noncommutative; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_intra_sched_pairwise")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_pairwise; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_intra_sched_recursive_halving")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_sched_recursive_halving; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_intra_tsp_recexch")) - cnt->id = 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_intra_tsp_recexch; - else if (!strcmp - (ckey, "algorithm=MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_inter_sched_remote_reduce_local_scatterv; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_block_intra_tsp_recexch")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_tsp_recexch; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_block_intra_sched_noncommutative")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_noncommutative; - else if (!strcmp(ckey, "algorithm=MPIR_Ireduce_scatter_block_intra_sched_pairwise")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_pairwise; - else if (!strcmp - (ckey, "algorithm=MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_doubling; - else if (!strcmp - (ckey, "algorithm=MPIR_Ireduce_scatter_block_intra_sched_recursive_halving")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_intra_sched_recursive_halving; - else if (!strcmp - (ckey, - "algorithm=MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_scatter_block_inter_sched_remote_reduce_local_scatterv; - else if (!strcmp(ckey, "algorithm=MPIR_Iscan_intra_sched_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Iscan_intra_sched_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_sched_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Iscan_intra_tsp_recursive_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscan_intra_tsp_recursive_doubling; 
- else if (!strcmp(ckey, "algorithm=MPIR_Iscatter_intra_tsp_tree")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_tsp_tree; - else if (!strcmp(ckey, "algorithm=MPIR_Iscatter_intra_sched_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_intra_sched_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Iscatter_inter_sched_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Iscatter_inter_sched_remote_send_local_scatter")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatter_inter_sched_remote_send_local_scatter; - else if (!strcmp(ckey, "algorithm=MPIR_Iscatterv_allcomm_tsp_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_tsp_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Iscatterv_allcomm_sched_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iscatterv_allcomm_sched_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Neighbor_allgather_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgather_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Neighbor_allgatherv_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_allgatherv_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Neighbor_alltoall_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoall_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Neighbor_alltoallv_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallv_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Neighbor_alltoallw_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Neighbor_alltoallw_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_intra_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_intra_reduce_scatter_gather")) - 
cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_reduce_scatter_gather; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_intra_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_intra_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_inter_local_reduce_remote_send")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_inter_local_reduce_remote_send; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_intra_noncommutative")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_noncommutative; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_intra_pairwise")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_pairwise; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_intra_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_intra_recursive_halving")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_intra_recursive_halving; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_inter_remote_reduce_local_scatter")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_inter_remote_reduce_local_scatter; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_block_intra_noncommutative")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_noncommutative; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_block_intra_pairwise")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_pairwise; - else if (!strcmp(ckey, 
"algorithm=MPIR_Reduce_scatter_block_intra_recursive_doubling")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_block_intra_recursive_halving")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_intra_recursive_halving; - else if (!strcmp - (ckey, "algorithm=MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_inter_remote_reduce_local_scatter; - else if (!strcmp(ckey, "algorithm=MPIR_Reduce_scatter_block_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Reduce_scatter_block_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Scan_intra_recursive_doubling")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_recursive_doubling; - else if (!strcmp(ckey, "algorithm=MPIR_Scan_intra_smp")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_intra_smp; - else if (!strcmp(ckey, "algorithm=MPIR_Scan_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scan_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Scatter_intra_binomial")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_intra_binomial; - else if (!strcmp(ckey, "algorithm=MPIR_Scatter_inter_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Scatter_inter_remote_send_local_scatter")) - cnt->id = - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_inter_remote_send_local_scatter; - else if (!strcmp(ckey, "algorithm=MPIR_Scatter_allcomm_nb")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatter_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Scatterv_allcomm_linear")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_linear; - else if (!strcmp(ckey, "algorithm=MPIR_Scatterv_allcomm_nb")) - cnt->id = 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Scatterv_allcomm_nb; - else if (!strcmp(ckey, "algorithm=MPIR_Coll_auto")) - cnt->id = MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto; - else { - fprintf(stderr, "unrecognized key %s\n", key); - MPIR_Assert(0); - } + MPIR_COLL_SET_CONTAINER_ID(); MPL_free(ckey); } From 35a127f2c7481423cbb2e9ab6a4bb8b30c2e6be8 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Mon, 25 Aug 2025 11:40:35 -0500 Subject: [PATCH 31/47] misc: spelling check --- src/mpi/coll/allgather/allgather_intra_recexch.c | 2 +- src/mpi/coll/iallgather/iallgather_tsp_recexch.c | 2 +- src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mpi/coll/allgather/allgather_intra_recexch.c b/src/mpi/coll/allgather/allgather_intra_recexch.c index d2ee4702ba4..51fcb60040b 100644 --- a/src/mpi/coll/allgather/allgather_intra_recexch.c +++ b/src/mpi/coll/allgather/allgather_intra_recexch.c @@ -106,7 +106,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, MPIR_ERR_CHECK(mpi_errno); } - if (step1_sendto != -1) { /* non-participating rank sends the data to a partcipating rank */ + if (step1_sendto != -1) { /* non-participating rank sends the data to a participating rank */ void *buf_to_send; send_offset = rank * recv_extent * recvcount; if (is_inplace) diff --git a/src/mpi/coll/iallgather/iallgather_tsp_recexch.c b/src/mpi/coll/iallgather/iallgather_tsp_recexch.c index 5b849db5fd7..44d44d34736 100644 --- a/src/mpi/coll/iallgather/iallgather_tsp_recexch.c +++ b/src/mpi/coll/iallgather/iallgather_tsp_recexch.c @@ -70,7 +70,7 @@ static int MPIR_TSP_Iallgather_sched_intra_recexch_step1(int step1_sendto, int * int vtx_id; MPIR_FUNC_ENTER; - if (step1_sendto != -1) { /* non-participating rank sends the data to a partcipating rank */ + if (step1_sendto != -1) { /* non-participating rank sends the data to a participating rank */ void *buf_to_send; MPI_Aint send_offset = rank * recv_extent 
 * recvcount;
     if (is_inplace)
diff --git a/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c b/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c
index f228b787192..c3cb3c680d3 100644
--- a/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c
+++ b/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c
@@ -78,7 +78,7 @@ static int MPIR_TSP_Iallgatherv_sched_intra_recexch_step1(int step1_sendto, int
     int i, vtx_id;
     MPIR_FUNC_ENTER;
-    if (step1_sendto != -1) {   /* non-participating rank sends the data to a partcipating rank */
+    if (step1_sendto != -1) {   /* non-participating rank sends the data to a participating rank */
         void *buf_to_send;
         MPI_Aint send_offset = displs[rank] * recv_extent;
         if (is_inplace)

From e38853aab08c53a603e9571693a6093c4780587d Mon Sep 17 00:00:00 2001
From: Hui Zhou
Date: Sat, 23 Aug 2025 23:49:53 -0500
Subject: [PATCH 32/47] coll: remove the alias feature in coll_algorithms.txt

We can easily create alias algorithms by defining a separate algorithm
function that calls the generic routines. Thus, simplify the design by
removing the alias feature in coll_algorithms.txt. This ensures a
one-to-one entry for each collective algorithm with a matching
algorithm function.

Add iallreduce tsp_recexch algorithm since the function is used in
multiple places. Similarly, add ibcast tsp_scatterv_allgatherv
algorithm since it is used elsewhere internally.

Remove enums such as IREDUCE_RECEXCH_TYPE_DISTANCE_DOUBLING/HALVING.
The actual parameter is more like a boolean.
--- maint/gen_coll.py | 4 +- src/include/mpir_coll.h | 4 +- .../coll/allgather/allgather_intra_recexch.c | 45 +++++++++---- src/mpi/coll/coll_algorithms.txt | 63 +++++++++---------- src/mpi/coll/cvars.txt | 2 + .../coll/iallgather/iallgather_tsp_recexch.c | 29 +++++++-- .../iallgatherv/iallgatherv_tsp_recexch.c | 37 +++++++++-- .../coll/iallreduce/iallreduce_tsp_recexch.c | 22 ++++++- ...ecexch_reduce_scatter_recexch_allgatherv.c | 7 +-- src/mpi/coll/ibcast/Makefile.mk | 1 - src/mpi/coll/ibcast/ibcast_tsp_auto.c | 3 +- .../ibcast/ibcast_tsp_scatterv_allgatherv.c | 30 +++++++-- src/mpi/coll/ibcast/ibcast_tsp_tree.c | 7 +++ src/mpi/coll/include/coll_impl.h | 1 + src/mpi/coll/include/coll_types.h | 24 ------- src/mpi/coll/ireduce/ireduce_tsp_tree.c | 10 +++ src/mpi/coll/src/csel_container.c | 11 ++-- 17 files changed, 193 insertions(+), 107 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index a80537322dc..443a5536127 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -371,9 +371,7 @@ def get_algo_extra_params(algo): # additional wrappers def get_algo_name(algo): # the name used in algo function name - if "func_name" in algo: - return algo['func_name'] - elif algo['name'].startswith('tsp_'): + if algo['name'].startswith('tsp_'): return algo['name'][4:] else: return algo['name'] diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index 9096520d61d..37ebf57144a 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -247,7 +247,7 @@ struct MPII_Csel_container { } intra_tsp_recexch_single_buffer; struct { int k; - } intra_tsp_recexch_multiple_buffer; + } intra_tsp_recexch; struct { int tree_type; int k; @@ -311,7 +311,7 @@ struct MPII_Csel_container { struct { int scatterv_k; int allgatherv_k; - } intra_tsp_scatterv_recexch_allgatherv; + } intra_tsp_scatterv_allgatherv; struct { int scatterv_k; } intra_tsp_scatterv_ring_allgatherv; diff --git a/src/mpi/coll/allgather/allgather_intra_recexch.c 
b/src/mpi/coll/allgather/allgather_intra_recexch.c index 51fcb60040b..7167a61d3d6 100644 --- a/src/mpi/coll/allgather/allgather_intra_recexch.c +++ b/src/mpi/coll/allgather/allgather_intra_recexch.c @@ -12,14 +12,15 @@ * on the recursive doubling algorithm described by Thakur et al, "Optimization of * Collective Communication Operations in MPICH", 2005. The recursive doubling * algorithm has been extended for any radix k. A variant of the algorithm called - * as distance halving (selected by setting recexch_type=1) is based on the + * as distance halving (selected by setting is_halving=true) is based on the * paper, Sack et al, "Faster topology-aware collective algorithms through * non-minimal communication", 2012. * */ -int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, - MPI_Datatype recvtype, MPIR_Comm * comm, - int recexch_type, int k, int single_phase_recv, int coll_attr) +static int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, + MPI_Datatype recvtype, MPIR_Comm * comm, + bool is_halving, int k, int single_phase_recv, + int coll_attr) { int mpi_errno = MPI_SUCCESS; int is_inplace, i, j; @@ -134,7 +135,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, /* For distance halving algorithm, exchange the data with digit reversed partner * so that finally the data is in the correct order. 
*/ - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) { + if (is_halving) { if (step1_sendto == -1) { /* get the partner with whom I should exchange data */ partner = MPII_Recexchalgo_reverse_digits_step2(rank, nranks, k); @@ -163,7 +164,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, } } - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) { + if (is_halving) { phase = step2_nphases - 1; recv_phase = step2_nphases - 1; } else { @@ -179,7 +180,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, for (iter = 0; iter < total_phases && (j + iter) < step2_nphases; iter++) { for (i = 0; i < k - 1; i++) { nbr = step2_nbrs[recv_phase][i]; - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) + if (is_halving) rank_for_offset = MPII_Recexchalgo_reverse_digits_step2(nbr, nranks, k); else rank_for_offset = nbr; @@ -191,7 +192,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, MPIR_ALLGATHER_TAG, comm, &recv_reqs[num_rreq++]); MPIR_ERR_CHECK(mpi_errno); } - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) + if (is_halving) recv_phase--; else recv_phase++; @@ -200,7 +201,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, /* send data to all the neighbors */ for (i = 0; i < k - 1; i++) { nbr = step2_nbrs[phase][i]; - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) + if (is_halving) rank_for_offset = MPII_Recexchalgo_reverse_digits_step2(rank, nranks, k); else rank_for_offset = rank; @@ -214,7 +215,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, /* wait on prev recvs */ MPIC_Waitall((k - 1), recv_reqs, MPI_STATUSES_IGNORE); - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) + if (is_halving) phase--; else phase++; @@ -225,7 +226,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, /* send data to all the neighbors 
once more */ for (i = 0; i < k - 1; i++) { nbr = step2_nbrs[phase][i]; - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) + if (is_halving) rank_for_offset = MPII_Recexchalgo_reverse_digits_step2(rank, nranks, k); else rank_for_offset = rank; @@ -242,7 +243,7 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, mpi_errno = MPIC_Waitall((k - 1), recv_reqs + (k - 1), MPI_STATUSES_IGNORE); MPIR_ERR_CHECK(mpi_errno); - if (recexch_type == MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING) + if (is_halving) phase--; else phase++; @@ -286,3 +287,21 @@ int MPIR_Allgather_intra_recexch(const void *sendbuf, MPI_Aint sendcount, fn_fail: goto fn_exit; } + +int MPIR_Allgather_intra_recexch_doubling(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, + MPI_Datatype recvtype, MPIR_Comm * comm, + int k, int single_phase_recv, int coll_attr) +{ + return MPIR_Allgather_intra_recexch(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, + comm, false, k, single_phase_recv, coll_attr); +} + +int MPIR_Allgather_intra_recexch_halving(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, + MPI_Datatype recvtype, MPIR_Comm * comm, + int k, int single_phase_recv, int coll_attr) +{ + return MPIR_Allgather_intra_recexch(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, + comm, true, k, single_phase_recv, coll_attr); +} diff --git a/src/mpi/coll/coll_algorithms.txt b/src/mpi/coll/coll_algorithms.txt index 0279538dca4..014aa392754 100644 --- a/src/mpi/coll/coll_algorithms.txt +++ b/src/mpi/coll/coll_algorithms.txt @@ -37,8 +37,6 @@ # the same order. In addition, some of the extra parameter can be constant by specify # a initialization, e.g. param1=val. Rather than repeating the same constant in the # cvar_params, we can use `-` as placeholder for the corresponding constant param. 
-# * func_name: -# Some algorithm use another algorithm or use a different function name. barrier-intra: k_dissemination @@ -84,17 +82,18 @@ ibcast-intra: tsp_tree extra_params: tree_type, k, chunk_size cvar_params: TREE_TYPE, TREE_KVAL, TREE_PIPELINE_CHUNK_SIZE - tsp_scatterv_recexch_allgatherv - func_name: scatterv_allgatherv - extra_params: allgatherv_algo=MPIR_CVAR_IALLGATHERV_INTRA_ALGORITHM_tsp_recexch_doubling, scatterv_k, allgatherv_k + tsp_scatterv_allgatherv + extra_params: use_ring=0, scatterv_k, allgatherv_k cvar_params: -, SCATTERV_KVAL, ALLGATHERV_RECEXCH_KVAL + tsp_scatterv_recexch_allgatherv + extra_params: scatterv_k, allgatherv_k + cvar_params: SCATTERV_KVAL, ALLGATHERV_RECEXCH_KVAL tsp_scatterv_ring_allgatherv extra_params: scatterv_k cvar_params: SCATTERV_KVAL tsp_ring - func_name: tree - extra_params: tree_type=MPIR_TREE_TYPE_KARY, k=1, chunk_size - cvar_params: -, -, RING_CHUNK_SIZE + extra_params: chunk_size + cvar_params: RING_CHUNK_SIZE bcast-inter: remote_send_local_bcast ibcast-inter: @@ -171,13 +170,11 @@ allgather-intra: restrictions: power-of-two ring recexch_doubling - func_name: recexch - extra_params: recexch_type=MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_DOUBLING, k, single_phase_recv - cvar_params: -, RECEXCH_KVAL, RECEXCH_SINGLE_PHASE_RECV + extra_params: k, single_phase_recv + cvar_params: RECEXCH_KVAL, RECEXCH_SINGLE_PHASE_RECV recexch_halving - func_name: recexch - extra_params: recexch_type=MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING, k, single_phase_recv - cvar_params: -, RECEXCH_KVAL, RECEXCH_SINGLE_PHASE_RECV + extra_params: k, single_phase_recv + cvar_params: RECEXCH_KVAL, RECEXCH_SINGLE_PHASE_RECV allgather-inter: local_gather_remote_bcast iallgather-intra: @@ -190,13 +187,11 @@ iallgather-intra: extra_params: k cvar_params: BRUCKS_KVAL tsp_recexch_doubling - func_name: recexch - extra_params: recexch_type=MPIR_IALLGATHER_RECEXCH_TYPE_DISTANCE_DOUBLING, k - cvar_params: -, RECEXCH_KVAL + extra_params: k + cvar_params: 
RECEXCH_KVAL tsp_recexch_halving - func_name: recexch - extra_params: recexch_type=MPIR_IALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING, k - cvar_params: -, RECEXCH_KVAL + extra_params: k + cvar_params: RECEXCH_KVAL iallgather-inter: sched_local_gather_remote_bcast @@ -214,14 +209,12 @@ iallgatherv-intra: sched_ring tsp_recexch_doubling restrictions: displs-ordered - func_name: recexch - extra_params: recexch_type=MPIR_IALLGATHERV_RECEXCH_TYPE_DISTANCE_DOUBLING, k - cvar_params: -, RECEXCH_KVAL + extra_params: k + cvar_params: RECEXCH_KVAL tsp_recexch_halving restrictions: displs-ordered - func_name: recexch - extra_params: recexch_type=MPIR_IALLGATHERV_RECEXCH_TYPE_DISTANCE_HALVING, k - cvar_params: -, RECEXCH_KVAL + extra_params: k + cvar_params: RECEXCH_KVAL tsp_ring tsp_brucks extra_params: k @@ -328,9 +321,8 @@ ireduce-intra: extra_params: tree_type, k, chunk_size, buffer_per_child cvar_params: TREE_TYPE, TREE_KVAL, TREE_PIPELINE_CHUNK_SIZE, TREE_BUFFER_PER_CHILD tsp_ring - func_name: tree - extra_params: tree_type=MPIR_TREE_TYPE_KARY, k=1, chunk_size, buffer_per_child - cvar_params: -, -, RING_CHUNK_SIZE, TREE_BUFFER_PER_CHILD + extra_params: chunk_size, buffer_per_child + cvar_params: RING_CHUNK_SIZE, TREE_BUFFER_PER_CHILD ireduce-inter: sched_local_reduce_remote_send @@ -369,12 +361,13 @@ iallreduce-intra: sched_reduce_scatter_allgather restrictions: size-ge-pof2, builtin-op tsp_recexch_single_buffer - func_name: recexch - extra_params: recexch_type=MPIR_IALLREDUCE_RECEXCH_TYPE_SINGLE_BUFFER, k - cvar_params: -, RECEXCH_KVAL + extra_params: k + cvar_params: RECEXCH_KVAL tsp_recexch_multiple_buffer - func_name: recexch - extra_params: recexch_type=MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER, k + extra_params: k + cvar_params: RECEXCH_KVAL + tsp_recexch + extra_params: per_nbr_buffer=1, k cvar_params: -, RECEXCH_KVAL tsp_tree extra_params: tree_type, k, chunk_size, buffer_per_child @@ -407,7 +400,7 @@ ireduce_scatter-intra: restrictions: commutative tsp_recexch 
restrictions: commutative - extra_params: recexch_type=IREDUCE_SCATTER_RECEXCH_TYPE_DISTANCE_DOUBLING, k + extra_params: is_dist_halving=0, k cvar_params: -, RECEXCH_KVAL ireduce_scatter-inter: sched_remote_reduce_local_scatterv diff --git a/src/mpi/coll/cvars.txt b/src/mpi/coll/cvars.txt index 8737d315b5b..c54a6a36d04 100644 --- a/src/mpi/coll/cvars.txt +++ b/src/mpi/coll/cvars.txt @@ -384,6 +384,7 @@ cvars: sched_scatter_recursive_doubling_allgather - Force Scatter Recursive Doubling Allgather algorithm sched_scatter_ring_allgather - Force Scatter Ring Allgather algorithm tsp_tree - Force Generic Transport Tree algorithm + tsp_scatterv_allgatherv - Force Generic Transport Scatterv followed by Allgatherv algorithm tsp_scatterv_recexch_allgatherv - Force Generic Transport Scatterv followed by Recursive Exchange Allgatherv algorithm tsp_scatterv_ring_allgatherv - Force Generic Transport Scatterv followed by Ring Allgatherv algorithm tsp_ring - Force Generic Transport Ring algorithm @@ -1639,6 +1640,7 @@ cvars: sched_smp - Force smp algorithm sched_recursive_doubling - Force recursive doubling algorithm sched_reduce_scatter_allgather - Force reduce scatter allgather algorithm + tsp_recexch - Force generic transport recursive exchange algorithm tsp_recexch_single_buffer - Force generic transport recursive exchange with single buffer for receives tsp_recexch_multiple_buffer - Force generic transport recursive exchange with multiple buffers for receives tsp_tree - Force generic transport tree algorithm diff --git a/src/mpi/coll/iallgather/iallgather_tsp_recexch.c b/src/mpi/coll/iallgather/iallgather_tsp_recexch.c index 44d44d34736..1e309b52f2b 100644 --- a/src/mpi/coll/iallgather/iallgather_tsp_recexch.c +++ b/src/mpi/coll/iallgather/iallgather_tsp_recexch.c @@ -219,11 +219,11 @@ static int MPIR_TSP_Iallgather_sched_intra_recexch_step3(int step1_sendto, int * * paper, Sack et al, "Faster topology-aware collective algorithms through * non-minimal communication", 2012. 
* */ -int MPIR_TSP_Iallgather_sched_intra_recexch(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - MPI_Aint recvcount, MPI_Datatype recvtype, - MPIR_Comm * comm, int is_dist_halving, int k, - MPIR_TSP_sched_t sched) +static int MPIR_TSP_Iallgather_sched_intra_recexch(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, + MPI_Aint recvcount, MPI_Datatype recvtype, + MPIR_Comm * comm, int is_dist_halving, int k, + MPIR_TSP_sched_t sched) { int mpi_errno = MPI_SUCCESS; int is_inplace, i; @@ -321,3 +321,22 @@ int MPIR_TSP_Iallgather_sched_intra_recexch(const void *sendbuf, MPI_Aint sendco fn_fail: goto fn_exit; } + +int MPIR_TSP_Iallgather_sched_intra_recexch_doubling(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, + MPI_Aint recvcount, MPI_Datatype recvtype, + MPIR_Comm * comm, int k, + MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Iallgather_sched_intra_recexch(sendbuf, sendcount, sendtype, + recvbuf, recvcount, recvtype, comm, 0, k, sched); +} + +int MPIR_TSP_Iallgather_sched_intra_recexch_halving(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, + MPI_Aint recvcount, MPI_Datatype recvtype, + MPIR_Comm * comm, int k, MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Iallgather_sched_intra_recexch(sendbuf, sendcount, sendtype, + recvbuf, recvcount, recvtype, comm, 1, k, sched); +} diff --git a/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c b/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c index c3cb3c680d3..cf242a1498e 100644 --- a/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c +++ b/src/mpi/coll/iallgatherv/iallgatherv_tsp_recexch.c @@ -232,11 +232,12 @@ static int MPIR_TSP_Iallgatherv_sched_intra_recexch_step3(int step1_sendto, int } /* Routine to schedule a recursive exchange based allgather */ -int MPIR_TSP_Iallgatherv_sched_intra_recexch(const void *sendbuf, MPI_Aint sendcount, - MPI_Datatype sendtype, void *recvbuf, - const MPI_Aint 
* recvcounts, const MPI_Aint * displs, - MPI_Datatype recvtype, MPIR_Comm * comm, - int is_dist_halving, int k, MPIR_TSP_sched_t sched) +static int MPIR_TSP_Iallgatherv_sched_intra_recexch(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, + const MPI_Aint * recvcounts, + const MPI_Aint * displs, MPI_Datatype recvtype, + MPIR_Comm * comm, int is_dist_halving, int k, + MPIR_TSP_sched_t sched) { int mpi_errno = MPI_SUCCESS; int is_inplace, i; @@ -334,3 +335,29 @@ int MPIR_TSP_Iallgatherv_sched_intra_recexch(const void *sendbuf, MPI_Aint sendc fn_fail: goto fn_exit; } + +int MPIR_TSP_Iallgatherv_sched_intra_recexch_doubling(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, + const MPI_Aint * recvcounts, + const MPI_Aint * displs, + MPI_Datatype recvtype, MPIR_Comm * comm, + int k, MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Iallgatherv_sched_intra_recexch(sendbuf, sendcount, sendtype, + recvbuf, recvcounts, displs, recvtype, comm, + 0, k, sched); + +} + +int MPIR_TSP_Iallgatherv_sched_intra_recexch_halving(const void *sendbuf, MPI_Aint sendcount, + MPI_Datatype sendtype, void *recvbuf, + const MPI_Aint * recvcounts, + const MPI_Aint * displs, MPI_Datatype recvtype, + MPIR_Comm * comm, int k, + MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Iallgatherv_sched_intra_recexch(sendbuf, sendcount, sendtype, + recvbuf, recvcounts, displs, recvtype, comm, + 1, k, sched); + +} diff --git a/src/mpi/coll/iallreduce/iallreduce_tsp_recexch.c b/src/mpi/coll/iallreduce/iallreduce_tsp_recexch.c index 724d27d14b1..85bc5080fab 100644 --- a/src/mpi/coll/iallreduce/iallreduce_tsp_recexch.c +++ b/src/mpi/coll/iallreduce/iallreduce_tsp_recexch.c @@ -9,8 +9,8 @@ #include "iallreduce_tsp_recursive_exchange_common.h" /* Routine to schedule a recursive exchange based allreduce */ -int MPIR_TSP_Iallreduce_sched_intra_recexch(const void *sendbuf, void *recvbuf, MPI_Aint count, - MPI_Datatype datatype, MPI_Op op, +int 
MPIR_TSP_Iallreduce_sched_intra_recexch(const void *sendbuf, void *recvbuf, + MPI_Aint count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm * comm, int per_nbr_buffer, int k, MPIR_TSP_sched_t sched) { @@ -275,3 +275,21 @@ int MPIR_TSP_Iallreduce_sched_intra_recexch(const void *sendbuf, void *recvbuf, fn_fail: goto fn_exit; } + +int MPIR_TSP_Iallreduce_sched_intra_recexch_single_buffer(const void *sendbuf, void *recvbuf, + MPI_Aint count, MPI_Datatype datatype, + MPI_Op op, MPIR_Comm * comm, int k, + MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, datatype, op, comm, + 0, k, sched); +} + +int MPIR_TSP_Iallreduce_sched_intra_recexch_multiple_buffer(const void *sendbuf, void *recvbuf, + MPI_Aint count, MPI_Datatype datatype, + MPI_Op op, MPIR_Comm * comm, int k, + MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Iallreduce_sched_intra_recexch(sendbuf, recvbuf, count, datatype, op, comm, + 1, k, sched); +} diff --git a/src/mpi/coll/iallreduce/iallreduce_tsp_recexch_reduce_scatter_recexch_allgatherv.c b/src/mpi/coll/iallreduce/iallreduce_tsp_recexch_reduce_scatter_recexch_allgatherv.c index bd7bf934414..76e1dbdc942 100644 --- a/src/mpi/coll/iallreduce/iallreduce_tsp_recexch_reduce_scatter_recexch_allgatherv.c +++ b/src/mpi/coll/iallreduce/iallreduce_tsp_recexch_reduce_scatter_recexch_allgatherv.c @@ -37,8 +37,7 @@ int MPIR_TSP_Iallreduce_sched_intra_recexch_reduce_scatter_recexch_allgatherv(co void *tmp_recvbuf; void **step1_recvbuf = NULL; int tag, vtx_id; - int allgather_algo_type = MPIR_IALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING; - int redscat_algo_type = IREDUCE_SCATTER_RECEXCH_TYPE_DISTANCE_HALVING; + int is_dist_halving = 1; MPIR_CHKLMEM_DECL(); MPIR_FUNC_ENTER; @@ -114,7 +113,7 @@ int MPIR_TSP_Iallreduce_sched_intra_recexch_reduce_scatter_recexch_allgatherv(co MPIR_TSP_Ireduce_scatter_sched_intra_recexch_step2(recvbuf, tmp_recvbuf, cnts, displs, datatype, op, extent, tag, - comm, k, redscat_algo_type, + comm, k, 
is_dist_halving, step2_nphases, step2_nbrs, rank, nranks, sink_id, 0, NULL, sched); @@ -123,7 +122,7 @@ int MPIR_TSP_Iallreduce_sched_intra_recexch_reduce_scatter_recexch_allgatherv(co MPIR_TSP_Iallgatherv_sched_intra_recexch_step2(step1_sendto, step2_nphases, step2_nbrs, rank, nranks, k, p_of_k, log_pofk, T, &nvtcs, &recv_id, tag, recvbuf, extent, cnts, displs, - datatype, allgather_algo_type, comm, sched); + datatype, is_dist_halving, comm, sched); } diff --git a/src/mpi/coll/ibcast/Makefile.mk b/src/mpi/coll/ibcast/Makefile.mk index 11c9cdf492d..d804bb7a40e 100644 --- a/src/mpi/coll/ibcast/Makefile.mk +++ b/src/mpi/coll/ibcast/Makefile.mk @@ -14,7 +14,6 @@ mpi_core_sources += \ src/mpi/coll/ibcast/ibcast_intra_sched_smp.c \ src/mpi/coll/ibcast/ibcast_inter_sched_flat.c \ src/mpi/coll/ibcast/ibcast_tsp_scatterv_allgatherv.c \ - src/mpi/coll/ibcast/ibcast_tsp_scatterv_ring_allgatherv.c \ src/mpi/coll/ibcast/ibcast_tsp_tree.c \ src/mpi/coll/ibcast/ibcast_tsp_auto.c \ src/mpi/coll/ibcast/ibcast_utils.c diff --git a/src/mpi/coll/ibcast/ibcast_tsp_auto.c b/src/mpi/coll/ibcast/ibcast_tsp_auto.c index 3965500adb7..9278a5787e7 100644 --- a/src/mpi/coll/ibcast/ibcast_tsp_auto.c +++ b/src/mpi/coll/ibcast/ibcast_tsp_auto.c @@ -37,8 +37,7 @@ static int MPIR_Ibcast_sched_intra_tsp_flat_auto(void *buffer, MPI_Aint count, /* gentran scatterv recexch allgather with radix 2 */ mpi_errno = MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(buffer, count, datatype, root, - comm_ptr, - MPIR_CVAR_IALLGATHERV_INTRA_ALGORITHM_tsp_recexch_doubling, + comm_ptr, 0, scatterv_k, allgatherv_k, sched); } MPIR_ERR_CHECK(mpi_errno); diff --git a/src/mpi/coll/ibcast/ibcast_tsp_scatterv_allgatherv.c b/src/mpi/coll/ibcast/ibcast_tsp_scatterv_allgatherv.c index b1fb176a50c..8b98132840c 100644 --- a/src/mpi/coll/ibcast/ibcast_tsp_scatterv_allgatherv.c +++ b/src/mpi/coll/ibcast/ibcast_tsp_scatterv_allgatherv.c @@ -10,7 +10,7 @@ /* Routine to schedule a scatter followed by recursive exchange based 
broadcast */ int MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, - MPIR_Comm * comm, int allgatherv_algo, + MPIR_Comm * comm, int use_ring, int scatterv_k, int allgatherv_k, MPIR_TSP_sched_t sched) { @@ -181,7 +181,7 @@ int MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(void *buffer, MPI_Aint count mpi_errno = MPIR_TSP_sched_fence(sched); /* wait for scatter to complete */ MPIR_ERR_CHECK(mpi_errno); - if (allgatherv_algo == MPIR_CVAR_IALLGATHERV_INTRA_ALGORITHM_tsp_ring) + if (use_ring) /* Schedule Allgatherv ring */ mpi_errno = MPIR_TSP_Iallgatherv_sched_intra_ring(MPI_IN_PLACE, cnts[rank], MPIR_BYTE_INTERNAL, @@ -190,9 +190,10 @@ int MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(void *buffer, MPI_Aint count else /* Schedule Allgatherv recexch */ mpi_errno = - MPIR_TSP_Iallgatherv_sched_intra_recexch(MPI_IN_PLACE, cnts[rank], MPIR_BYTE_INTERNAL, - tmp_buf, cnts, displs, MPIR_BYTE_INTERNAL, - comm, 0, allgatherv_k, sched); + MPIR_TSP_Iallgatherv_sched_intra_recexch_doubling(MPI_IN_PLACE, cnts[rank], + MPIR_BYTE_INTERNAL, tmp_buf, cnts, + displs, MPIR_BYTE_INTERNAL, comm, + allgatherv_k, sched); MPIR_ERR_CHECK(mpi_errno); if (!is_contig) { @@ -214,3 +215,22 @@ int MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(void *buffer, MPI_Aint count fn_fail: goto fn_exit; } + +int MPIR_TSP_Ibcast_sched_intra_scatterv_ring_allgatherv(void *buffer, MPI_Aint count, + MPI_Datatype datatype, int root, + MPIR_Comm * comm, int scatterv_k, + MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(buffer, count, datatype, root, comm, + 1, scatterv_k, 0, sched); +} + +int MPIR_TSP_Ibcast_sched_intra_scatterv_recexch_allgatherv(void *buffer, MPI_Aint count, + MPI_Datatype datatype, int root, + MPIR_Comm * comm, int scatterv_k, + int allgatherv_k, + MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Ibcast_sched_intra_scatterv_allgatherv(buffer, count, datatype, root, comm, + 0, scatterv_k, 
allgatherv_k, sched); +} diff --git a/src/mpi/coll/ibcast/ibcast_tsp_tree.c b/src/mpi/coll/ibcast/ibcast_tsp_tree.c index 1a720bccb00..130d447c6e9 100644 --- a/src/mpi/coll/ibcast/ibcast_tsp_tree.c +++ b/src/mpi/coll/ibcast/ibcast_tsp_tree.c @@ -98,3 +98,10 @@ int MPIR_TSP_Ibcast_sched_intra_tree(void *buffer, MPI_Aint count, MPI_Datatype fn_fail: goto fn_exit; } + +int MPIR_TSP_Ibcast_sched_intra_ring(void *buffer, MPI_Aint count, MPI_Datatype datatype, int root, + MPIR_Comm * comm, int chunk_size, MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Ibcast_sched_intra_tree(buffer, count, datatype, root, comm, + MPIR_TREE_TYPE_KARY, 1, chunk_size, sched); +} diff --git a/src/mpi/coll/include/coll_impl.h b/src/mpi/coll/include/coll_impl.h index 8534a0c8df3..05ac1b54b41 100644 --- a/src/mpi/coll/include/coll_impl.h +++ b/src/mpi/coll/include/coll_impl.h @@ -62,6 +62,7 @@ int MPIR_Coll_safe_to_block(void); int MPII_Coll_finalize(void); void MPIR_Coll_algo_init(void); +void MPIR_Coll_cvar_init(void); /* NOTE: MPIR_Coll_auto is one of the composition container functions. However, * MPIR_Coll_composition_auto is a gate function, thus does not take "cnt" parameter. 
*/ int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig); diff --git a/src/mpi/coll/include/coll_types.h b/src/mpi/coll/include/coll_types.h index a32ce6c551d..57e389a98c2 100644 --- a/src/mpi/coll/include/coll_types.h +++ b/src/mpi/coll/include/coll_types.h @@ -29,30 +29,6 @@ enum { MPIR_IALLREDUCE_RECEXCH_TYPE_MULTIPLE_BUFFER }; -/* enumerator for different recexch types */ -enum { - MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_DOUBLING = 0, - MPIR_ALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING -}; - -/* enumerator for different recexch types */ -enum { - MPIR_IALLGATHER_RECEXCH_TYPE_DISTANCE_DOUBLING = 0, - MPIR_IALLGATHER_RECEXCH_TYPE_DISTANCE_HALVING -}; - -/* enumerator for different recexch types */ -enum { - MPIR_IALLGATHERV_RECEXCH_TYPE_DISTANCE_DOUBLING = 0, - MPIR_IALLGATHERV_RECEXCH_TYPE_DISTANCE_HALVING -}; - -/* enumerator for different reduce scatter types */ -enum { - IREDUCE_SCATTER_RECEXCH_TYPE_DISTANCE_DOUBLING = 0, - IREDUCE_SCATTER_RECEXCH_TYPE_DISTANCE_HALVING -}; - /* Collectives request data structure */ typedef struct MPII_Coll_req_t { void *sched; /* pointer to the schedule */ diff --git a/src/mpi/coll/ireduce/ireduce_tsp_tree.c b/src/mpi/coll/ireduce/ireduce_tsp_tree.c index 0c0160eb117..bb3984b4720 100644 --- a/src/mpi/coll/ireduce/ireduce_tsp_tree.c +++ b/src/mpi/coll/ireduce/ireduce_tsp_tree.c @@ -265,3 +265,13 @@ int MPIR_TSP_Ireduce_sched_intra_tree(const void *sendbuf, void *recvbuf, MPI_Ai fn_fail: goto fn_exit; } + +int MPIR_TSP_Ireduce_sched_intra_ring(const void *sendbuf, void *recvbuf, MPI_Aint count, + MPI_Datatype datatype, MPI_Op op, int root, + MPIR_Comm * comm, int chunk_size, + int buffer_per_child, MPIR_TSP_sched_t sched) +{ + return MPIR_TSP_Ireduce_sched_intra_tree(sendbuf, recvbuf, count, datatype, op, root, comm, + MPIR_TREE_TYPE_KARY, 1, chunk_size, buffer_per_child, + sched); +} diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c index 3e6f220fab0..2c54f0218d7 100644 --- 
a/src/mpi/coll/src/csel_container.c +++ b/src/mpi/coll/src/csel_container.c @@ -244,15 +244,15 @@ static void parse_container_params(struct json_object *obj, MPII_Csel_container_ } break; - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_recexch_allgatherv: + case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_allgatherv: { json_object_object_foreach(obj, key, val) { ckey = MPL_strdup_no_spaces(key); if (!strncmp(ckey, "scatterv_k=", strlen("scatterv_k="))) - cnt->u.ibcast.intra_tsp_scatterv_recexch_allgatherv.scatterv_k = + cnt->u.ibcast.intra_tsp_scatterv_allgatherv.scatterv_k = atoi(ckey + strlen("scatterv_k=")); else if (!strncmp(ckey, "allgatherv_k=", strlen("allgatherv_k="))) - cnt->u.ibcast.intra_tsp_scatterv_recexch_allgatherv.allgatherv_k = + cnt->u.ibcast.intra_tsp_scatterv_allgatherv.allgatherv_k = atoi(ckey + strlen("allgatherv_k=")); MPL_free(ckey); } @@ -271,13 +271,12 @@ static void parse_container_params(struct json_object *obj, MPII_Csel_container_ } break; - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_multiple_buffer: + case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch: { json_object_object_foreach(obj, key, val) { ckey = MPL_strdup_no_spaces(key); if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.iallreduce.intra_tsp_recexch_multiple_buffer.k = - atoi(ckey + strlen("k=")); + cnt->u.iallreduce.intra_tsp_recexch.k = atoi(ckey + strlen("k=")); MPL_free(ckey); } } From d16794927b7c66ad0362f3de95e84e0c5827ec31 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Mon, 25 Aug 2025 09:57:46 -0500 Subject: [PATCH 33/47] coll: remove MPIR_Csel_root Replaced by MPIR_Csel_composition and MPIR_Csel_selection. 
--- maint/json_gen.sh | 2 - maint/tuning/coll/ch4/generic.json | 213 ----- maint/tuning/coll/ch4/posix_generic.json | 48 -- maint/tuning/coll/mpir/generic.json | 965 ----------------------- src/mpi/coll/include/coll_impl.h | 3 - src/mpi/coll/src/coll_impl.c | 20 +- src/mpi/comm/commutil.c | 1 - 7 files changed, 4 insertions(+), 1248 deletions(-) delete mode 100644 maint/tuning/coll/ch4/generic.json delete mode 100644 maint/tuning/coll/ch4/posix_generic.json delete mode 100644 maint/tuning/coll/mpir/generic.json diff --git a/maint/json_gen.sh b/maint/json_gen.sh index 5e879614839..e6fb56c2fd2 100755 --- a/maint/json_gen.sh +++ b/maint/json_gen.sh @@ -33,7 +33,5 @@ cat > $cfile<csel_comm); - MPIR_ERR_CHECK(mpi_errno); - /* cleanup all collective communicators */ mpi_errno = MPII_Stubalgo_comm_cleanup(comm); MPIR_ERR_CHECK(mpi_errno); diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c index b763e02d98e..41722ff031e 100644 --- a/src/mpi/comm/commutil.c +++ b/src/mpi/comm/commutil.c @@ -317,7 +317,6 @@ int MPII_Comm_init(MPIR_Comm * comm_p) MPIR_stream_comm_init(comm_p); comm_p->persistent_requests = NULL; - comm_p->csel_comm = NULL; /* mutex is only used in VCI granularity. But the overhead of * creation is low, so we always create it. */ From 5e517662af5156dfe52fa9ec494d6fa212cd349d Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 4 Sep 2025 17:59:14 -0500 Subject: [PATCH 34/47] coll/csel: new coll_algorithms.txt --- src/mpi/coll/coll_algorithms.txt | 114 ++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/src/mpi/coll/coll_algorithms.txt b/src/mpi/coll/coll_algorithms.txt index 014aa392754..75acf0989f5 100644 --- a/src/mpi/coll/coll_algorithms.txt +++ b/src/mpi/coll/coll_algorithms.txt @@ -16,6 +16,20 @@ # [algorithm_name] # [key]: [values] # +# "func-commkind" may be "general", under which the "algorithm_name" is the full name of the +# algorithm function. 
General algorithm functions take (coll_sig, cnt) parameters. +# +# In addition, "conditions" lists all conditions and their corresponding checkers. There are +# a few types of conditions distinguished by the format: +# - condition_name: func +# The condition calls a checker function with signature: bool (*func)(coll_sig). +# - condition_name(thresh): func +# This condition calls a query function that returns a value: int (*func)(coll_sig) +# The thresh marks the upper limit (inclusive) of the condition. +# The conditions are used in specifying both algorithm restrictions and CSEL conditions. +# Example usage in restriction list or JSON file: inplace, !inplace, avg_msg_size(1024), etc. +# Most of the checker functions should be inlined to minimize function call overhead. +# # Notes: # * indentations (use 4 spaces) and ':' behind keys are significant # * auto and nb algorithms are assumed and not listed @@ -25,8 +39,8 @@ # # Recognized attribute keys (by gen_coll.py): # * restrictions: -# possible values include parent-comm, power-of-two, inplace, no-inplace, -# cummutative, builtin-op, node-consecutive, displs-ordered, size-ge-pof2 +# A comma-separated list of restrictions. All restrictions must be specified in the top +# conditions list. # * extra_params and cvar_params: # Additional parameters specific to the algorithm functions. Most algorithm functions # use the same arguments (e.g. as MPIR_Bcast), but some may require additional @@ -38,6 +52,28 @@ # a initialization, e.g. param1=val. Rather than repeating the same constant in the # cvar_params, we can use `-` as placeholder for the corresponding constant param. 
+# ---- +conditions: + inplace: MPIR_Csel_sendbuf_inplace + pof2: MPIR_Csel_comm_size_is_pof2 + commutative: MPIR_Csel_op_is_commutative + builtin_op: MPIR_Csel_op_is_builtin + hierarchical: MPIR_Csel_is_hierarchical + node_consecutive: MPIR_Csel_is_node_consecutive + node_regular: MPIR_Csel_is_node_canonical + count_ge_pof2: MPIR_Csel_count_ge_pof2 + displs_ordered: MPIR_Csel_displs_ordered + block_regular: MPIR_Csel_block_regular + comm_size(thresh): MPIR_Csel_comm_size + avg_msg_size(thresh): MPIR_Csel_avg_msg_size + total_msg_size(thresh): MPIR_Csel_total_msg_size + +# ---- +general: + MPIR_Coll_auto + MPIR_Coll_nb + +# ---- barrier-intra: k_dissemination extra_params: k @@ -46,7 +82,7 @@ barrier-intra: extra_params: k, single_phase_recv cvar_params: RECEXCH_KVAL, RECEXCH_SINGLE_PHASE_RECV smp - restrictions: parent-comm + restrictions: hierarchical barrier-inter: bcast ibarrier-intra: @@ -65,7 +101,7 @@ bcast-intra: scatter_recursive_doubling_allgather scatter_ring_allgather smp - restrictions: parent-comm + restrictions: hierarchical tree extra_params: tree_type, k, is_non_blocking cvar_params: TREE_TYPE, TREE_KVAL, IS_NON_BLOCKING @@ -75,9 +111,9 @@ bcast-intra: ibcast-intra: sched_binomial sched_smp - restrictions: parent-comm + restrictions: hierarchical sched_scatter_recursive_doubling_allgather - restrictions: power-of-two + restrictions: pof2 sched_scatter_ring_allgather tsp_tree extra_params: tree_type, k, chunk_size @@ -167,7 +203,7 @@ allgather-intra: extra_params: k cvar_params: BRUCKS_KVAL recursive_doubling - restrictions: power-of-two + restrictions: pof2 ring recexch_doubling extra_params: k, single_phase_recv @@ -181,7 +217,7 @@ iallgather-intra: sched_ring sched_brucks sched_recursive_doubling - restrictions: power-of-two + restrictions: pof2 tsp_ring tsp_brucks extra_params: k @@ -198,21 +234,21 @@ iallgather-inter: allgatherv-intra: brucks recursive_doubling - restrictions: power-of-two + restrictions: pof2 ring allgatherv-inter: 
remote_gather_local_bcast iallgatherv-intra: sched_brucks sched_recursive_doubling - restrictions: power-of-two + restrictions: pof2 sched_ring tsp_recexch_doubling - restrictions: displs-ordered + restrictions: displs_ordered extra_params: k cvar_params: RECEXCH_KVAL tsp_recexch_halving - restrictions: displs-ordered + restrictions: displs_ordered extra_params: k cvar_params: RECEXCH_KVAL tsp_ring @@ -224,28 +260,28 @@ iallgatherv-inter: alltoall-intra: brucks - restrictions: noinplace + restrictions: !inplace k_brucks - restrictions: noinplace + restrictions: !inplace extra_params: k cvar_params: BRUCKS_KVAL pairwise - restrictions: noinplace + restrictions: !inplace pairwise_sendrecv_replace restrictions: inplace scattered - restrictions: noinplace + restrictions: !inplace alltoall-inter: pairwise_exchange ialltoall-intra: sched_brucks - restrictions: noinplace + restrictions: !inplace sched_inplace restrictions: inplace sched_pairwise - restrictions: noinplace + restrictions: !inplace sched_permuted_sendrecv - restrictions: noinplace + restrictions: !inplace tsp_ring tsp_brucks extra_params: k, buffer_per_phase @@ -260,20 +296,20 @@ alltoallv-intra: pairwise_sendrecv_replace restrictions: inplace scattered - restrictions: noinplace + restrictions: !inplace alltoallv-inter: pairwise_exchange ialltoallv-intra: sched_blocked - restrictions: noinplace + restrictions: !inplace sched_inplace restrictions: inplace tsp_scattered - restrictions: noinplace + restrictions: !inplace extra_params: batch_size, bblock cvar_params: SCATTERED_BATCH_SIZE, SCATTERED_OUTSTANDING_TASKS tsp_blocked - restrictions: noinplace + restrictions: !inplace extra_params: bblock cvar_params: THROTTLE tsp_inplace @@ -285,16 +321,16 @@ alltoallw-intra: pairwise_sendrecv_replace restrictions: inplace scattered - restrictions: noinplace + restrictions: !inplace alltoallw-inter: pairwise_exchange ialltoallw-intra: sched_blocked - restrictions: noinplace + restrictions: !inplace sched_inplace 
restrictions: inplace tsp_blocked - restrictions: noinplace + restrictions: !inplace extra_params: bblock cvar_params: THROTTLE tsp_inplace @@ -305,17 +341,17 @@ ialltoallw-inter: reduce-intra: binomial smp - restrictions: commutative, parent-comm + restrictions: commutative, hierarchical reduce_scatter_gather - restrictions: size-ge-pof2, builtin-op + restrictions: count_ge_pof2, builtin_op reduce-inter: local_reduce_remote_send ireduce-intra: sched_smp - restrictions: commutative, parent-comm + restrictions: commutative, hierarchical sched_binomial sched_reduce_scatter_gather - restrictions: size-ge-pof2, builtin-op + restrictions: count_ge_pof2, builtin_op tsp_tree restrictions: commutative extra_params: tree_type, k, chunk_size, buffer_per_child @@ -328,14 +364,14 @@ ireduce-inter: allreduce-intra: smp - restrictions: commutative, parent-comm + restrictions: commutative, hierarchical recursive_doubling recursive_multiplying extra_params: k cvar_params: RECURSIVE_MULTIPLYING_KVAL restrictions: commutative reduce_scatter_allgather - restrictions: size-ge-pof2, builtin-op + restrictions: count_ge_pof2, builtin_op tree extra_params: tree_type, k, chunk_size, buffer_per_child cvar_params: TREE_TYPE, TREE_KVAL, TREE_PIPELINE_CHUNK_SIZE, TREE_BUFFER_PER_CHILD @@ -356,10 +392,10 @@ allreduce-inter: iallreduce-intra: sched_naive sched_smp - restrictions: commutative, parent-comm + restrictions: commutative, hierarchical sched_recursive_doubling sched_reduce_scatter_allgather - restrictions: size-ge-pof2, builtin-op + restrictions: count_ge_pof2, builtin_op tsp_recexch_single_buffer extra_params: k cvar_params: RECEXCH_KVAL @@ -392,7 +428,7 @@ reduce_scatter-inter: remote_reduce_local_scatter ireduce_scatter-intra: sched_noncommutative - restrictions: power-of-two + restrictions: pof2 sched_recursive_doubling sched_pairwise restrictions: commutative @@ -407,7 +443,7 @@ ireduce_scatter-inter: reduce_scatter_block-intra: noncommutative - restrictions: power-of-two + 
restrictions: pof2 recursive_doubling pairwise restrictions: commutative @@ -417,7 +453,7 @@ reduce_scatter_block-inter: remote_reduce_local_scatter ireduce_scatter_block-intra: sched_noncommutative - restrictions: power-of-two + restrictions: pof2 sched_recursive_doubling sched_pairwise restrictions: commutative @@ -432,11 +468,11 @@ ireduce_scatter_block-inter: scan-intra: smp - restrictions: commutative, node-consecutive + restrictions: commutative, node_consecutive recursive_doubling iscan-intra: sched_smp - restrictions: commutative, node-consecutive + restrictions: commutative, node_consecutive sched_recursive_doubling tsp_recursive_doubling From 6f485a2d8cf6bedd40c150d432ea5a18a6dc710c Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 4 Sep 2025 18:00:44 -0500 Subject: [PATCH 35/47] coll/csel: new coll_selection.json --- src/mpi/coll/coll_selection.json | 602 ++++++++++++++----------------- 1 file changed, 270 insertions(+), 332 deletions(-) diff --git a/src/mpi/coll/coll_selection.json b/src/mpi/coll/coll_selection.json index a099c857f18..f2084e2aa51 100644 --- a/src/mpi/coll/coll_selection.json +++ b/src/mpi/coll/coll_selection.json @@ -1,808 +1,746 @@ { - "collective=bcast": + "collective=bcast-intra": { - "comm_type=intra": - { - "comm_size<8": + "comm_size(8)": { "algorithm=MPIR_Bcast_intra_binomial":{} }, - "comm_size=pow2": + "pof2": { - "avg_msg_size<=12288": + "avg_msg_size(12288)": { "algorithm=MPIR_Bcast_intra_binomial":{} }, - "avg_msg_size<=524288": + "avg_msg_size(524288)": { "algorithm=MPIR_Bcast_intra_scatter_recursive_doubling_allgather":{} }, - "avg_msg_size=any": + "any": { "algorithm=MPIR_Bcast_intra_scatter_ring_allgather":{} } }, - "comm_size=any": + "comm_size(any)": { - "avg_msg_size<=12288": + "avg_msg_size(12288)": { "algorithm=MPIR_Bcast_intra_binomial":{} }, - "avg_msg_size=any": + "any": { "algorithm=MPIR_Bcast_intra_scatter_ring_allgather":{} } } - }, - "comm_type=inter": - { + }, + "collective=bcast-inter": + { 
"algorithm=MPIR_Bcast_inter_remote_send_local_bcast":{} - } }, - "collective=allreduce": + "collective=allreduce-intra": { - "comm_type=intra": - { - "avg_msg_size<=8": + "avg_msg_size(8)": { "algorithm=MPIR_Allreduce_intra_recursive_doubling":{} }, - "avg_msg_size=any": + "any": { - "is_op_built_in=no": + "!builtin_op": { "algorithm=MPIR_Allreduce_intra_recursive_doubling":{} }, - "is_op_built_in=yes": + "builtin_op": { - "count Date: Thu, 4 Sep 2025 23:19:45 -0500 Subject: [PATCH 36/47] coll/gen: update gen_coll.py --- maint/gen_coll.py | 855 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 697 insertions(+), 158 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 443a5536127..8bbbddbcdff 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -9,22 +9,28 @@ from local_python.binding_common import * def main(): + G.coll_names = ["barrier", "bcast", "gather", "gatherv", "scatter", "scatterv", "allgather", "allgatherv", "alltoall", "alltoallv", "alltoallw", "reduce", "allreduce", "reduce_scatter", "reduce_scatter_block", "scan", "exscan", "neighbor_allgather", "neighbor_allgatherv", "neighbor_alltoall", "neighbor_alltoallv", "neighbor_alltoallw"] + binding_dir = G.get_srcdir_path("src/binding") c_dir = "src/binding/c" func_list = load_C_func_list(binding_dir, silent=True) - G.algos = load_coll_algos("src/mpi/coll/coll_algorithms.txt") - coll_names = ["barrier", "bcast", "gather", "gatherv", "scatter", "scatterv", "allgather", "allgatherv", "alltoall", "alltoallv", "alltoallw", "reduce", "allreduce", "reduce_scatter", "reduce_scatter_block", "scan", "exscan", "neighbor_allgather", "neighbor_allgatherv", "neighbor_alltoall", "neighbor_alltoallv", "neighbor_alltoallw"] + # Loading coll_algorithms.txt. 
It sets - + # - G.conditions: a list of conditions that can be used as restrictions and in JSON tuning files + # - G.algos: a two level array: [func-commkind][algo] + load_coll_algos("src/mpi/coll/coll_algorithms.txt") + # Prepare a one level algo array for conveninece - + # - G.algo_list: a one level array [algo] + G.algo_list = collect_algo_list() - G.out = [] - G.prototypes_hash = {} + G.out = [] # output to C file + G.out2 = [] # output to header G.prototypes = [] - G.algo_list = [] G.out.append("#include \"mpiimpl.h\"") - G.out.append("#include \"iallgatherv/iallgatherv.h\"") + G.out.append("#include \"coll_csel.h\"") # dump impl functions - for a in coll_names: + for a in G.coll_names: dump_coll_impl(a, "blocking") dump_coll_impl(a, "nonblocking") dump_coll_impl(a, "persistent") @@ -32,77 +38,97 @@ def main(): # TEMP: dump mpir functions. # Current code base call MPIR_ functions in copositinal algorithms. Create a wrapper that call _impl # for now. We will refactor the compositional algorithms later. 
- for a in coll_names: + for a in G.coll_names: dump_coll_mpir(a, "blocking") dump_coll_mpir(a, "nonblocking") # dump the container version of the algorithms dump_algo_cnt_fns() add_algo_prototypes() - for a in coll_names: + for a in G.coll_names: add_sched_auto_prototypes(a) + # initialize MPIR_Coll_algo_table and MPIR_Coll_algo_names + dump_MPII_Coll_algo_init() + # initialize MPIR_Coll_cvar_table and MPIR_Coll_type_names + dump_MPII_Coll_type_init() + # initialize MPIR_Coll_condition_names + dump_MPII_Csel_init_condition_names() + # parsing routines for loading JSONs + dump_MPII_Csel_parse_container() + dump_MPII_Csel_parse_operator() + # routine for Csel search + dump_MPII_Csel_run_condition() + # routines for checking algorithm CVARs + dump_MPIR_Coll_cvar_to_algo_id() + dump_MPIR_Coll_init_algo_container() + dump_MPIR_Coll_check_algo_restriction() + + # enum for coll_type, define MPIR_CSEL_NUM_COLL_TYPES + dump_MPIR_Csel_coll_type_e() + # enum for algorithm id, define MPIR_CSEL_NUM_ALGORITHMS + dump_MPIR_Csel_container_type_e() + # enum CSEL_NODE_TYPE, define MPIR_CSEL_NUM_CONDITIONS + dump_MPIR_Csel_node_type_e() + # algorithm container struct + dump_MPII_Csel_container() + G.out2.append("") + dump_c_file("src/mpi/coll/mpir_coll.c", G.out) - dump_coll_algos_h("src/mpi/coll/include/coll_algos.h", G.algo_list, G.prototypes) + dump_coll_algos_h("src/mpi/coll/include/coll_algos.h", G.prototypes, G.out2) -def dump_algo_cnt_fns(): - def get_coll_args(func, func_name): - args = [] - for p in func['parameters']: - if p['name'] == 'comm': - args.append("coll_sig->comm_ptr") - else: - args.append("coll_sig->u.%s.%s" % (func_name, p['name'])) - return ', '.join(args) +def collect_algo_list(): + algo_list = [] + for coll in G.coll_names: + for commkind in ("intra", "inter"): + for blocking in (True, False): + if blocking: + func_commkind = coll + '-' + commkind + else: + func_commkind = 'i' + coll + '-' + commkind + if func_commkind in G.algos: + for algo in 
G.algos[func_commkind]: + if "allcomm" in algo and func_commkind.endswith("inter"): + continue + algo_list.append(algo) + for algo in G.algos['general']: + algo_list.append(algo) + return algo_list - def get_algo_args(func, func_name, algo): - args = get_coll_args(func, func_name) +def dump_algo_cnt_fns(): + def get_algo_args(coll_name, algo): + args = get_coll_args(coll_name, "csel") if 'extra_params' in algo: args += ", " + get_algo_extra_args(algo, "csel") - if func_name.startswith('i'): + if algo['func-commkind'].startswith('i'): args += ", coll_sig->sched" - elif func_name.startswith('neighbor_'): + elif algo['func-commkind'].startswith('neighbor_'): pass else: args += ", 0" # coll_attr return args - def dump_algo_prep(func_name, algo): - if func_name.startswith('i'): + def dump_algo_prep(algo): + if algo['func-commkind'].startswith('i'): if algo['name'].startswith('tsp_'): G.out.append("MPII_CSEL_CREATE_TSP_SCHED(coll_sig);") else: G.out.append("MPII_CSEL_CREATE_SCHED(coll_sig);") - algo_funcname_hash = {} - for func_commkind in sorted(G.algos): - func_name, commkind = func_commkind.split("-") - if func_name.startswith('i'): - # use blocking func for base parameters - func = G.FUNCS["mpi_" + func_name[1:]] - else: - func = G.FUNCS["mpi_" + func_name] - for algo in G.algos[func_commkind]: - if "allcomm" in algo and commkind == "inter": - continue - algo_funcname = get_algo_funcname(func_name, commkind, algo) - if algo_funcname in algo_funcname_hash: - # skip alias algorithms - continue - else: - algo_funcname_hash[algo_funcname] = 1 - algo_args = get_algo_args(func, func_name, algo) + for algo in G.algo_list: + if algo["func-commkind"] != 'general': + coll_name = get_algo_coll_name(algo) + algo_funcname = get_algo_funcname(algo) + algo_args = get_algo_args(coll_name, algo) decl = "int %s_cnt(%s)" % (algo_funcname, "MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt") add_prototype(decl) - G.algo_list.append(algo_funcname) - dump_split(0, decl) 
dump_open('{') G.out.append("int mpi_errno = MPI_SUCCESS;") G.out.append("") - dump_algo_prep(func_name, algo) + dump_algo_prep(algo) dump_split(1, "mpi_errno = %s(%s);" % (algo_funcname, algo_args)) G.out.append("MPIR_ERR_CHECK(mpi_errno);") G.out.append("") @@ -114,104 +140,539 @@ def dump_algo_prep(func_name, algo): G.out.append("") def add_algo_prototypes(): - def get_coll_params(func): - mapping = G.MAPS['SMALL_C_KIND_MAP'] - params = [] - for p in func['parameters']: - if p['name'] == 'comm': - params.append("MPIR_Comm * comm_ptr") - else: - s = get_C_param(p, func, mapping) - if p['kind'].startswith('POLY'): - s = re.sub(r'\bint ', 'MPI_Aint ', s) - params.append(s) - return ', '.join(params) - - def get_algo_params(func, func_name, algo): - params = get_coll_params(func) + def get_algo_params(algo): + coll_name = get_algo_coll_name(algo) + params = get_coll_params(coll_name) if 'extra_params' in algo: params += ", " + get_algo_extra_params(algo) - if func_name.startswith('i'): + if algo['func-commkind'].startswith('i'): if algo['name'].startswith('tsp_'): params += ", MPIR_TSP_sched_t s" else: params += ", MPIR_Sched_t s" - elif func_name.startswith('neighbor_'): + elif algo['func-commkind'].startswith('neighbor_'): pass else: params += ", int coll_attr" # coll_attr return params - for func_commkind in sorted(G.algos): - func_name, commkind = func_commkind.split("-") - if func_name.startswith('i'): - # use blocking func for base parameters - func = G.FUNCS["mpi_" + func_name[1:]] + for algo in G.algo_list: + if algo['func-commkind'] == 'general': + decl = "int %s(%s)" % (algo['name'], "MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt") + add_prototype(decl) else: - func = G.FUNCS["mpi_" + func_name] - - algo_funcname_hash = {} - for algo in G.algos[func_commkind]: - if "allcomm" in algo and commkind == "inter": - continue - algo_funcname = get_algo_funcname(func_name, commkind, algo) - if algo_funcname in algo_funcname_hash: - # skip alias 
algorithms - continue - else: - algo_funcname_hash[algo_funcname] = 1 - algo_params = get_algo_params(func, func_name, algo) + algo_funcname = get_algo_funcname(algo) + algo_params = get_algo_params(algo) decl = "int %s(%s)" % (algo_funcname, algo_params) add_prototype(decl) -def add_sched_auto_prototypes(name): - def get_coll_params(func): - mapping = G.MAPS['SMALL_C_KIND_MAP'] - params = [] - for p in func['parameters']: - if p['name'] == 'comm': - params.append("MPIR_Comm * comm_ptr") - else: - s = get_C_param(p, func, mapping) - if p['kind'].startswith('POLY'): - s = re.sub(r'\bint ', 'MPI_Aint ', s) - params.append(s) - return ', '.join(params) - - func = G.FUNCS["mpi_" + name] - params = get_coll_params(func) +def add_sched_auto_prototypes(coll_name): + params = get_coll_params(coll_name) params += ", MPIR_Sched_t s" - add_prototype("int MPIR_I%s_intra_sched_auto(%s)" % (name, params)) - if not re.match(r'(scan|exscan|neighbor_)', name): - add_prototype("int MPIR_I%s_inter_sched_auto(%s)" % (name, params)) + add_prototype("int MPIR_I%s_intra_sched_auto(%s)" % (coll_name, params)) + if not re.match(r'(scan|exscan|neighbor_)', coll_name): + add_prototype("int MPIR_I%s_inter_sched_auto(%s)" % (coll_name, params)) -def add_prototype(l): - if RE.match(r'int\s+(\w+)\(', l): - func_name = RE.m.group(1) - if func_name not in G.prototypes_hash: - G.prototypes_hash[func_name] = 1 - G.prototypes.append(l) +def dump_MPII_Coll_type_init(): + G.out.append("") + decl = "void MPII_Coll_type_init(void)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + for a in G.coll_names: + for commkind in ("intra", "inter"): + for is_blocking in (True, False): + if commkind == "inter" and re.match(r'(scan|exscan|neighbor_)', a): + # CVARs for these inter-collective does not exist + G.out.append("MPIR_Coll_cvar_table[%s] = 0;" % (coll_type(a, is_blocking, commkind))) + else: + G.out.append("MPIR_Coll_cvar_table[%s] = %s;" % (coll_type(a, is_blocking, commkind), cvar_name(a, 
is_blocking, commkind))) + + for a in G.coll_names: + for commkind in ("intra", "inter"): + for is_blocking in (True, False): + if is_blocking: + coll_type_name = a + '-' + commkind + else: + coll_type_name = 'i' + a + '-' + commkind + G.out.append("MPIR_Coll_type_names[%s] = \"%s\";" % (coll_type(a, is_blocking, commkind), coll_type_name)) + dump_close('}') + +def dump_MPII_Csel_init_condition_names(): + G.out.append("") + decl = "void MPII_Csel_init_condition_names(void)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + for a in G.conditions: + if RE.match(r'(.+)\(thresh\)', a): + cond = RE.m.group(1) else: - pass + cond = a + G.out.append("MPIR_Csel_condition_names[%s] = \"%s\";" % (condition_id(cond), cond)) + dump_close('}') + +def dump_MPII_Coll_algo_init(): + G.out.append("") + decl = "void MPII_Coll_algo_init(void)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + for a in G.algo_list: + algo_funcname = get_algo_funcname(a) + idx = algo_id(algo_funcname) + if a['func-commkind'] != 'general': + algo_funcname += "_cnt" + G.out.append("MPIR_Coll_algo_table[%s] = %s;" % (idx, algo_funcname)) + for a in G.algo_list: + algo_funcname = get_algo_funcname(a) + idx = algo_id(algo_funcname) + G.out.append("MPIR_Coll_algo_names[%s] = \"%s\";" % (idx, algo_funcname)) + dump_close('}') + + +def dump_MPII_Csel_parse_container(): + G.out.append("") + def dump_json_foreach_open(): + dump_open("json_object_object_foreach(obj, key, val) {") + G.out.append("char *ckey = MPL_strdup_no_spaces(key);") + + def dump_json_foreach_close(): + G.out.append("MPL_free(ckey);") + dump_close("}") + + def dump_parse_params(): + dump_open("switch (cnt->id) {") + for algo in G.algo_list: + if 'extra_params' in algo: + struct_name = algo_struct_name(algo) + extra_params = algo['extra_params'].replace(' ', '').split(',') + G.out.append("case %s:" % algo_id(get_algo_funcname(algo))) + dump_open('{') # protect json_object_object_foreach + G.out.append("int num_params = 
0;") + num_expect = 0 + dump_json_foreach_open() + ifstr = "if" + for a in extra_params: + if re.match(r'\w+=(.+)', a): + # skip constant parameter + continue + else: + num_expect += 1 + n = len(a) + 1 + atoi = "atoi" + if a == "tree_type": + atoi = "get_tree_type_from_string" + G.out.append("%s (!strncmp(ckey, \"%s=\", %d)) {" % (ifstr, a, n)) + G.out.append(" cnt->u.%s.%s = %s(ckey + %d);" % (struct_name, a, atoi, n)) + G.out.append(" num_params++;") + ifstr = "} else if" + G.out.append("}") + dump_json_foreach_close(); + dump_open("if (num_params != %d) {" % num_expect) + G.out.append("printf(\"MPII_Csel_parse_container_params: algorithm %s expect %d parameters\\\n\");" % (struct_name, num_expect)) + dump_close('}') + G.out.append("MPIR_Assert(num_params == %d);" % num_expect) + dump_close('}') + G.out.append(" break;") + G.out.append("default:") + G.out.append(" break;") + dump_close('}') # switch + + decl = "int MPII_Csel_parse_container_params(void *json_obj, void *container)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + G.out.append("struct json_object *obj = json_obj;") + G.out.append("MPII_Csel_container_s *cnt = container;") + # G.out.append("obj = json_object_object_get(obj, key);") + dump_parse_params() + G.out.append("") + G.out.append("return MPI_SUCCESS;") + dump_close('}') + +def dump_MPII_Csel_parse_operator(): + decl = "int MPII_Csel_parse_operator(const char *ckey, void *operator_node)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + G.out.append("MPIR_Csel_node_s *csel_node = operator_node;") + dump_open("if (ckey[0] == '!') {") + G.out.append("csel_node->u.condition.negate = true;") + G.out.append("ckey++;") + dump_else() + G.out.append("csel_node->u.condition.negate = false;") + dump_close('}') + + if_clase = "if" + for a in G.conditions: + cond = a + has_thresh = False + if RE.match(r'(.+)\(thresh\)', a): + cond = RE.m.group(1) + has_thresh = True + n = len(cond) + if has_thresh: + G.out.append("%s 
(strncmp(ckey, \"%s\", %d) == 0) {" % (if_clase, cond, n)) + G.out.append(" csel_node->type = %s;" % condition_id(cond)) + G.out.append(" MPIR_Assert(ckey[%d] == '(');" % n) + G.out.append(" csel_node->u.condition.thresh = atoi(ckey + %d);" % (n + 1)) + else: + G.out.append("%s (strcmp(ckey, \"%s\") == 0) {" % (if_clase, cond)) + G.out.append(" csel_node->type = %s;" % condition_id(cond)) + if_clase = "} else if" + G.out.append("} else {") + G.out.append(" return MPI_ERR_OTHER;") + G.out.append("}") + + G.out.append("") + G.out.append("return MPI_SUCCESS;") + dump_close('}') + +def dump_MPII_Csel_run_condition(): + decl = "void *MPII_Csel_run_condition(void *operator_node, MPIR_Csel_coll_sig_s *coll_sig)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + G.out.append("MPIR_Csel_node_s *node = operator_node;") + G.out.append("bool cond;") + dump_open("switch(node->type) {") + for a in G.conditions: + cond = a + has_thresh = False + if RE.match(r'(.+)\(thresh\)', a): + cond = RE.m.group(1) + has_thresh = True + dump_open("case %s:" % condition_id(cond)) + if has_thresh: + G.out.append("cond = (node->u.condition.thresh <= %s(coll_sig));" % G.conditions[a]) + else: + G.out.append("cond = %s(coll_sig);" % G.conditions[a]) + G.out.append("if (node->u.condition.negate) cond = !cond;") + G.out.append("return cond ? 
node->success : node->failure;") + dump_close("") + G.out.append("default:") + G.out.append(" break;") + dump_close('}') # switch + G.out.append("return NULL;") + dump_close('}') + +def dump_MPII_Csel_parse_operator(): + decl = "int MPII_Csel_parse_operator(const char *ckey, void *operator_node)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + G.out.append("MPIR_Csel_node_s *csel_node = operator_node;") + dump_open("if (ckey[0] == '!') {") + G.out.append("csel_node->u.condition.negate = true;") + G.out.append("ckey++;") + dump_else() + G.out.append("csel_node->u.condition.negate = false;") + dump_close('}') + + if_clase = "if" + for a in G.conditions: + cond = a + has_thresh = False + if RE.match(r'(.+)\(thresh\)', a): + cond = RE.m.group(1) + has_thresh = True + n = len(cond) + if has_thresh: + G.out.append("%s (strncmp(ckey, \"%s\", %d) == 0) {" % (if_clase, cond, n)) + G.out.append(" csel_node->type = %s;" % condition_id(cond)) + G.out.append(" MPIR_Assert(ckey[%d] == '(');" % n) + G.out.append(" csel_node->u.condition.thresh = atoi(ckey + %d);" % (n + 1)) + else: + G.out.append("%s (strcmp(ckey, \"%s\") == 0) {" % (if_clase, cond)) + G.out.append(" csel_node->type = %s;" % condition_id(cond)) + if_clase = "} else if" + G.out.append("} else {") + G.out.append(" MPIR_Assert(0);") + G.out.append(" return MPI_ERR_OTHER;") + G.out.append("}") + + G.out.append("") + G.out.append("return MPI_SUCCESS;") + dump_close('}') + +def dump_MPII_Csel_run_condition(): + decl = "void *MPII_Csel_run_condition(void *operator_node, MPIR_Csel_coll_sig_s *coll_sig)" + add_prototype(decl) + G.out.append(decl) + dump_open('{') + G.out.append("MPIR_Csel_node_s *node = operator_node;") + G.out.append("bool cond;") + dump_open("switch(node->type) {") + for a in G.conditions: + cond = a + has_thresh = False + if RE.match(r'(.+)\(thresh\)', a): + cond = RE.m.group(1) + has_thresh = True + dump_open("case %s:" % condition_id(cond)) + if has_thresh: + G.out.append("cond = 
(node->u.condition.thresh <= %s(coll_sig));" % G.conditions[a]) + else: + G.out.append("cond = %s(coll_sig);" % G.conditions[a]) + G.out.append("if (node->u.condition.negate) cond = !cond;") + G.out.append("return cond ? node->success : node->failure;") + dump_close("") + G.out.append("default:") + G.out.append(" break;") + dump_close('}') # switch + G.out.append("return NULL;") + dump_close('}') + +def dump_MPIR_Coll_cvar_to_algo_id(): + G.out.append("") + def dump_cvar_cases(name, commkind): + algo_id_prefix = "MPII_CSEL_CONTAINER_TYPE__ALGORITHM" + + dump_open("switch (cvar_val) {") + G.out.append("case MPIR_CVAR_%s_%s_ALGORITHM_auto:" % (name.upper(), commkind.upper())) + G.out.append(" return %s__MPIR_Coll_auto;" % (algo_id_prefix)) + if not name.startswith("i"): # blocking + G.out.append("case MPIR_CVAR_%s_%s_ALGORITHM_nb:" % (name.upper(), commkind.upper())) + G.out.append(" return %s__MPIR_Coll_nb;" % (algo_id_prefix)) + + func_commkind = name + '-' + commkind + for algo in G.algos[func_commkind]: + G.out.append("case MPIR_CVAR_%s_%s_ALGORITHM_%s:" % (name.upper(), commkind.upper(), algo['name'])) + G.out.append(" return %s__%s;" % (algo_id_prefix, get_algo_funcname(algo))) + dump_close("}") + + decl = "int MPIR_Coll_cvar_to_algo_id(int coll_type, int cvar_val)" + add_prototype(decl) + G.out.append(decl) + dump_open("{") + dump_open("switch (coll_type) {") + for coll in G.coll_names: + for commkind in ("intra", "inter"): + for is_blocking in (True, False): + if is_blocking: + name = coll + else: + name = 'i' + coll + if commkind == "inter" and re.match(r'(scan|exscan|neighbor_)', coll): + continue + G.out.append("case %s:" % coll_type(coll, is_blocking, commkind)) + G.out.append("INDENT") + dump_cvar_cases(name, commkind) + G.out.append("break;") + G.out.append("DEDENT") + dump_close("}") + G.out.append("MPIR_Assert(0);") + G.out.append("return 0;") + dump_close("}") + +def dump_MPIR_Coll_init_algo_container(): + G.out.append("") + decl = "void 
MPIR_Coll_init_algo_container(MPIR_Csel_coll_sig_s * coll_sig, int algo_id, MPII_Csel_container_s * cnt)" + add_prototype(decl) + G.out.append(decl) + dump_open("{") + G.out.append("memset(cnt, 0, sizeof(*cnt));") + G.out.append("cnt->id = algo_id;") + dump_open("switch (algo_id) {") + for algo in G.algo_list: + if "extra_params" in algo: + struct_name = algo_struct_name(algo) + extra_params = algo['extra_params'].replace(' ', '').split(',') + cvar_params = algo['cvar_params'].replace(' ', '').split(',') + coll_name = algo['func-commkind'].split('-')[0] + G.out.append("case %s:" % algo_id(get_algo_funcname(algo))) + for i, a in enumerate(extra_params): + if re.match(r'\w+=(.+)', a): + # skip constant parameter + continue + else: + cvar_param = "MPIR_CVAR_%s_%s" % (coll_name.upper(), cvar_params[i]) + if a == "tree_type": + cvar_param = "get_tree_type_from_string(%s)" % cvar_param + elif cvar_params[i] == "THROTTLE": + cvar_param = "MPIR_CVAR_ALLTOALL_THROTTLE" + G.out.append(" cnt->u.%s.%s = %s;" % (struct_name, a, cvar_param)) + G.out.append(" break;") + dump_close("}") + dump_close("}") + +def dump_MPIR_Coll_check_algo_restriction(): + G.out.append("") + def dump_check_restriction(restriction): + r = restriction + negate = False + if restriction.startswith('!'): + r = restriction[1:] + negate = True + if RE.match(r'.*\(.*\)', r): + raise Exception("Threshold condition %s cannot be used as a restriction" % r) + + cond = None + if r in G.conditions: + cond = "%s(coll_sig)" % G.conditions[r] + else: + raise Exception("Restriction %s not listed" % restriction) + + if negate: + G.out.append(" if (%s) return false;" % cond) + else: + G.out.append(" if (!%s) return false;" % cond) + + decl = "bool MPIR_Coll_check_algo_restriction(MPIR_Csel_coll_sig_s * coll_sig, int algo_id)" + add_prototype(decl) + G.out.append(decl) + dump_open("{") + dump_open("switch (algo_id) {") + for algo in G.algo_list: + if "restrictions" in algo: + restrictions = algo['restrictions'].replace(' 
', '').split(',') + G.out.append("case %s:" % algo_id(get_algo_funcname(algo))) + for r in restrictions: + dump_check_restriction(r) + G.out.append(" break;") + dump_close("}") + G.out.append("return true;") + dump_close("}") + +# e.g. MPIR_CSEL_COLL_TYPE__BARRIER, etc. +def dump_MPIR_Csel_coll_type_e(): + G.out2.append("") + G.out2.append("typedef enum {") + # IMPORTANT: MPIR_Coll_nb algorithm relies on that blocking coll_type is even and its + # nonblocking correspondent is coll_type+1. + for a in G.coll_names: + for commkind in ("intra", "inter"): + for is_blocking in (True, False): + G.out2.append(" %s," % coll_type(a, is_blocking, commkind)) + G.out2.append(" %s" % coll_type_END()) + G.out2.append("} MPIR_Csel_coll_type_e;") + G.out2.append("") + G.out2.append("#define MPIR_CSEL_NUM_COLL_TYPES %s\n" % coll_type_END()) + +def dump_MPIR_Csel_container_type_e(): + G.out2.append("") + G.out2.append("typedef enum {") + for a in G.algo_list: + algo_funcname = get_algo_funcname(a) + G.out2.append(" %s," % algo_id(algo_funcname)) + G.out2.append(" %s" % algo_id_END()) + G.out2.append("} MPIR_Csel_container_type_e;") + G.out2.append("") + G.out2.append("#define MPIR_CSEL_NUM_ALGORITHMS %s\n" % algo_id_END()) + +def dump_MPIR_Csel_node_type_e(): + G.out2.append("") + G.out2.append("typedef enum {") + for a in G.conditions: + G.out2.append(" %s," % condition_id(a)) + G.out2.append(" CSEL_NODE_TYPE__OPERATOR__COLLECTIVE,") + G.out2.append(" CSEL_NODE_TYPE__OPERATOR__ANY,") + G.out2.append(" CSEL_NODE_TYPE__CONTAINER,") + G.out2.append("} MPIR_Csel_node_type_e;") + G.out2.append("") + G.out2.append("#define MPIR_CSEL_NUM_CONDITIONS %s\n" % "CSEL_NODE_TYPE__OPERATOR__COLLECTIVE") + +def dump_MPIR_Csel_node_s(): + G.out2.append("") + G.out2.append("typedef struct MPIR_Csel_node {") + G.out2.append(" MPIR_Csel_node_type_e type;") + G.out2.append(" MPI_Aint thresh;") + G.out2.append(" struct MPIR_Csel_node *success;") + G.out2.append(" struct MPIR_Csel_node *failure;") + 
G.out2.append("} MPIR_Csel_node_s;") + +def dump_MPII_Csel_container(): + G.out2.append("") + def dump_algo_params(): + for algo in G.algo_list: + if 'extra_params' in algo: + extra_params = algo['extra_params'].replace(' ', '').split(',') + G.out2.append(" struct {") + for a in extra_params: + if re.match(r'\w+=(.+)', a): + # skip constant parameter + continue + else: + G.out2.append(" int %s;" % a) + G.out2.append(" } %s;" % algo_struct_name(algo)) + + G.out2.append("typedef struct MPII_Csel_container {") + G.out2.append(" MPIR_Csel_container_type_e id;") + G.out2.append(" union {") + dump_algo_params() + G.out2.append(" } u;") + G.out2.append("} MPII_Csel_container_s;") + +def dump_MPIR_Csel_node_type_e(): + G.out2.append("") + G.out2.append("typedef enum {") + for a in G.conditions: + G.out2.append(" %s," % condition_id(a)) + G.out2.append(" CSEL_NODE_TYPE__OPERATOR__COLLECTIVE,") + G.out2.append(" CSEL_NODE_TYPE__OPERATOR__ANY,") + G.out2.append(" CSEL_NODE_TYPE__CONTAINER,") + G.out2.append("} MPIR_Csel_node_type_e;") + G.out2.append("") + G.out2.append("#define MPIR_CSEL_NUM_CONDITIONS %s\n" % "CSEL_NODE_TYPE__OPERATOR__COLLECTIVE") + +def dump_MPIR_Csel_node_s(): + G.out2.append("") + G.out2.append("typedef struct MPIR_Csel_node {") + G.out2.append(" MPIR_Csel_node_type_e type;") + G.out2.append(" MPI_Aint thresh;") + G.out2.append(" struct MPIR_Csel_node *success;") + G.out2.append(" struct MPIR_Csel_node *failure;") + G.out2.append("} MPIR_Csel_node_s;") + +def dump_MPII_Csel_container(): + G.out2.append("") + def dump_algo_params(): + for algo in G.algo_list: + if 'extra_params' in algo: + extra_params = algo['extra_params'].replace(' ', '').split(',') + G.out2.append(" struct {") + for a in extra_params: + if re.match(r'\w+=(.+)', a): + # skip constant parameter + continue + else: + G.out2.append(" int %s;" % a) + G.out2.append(" } %s;" % algo_struct_name(algo)) + + G.out2.append("typedef struct MPII_Csel_container {") + G.out2.append(" 
MPIR_Csel_container_type_e id;") + G.out2.append(" union {") + dump_algo_params() + G.out2.append(" } u;") + G.out2.append("} MPII_Csel_container_s;") + +#---------------------------------------- +def add_prototype(l): + G.prototypes.append(l) def load_coll_algos(algo_txt): - All = {} + G.algos = {} + G.conditions = {} with open(algo_txt) as In: (func_commkind, algo_list, algo) = (None, None, None) for line in In: - if RE.match(r'(\w+-(intra|inter)):', line): + if RE.match(r'(\w+-(intra|inter)|general|conditions):', line): func_commkind = RE.m.group(1) - algo_list = [] - All[func_commkind] = algo_list - elif RE.match(r'\s+(\w+)\s*$', line): - algo = {"name": RE.m.group(1), "func-commkind": func_commkind} - algo_list.append(algo) - elif RE.match(r'\s+(\w+):\s*(.+)', line): - (key, value) = RE.m.group(1,2) - algo[key] = value - return All + if func_commkind != "conditions": + algo_list = [] + G.algos[func_commkind] = algo_list + elif func_commkind == "conditions": + if RE.match(r'\s+([\w()-]+):\s*(\w+)', line): + G.conditions[RE.m.group(1)] = RE.m.group(2) + elif func_commkind: + if RE.match(r'\s+(\w+)\s*$', line): + algo = {"name": RE.m.group(1), "func-commkind": func_commkind} + algo_list.append(algo) + elif RE.match(r'\s+(\w+):\s*(.+)', line): + algo[RE.m.group(1)] = RE.m.group(2) def dump_coll_impl(name, blocking_type): func = G.FUNCS["mpi_" + name] @@ -229,11 +690,15 @@ def dump_coll_impl(name, blocking_type): # Initialize coll_sig G.out.append("MPIR_Csel_coll_sig_s coll_sig;") - if blocking_type == "blocking": - G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__%s;" % name.upper()) - else: + NAME = name.upper() + if blocking_type != "blocking": # nonblocking and persistent - G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__I%s;" % name.upper()) + NAME = 'I' + NAME + dump_open("if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {") + G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__INTRA_%s;" % NAME) + dump_else() + 
G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__INTER_%s;" % NAME) + dump_close('}') G.out.append("coll_sig.comm_ptr = comm_ptr;") if blocking_type == "persistent": G.out.append("coll_sig.is_persistent = true;") @@ -241,10 +706,14 @@ def dump_coll_impl(name, blocking_type): G.out.append("coll_sig.is_persistent = false;") G.out.append("coll_sig.sched = NULL;") + G.out.append("memset(&coll_sig.cache, 0, sizeof(coll_sig.cache));"); + + phash = {} for p in func['parameters']: if p['name'] == 'comm': pass else: + phash[p['name']] = 1 G.out.append("coll_sig.u.%s.%s = %s;" % (name, p['name'], p['name'])) # Call csel @@ -263,6 +732,7 @@ def dump_coll_impl(name, blocking_type): G.out.append("MPIR_Request *req = MPIR_Request_create(MPIR_REQUEST_KIND__PREQUEST_COLL);") G.out.append("MPIR_ERR_CHKANDJUMP(!req, mpi_errno, MPI_ERR_OTHER, \"**nomem\");") G.out.append("MPIR_Comm_add_ref(comm_ptr);") + G.out.append("req->comm = comm_ptr;") G.out.append("MPIR_Comm_save_inactive_request(comm_ptr, req);") G.out.append("req->u.persist_coll.sched_type = coll_sig.sched_type;") G.out.append("req->u.persist_coll.sched = coll_sig.sched;") @@ -314,7 +784,21 @@ def get_func_name(name, blocking_type): elif blocking_type == "persistent": return name + "_init" -def get_algo_funcname(func_name, commkind, algo): +def get_algo_coll_name(algo): + if algo["func-commkind"] == "general": + raise Exception("general algo!") + + func_name, commkind = algo["func-commkind"].split("-") + if func_name.startswith('i'): + return func_name[1:] + else: + return func_name + +def get_algo_funcname(algo): + if algo["func-commkind"] == "general": + return algo['name'] + + func_name, commkind = algo["func-commkind"].split("-") if 'allcomm' in algo: commkind = 'allcomm' Name = func_name.capitalize() @@ -326,6 +810,33 @@ def get_algo_funcname(func_name, commkind, algo): else: return "MPIR_%s_%s_%s" % (Name, commkind, get_algo_name(algo)) +def get_coll_args(coll_name, kind): + func = G.FUNCS["mpi_" + coll_name] + 
args = [] + if kind == "csel": + for p in func['parameters']: + if p['name'] == 'comm': + args.append("coll_sig->comm_ptr") + else: + args.append("coll_sig->u.%s.%s" % (coll_name, p['name'])) + else: + raise Exception("unexpected kind") + return ', '.join(args) + +def get_coll_params(coll_name): + func = G.FUNCS["mpi_" + coll_name] + mapping = G.MAPS['SMALL_C_KIND_MAP'] + params = [] + for p in func['parameters']: + if p['name'] == 'comm': + params.append("MPIR_Comm * comm_ptr") + else: + s = get_C_param(p, func, mapping) + if p['kind'].startswith('POLY'): + s = re.sub(r'\bint ', 'MPI_Aint ', s) + params.append(s) + return ', '.join(params) + def get_algo_extra_args(algo, kind): (func_name, commkind) = algo['func-commkind'].split('-') extra_params = algo['extra_params'].replace(' ', '').split(',') @@ -340,7 +851,7 @@ def get_algo_extra_args(algo, kind): out_list.append(RE.m.group(1)) else: if kind == "csel": - prefix = "cnt->u.%s.%s_%s." % (func_name, commkind, algo['name']) + prefix = "cnt->u.%s." 
% (algo_struct_name(algo)) out_list.append(prefix + extra_params[i]) elif kind == "cvar": prefix = "MPIR_CVAR_%s_" % func_name.upper() @@ -401,6 +912,54 @@ def get_func_params(func, name, blocking_type): return ', '.join(params) +def coll_type(coll_name, is_blocking, commkind): + prefix = "MPIR_CSEL_COLL_TYPE" + if is_blocking: + return "%s__%s_%s" % (prefix, commkind.upper(), coll_name.upper()) + else: + return "%s__%s_I%s" % (prefix, commkind.upper(), coll_name.upper()) + +def coll_type_END(): + return "MPIR_CSEL_COLL_TYPE__END" + +def cvar_name(coll_name, is_blocking, commkind): + if is_blocking: + return "MPIR_CVAR_%s_%s_ALGORITHM" % (coll_name.upper(), commkind.upper()) + else: + return "MPIR_CVAR_I%s_%s_ALGORITHM" % (coll_name.upper(), commkind.upper()) + +def algo_id(algo_funcname): + prefix = "MPII_CSEL_CONTAINER_TYPE__ALGORITHM" + return "%s__%s" % (prefix, algo_funcname) + +def algo_id_END(): + return "MPII_CSEL_CONTAINER_TYPE__ALGORITHM__END" + +def condition_id(name): + prefix = "CSEL_NODE_TYPE__OPERATOR__" + a = re.sub(r'-', '_', name) + a = re.sub(r'\(thresh\)$', '', a) + return prefix + a + +def algo_struct_name(algo): + algo_funcname = get_algo_funcname(algo) + struct_name = re.sub(r'MPIR_', '', algo_funcname).lower() + return struct_name + +def algo_id_END(): + return "MPII_CSEL_CONTAINER_TYPE__ALGORITHM__END" + +def condition_id(name): + prefix = "CSEL_NODE_TYPE__OPERATOR__" + a = re.sub(r'-', '_', name) + a = re.sub(r'\(thresh\)$', '', a) + return prefix + a + +def algo_struct_name(algo): + algo_funcname = get_algo_funcname(algo) + struct_name = re.sub(r'MPIR_', '', algo_funcname).lower() + return struct_name + # ---------------------- def dump_c_file(f, lines): print(" --> [%s]" % f) @@ -425,48 +984,18 @@ def dump_c_file(f, lines): print(" " * indent, end='', file=Out) print(l, file=Out) -def dump_coll_algos_h(f, algolist, prototypes): - def algo_id(a): - prefix = "MPII_CSEL_CONTAINER_TYPE__ALGORITHM" - # TODO: fix the tsp function name - if 
RE.match(r'MPIR_TSP_(\w+)_sched_intra_(\w+)', a): - return "%s__MPIR_%s_intra_tsp_%s" % (prefix, RE.m.group(1), RE.m.group(2)) - else: - return "%s__%s" % (prefix, a) - +def dump_coll_algos_h(f, prototypes, lines): print(" --> [%s]" % f) with open(f, "w") as Out: for l in G.copyright_c: print(l, file=Out) + print("#ifndef COLL_ALGOS_H_INCLUDED", file=Out) print("#define COLL_ALGOS_H_INCLUDED", file=Out) print("", file=Out) - print("#define MPIR_COLL_ALGORITHM_IDS() \\", file=Out) - for a in algolist[:-1]: - print(" %s, \\" % algo_id(a), file=Out) - print(" %s" % algo_id(algolist[-1]), file=Out) - print("", file=Out) - - print("#define MPIR_COLL_SET_ALGO_TABLE() \\", file=Out) - print(" do { \\", file=Out) - for a in algolist: - print(" MPIR_Coll_algo_table[%s] = %s_cnt; \\" % (algo_id(a), a), file=Out) - print(" } while (0)", file=Out) - print("", file=Out) - - print("#define MPIR_COLL_SET_CONTAINER_ID() \\", file=Out) - print(" do { \\", file=Out) - print(" if (!strcmp(ckey, \"algorithm=%s\")) { \\" % algolist[0], file=Out) - print(" cnt->id = %s; \\" % algo_id(algolist[0]), file=Out) - for a in algolist[1:]: - print(" } else if (!strcmp(ckey, \"algorithm=%s\")) { \\" % a, file=Out) - print(" cnt->id = %s; \\" % algo_id(a), file=Out) - print(" } else { \\", file=Out) - print(" fprintf(stderr, \"unrecognized key \%s\\n\", key); \\", file=Out) - print(" } \\", file=Out) - print(" } while (0)", file=Out) - print("", file=Out) + for l in lines: + print(l, file=Out) for l in prototypes: lines = split_line_with_break(l + ';', '', 80) @@ -498,6 +1027,16 @@ def dump_split(indent, l): tlist = split_line_with_break(l, "", 100 - indent * 4) G.out.extend(tlist) +def dump_macro_open(macro, dowhile=False): + G.out2.append("#define %s \\" % macro) + if dowhile: + G.out2.append(" do { \\") + +def dump_macro_close(dowhile=False): + if dowhile: + G.out2.append(" } while (0)") + G.out2.append("") + # --------------------------------------------------------- if __name__ == 
"__main__": main() From f18f3e36c6b7684a94aabd814c9967df1fca3718 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 28 Aug 2025 12:11:00 -0500 Subject: [PATCH 37/47] ADI/csel: add hooks for initializing coll_sig Add MPIR_init_coll_sig and MPID_init_coll_sig so we can add arbitrary attr bits or additional fields without hacking maint/gen_coll.py. --- maint/gen_coll.py | 3 +++ src/include/mpir_coll.h | 1 + src/mpi/coll/include/coll_csel.h | 5 +++++ src/mpi/coll/src/coll_impl.c | 4 ++++ src/mpid/ch3/include/mpidpost.h | 4 ++++ src/mpid/ch4/include/mpidpost.h | 4 ++++ 6 files changed, 21 insertions(+) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 8bbbddbcdff..846d82eed6f 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -716,6 +716,9 @@ def dump_coll_impl(name, blocking_type): phash[p['name']] = 1 G.out.append("coll_sig.u.%s.%s = %s;" % (name, p['name'], p['name'])) + G.out.append("MPIR_Init_coll_sig(&coll_sig);") + G.out.append("MPID_Init_coll_sig(&coll_sig);") + # Call csel G.out.append("") G.out.append("mpi_errno = MPIR_Coll_composition_auto(&coll_sig);") diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index 37ebf57144a..754fdbd6b54 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -418,6 +418,7 @@ struct MPII_Csel_container { }; typedef int (*MPIR_Coll_algo_fn) (MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); +void MPIR_Init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig); /* During init, not all algorithms are safe to use. For example, the csel * may not have been initialized. 
We define a set of fallback routines that diff --git a/src/mpi/coll/include/coll_csel.h b/src/mpi/coll/include/coll_csel.h index b11c6279a31..6a5a9385bf5 100644 --- a/src/mpi/coll/include/coll_csel.h +++ b/src/mpi/coll/include/coll_csel.h @@ -17,4 +17,9 @@ void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); void *MPII_Create_container(struct json_object *obj); +MPL_STATIC_INLINE_PREFIX void MPIR_init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig) +{ + /* place holder for now */ +} + #endif /* COLL_CSEL_H_INCLUDED */ diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index f2f90b65577..52dcb32c933 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -230,6 +230,10 @@ int MPII_Coll_init(void) goto fn_exit; } +void MPIR_Init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig) +{ +} + int MPII_Coll_finalize(void) { int mpi_errno = MPI_SUCCESS; diff --git a/src/mpid/ch3/include/mpidpost.h b/src/mpid/ch3/include/mpidpost.h index eb0042a036b..5feadf8bd1b 100644 --- a/src/mpid/ch3/include/mpidpost.h +++ b/src/mpid/ch3/include/mpidpost.h @@ -276,4 +276,8 @@ MPL_STATIC_INLINE_PREFIX int MPID_Waitsome(int incount, MPIR_Request * request_p return MPIR_Waitsome_impl(incount, request_ptrs, outcount, array_of_indices, array_of_statuses); } +MPL_STATIC_INLINE_PREFIX void MPID_Init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig) +{ +} + #endif /* MPIDPOST_H_INCLUDED */ diff --git a/src/mpid/ch4/include/mpidpost.h b/src/mpid/ch4/include/mpidpost.h index 0776e979d9e..d69d19eb996 100644 --- a/src/mpid/ch4/include/mpidpost.h +++ b/src/mpid/ch4/include/mpidpost.h @@ -62,4 +62,8 @@ MPL_STATIC_INLINE_PREFIX int MPID_Finalize_async_thread(void) return MPIR_Stop_progress_thread_impl(NULL); } +MPL_STATIC_INLINE_PREFIX void MPID_Init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig) +{ +} + #endif /* MPIDPOST_H_INCLUDED */ From bba499e57e1f4dfd10be6af3c811d136444036b6 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Sun, 24 Aug 2025 11:09:07 -0500 Subject: 
[PATCH 38/47] coll/csel: update csel Generate tables based on coll_algorithms.txt and use the tables to facilitate csel parsing and error reporting. If user sets an algorithm CVAR, directly construct a container for the cvar-specified algorithm and call it if all restrictions are met. All restrictive checkers are represented by either a bit in coll_sig->attr or a boolean checker function. All restrictions and their checkers are configured in coll_algorithms.txt. --- src/include/mpir_coll.h | 286 +------ src/include/mpir_misc.h | 13 - src/mpi/coll/include/coll_csel.h | 300 ++++++- src/mpi/coll/include/coll_impl.h | 2 - src/mpi/coll/src/Makefile.mk | 1 - src/mpi/coll/src/coll_impl.c | 57 +- src/mpi/coll/src/csel.c | 1272 +++-------------------------- src/mpi/coll/src/csel_container.c | 391 --------- 8 files changed, 471 insertions(+), 1851 deletions(-) delete mode 100644 src/mpi/coll/src/csel_container.c diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index 754fdbd6b54..44ee599961b 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -12,61 +12,31 @@ typedef struct MPII_Csel_container MPII_Csel_container_s; #include "coll_impl.h" #include "coll_algos.h" -typedef enum { - MPIR_CSEL_COLL_TYPE__ALLGATHER = 0, - MPIR_CSEL_COLL_TYPE__ALLGATHERV, - MPIR_CSEL_COLL_TYPE__ALLREDUCE, - MPIR_CSEL_COLL_TYPE__ALLTOALL, - MPIR_CSEL_COLL_TYPE__ALLTOALLV, - MPIR_CSEL_COLL_TYPE__ALLTOALLW, - MPIR_CSEL_COLL_TYPE__BARRIER, - MPIR_CSEL_COLL_TYPE__BCAST, - MPIR_CSEL_COLL_TYPE__EXSCAN, - MPIR_CSEL_COLL_TYPE__GATHER, - MPIR_CSEL_COLL_TYPE__GATHERV, - MPIR_CSEL_COLL_TYPE__IALLGATHER, - MPIR_CSEL_COLL_TYPE__IALLGATHERV, - MPIR_CSEL_COLL_TYPE__IALLREDUCE, - MPIR_CSEL_COLL_TYPE__IALLTOALL, - MPIR_CSEL_COLL_TYPE__IALLTOALLV, - MPIR_CSEL_COLL_TYPE__IALLTOALLW, - MPIR_CSEL_COLL_TYPE__IBARRIER, - MPIR_CSEL_COLL_TYPE__IBCAST, - MPIR_CSEL_COLL_TYPE__IEXSCAN, - MPIR_CSEL_COLL_TYPE__IGATHER, - MPIR_CSEL_COLL_TYPE__IGATHERV, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHER, - 
MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHERV, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALL, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLV, - MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLW, - MPIR_CSEL_COLL_TYPE__IREDUCE, - MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER, - MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK, - MPIR_CSEL_COLL_TYPE__ISCAN, - MPIR_CSEL_COLL_TYPE__ISCATTER, - MPIR_CSEL_COLL_TYPE__ISCATTERV, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHER, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHERV, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALL, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLV, - MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLW, - MPIR_CSEL_COLL_TYPE__REDUCE, - MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER, - MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK, - MPIR_CSEL_COLL_TYPE__SCAN, - MPIR_CSEL_COLL_TYPE__SCATTER, - MPIR_CSEL_COLL_TYPE__SCATTERV, - MPIR_CSEL_COLL_TYPE__END, -} MPIR_Csel_coll_type_e; - -typedef enum { - MPIR_COLL_ALGORITHM_IDS(), - /* composition algorithms */ - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto, - /* end */ - MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count, -} MPII_Csel_container_type_e; +/* Define values for collective attribute. + * - The first 8 bits are passed down to basic collective algorithms. + * - The higher bits are used to assist algorithm selections + * - The lower 32 bits are reserved by MPIR-layer + * - The higher 32 bits are reserved for MPID-layer + */ +#define MPIR_COLL_ATTR_CORE_BITS 8 +#define MPIR_COLL_ATTR_MPIR_BITS 32 + +/* bit 0-7 */ +#define MPIR_COLL_ATTR_SYNC 0x1 /* It's an internal collective that focuses + * on synchronization rather than batch latency. + * In particular, advise netmod to avoid using + * injection send. 
*/ +#define MPIR_ERR_PROC_FAILED 0x2 +#define MPIR_ERR_OTHER 0x4 +#define MPIR_COLL_ATTR_ERR_MASK 0x6 + +#define MPIR_COLL_ATTR_HAS_ERR(coll_attr) ((coll_attr) & MPIR_COLL_ATTR_ERR_MASK) + +/* bit 8-31, MPIR-layer */ +#define MPIR_COLL_ATTR__inplace 0x00000100 +#define MPIR_COLL_ATTR__pof2 0x00000200 +#define MPIR_COLL_ATTR__commutative 0x00000400 +#define MPIR_COLL_ATTR__builtin_op 0x00000800 struct MPIR_Csel_coll_sig { MPIR_Csel_coll_type_e coll_type; @@ -75,6 +45,10 @@ struct MPIR_Csel_coll_sig { enum MPIR_sched_type sched_type; bool is_persistent; + struct { + bool is_gpu; + } cache; + union { struct { const void *sendbuf; @@ -215,208 +189,6 @@ struct MPIR_Csel_coll_sig { } u; }; -struct MPII_Csel_container { - MPII_Csel_container_type_e id; - - union { - struct { - struct { - int k; - } intra_tsp_brucks; - struct { - int k; - } intra_tsp_recexch_doubling; - struct { - int k; - } intra_tsp_recexch_halving; - } iallgather; - struct { - struct { - int k; - } intra_tsp_brucks; - struct { - int k; - } intra_tsp_recexch_doubling; - struct { - int k; - } intra_tsp_recexch_halving; - } iallgatherv; - struct { - struct { - int k; - } intra_tsp_recexch_single_buffer; - struct { - int k; - } intra_tsp_recexch; - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - } intra_tsp_tree; - struct { - int k; - } intra_tsp_recexch_reduce_scatter_recexch_allgatherv; - } iallreduce; - struct { - struct { - int k; - int buffer_per_phase; - } intra_tsp_brucks; - struct { - int batch_size; - int bblock; - } intra_tsp_scattered; - } ialltoall; - struct { - struct { - int batch_size; - int bblock; - } intra_tsp_scattered; - struct { - int bblock; - } intra_tsp_blocked; - } ialltoallv; - struct { - struct { - int bblock; - } intra_tsp_blocked; - } ialltoallw; - struct { - struct { - int k; - } intra_k_dissemination; - struct { - int k; - bool single_phase_recv; - } intra_recexch; - } barrier; - struct { - struct { - int k; - } intra_tsp_recexch; - struct { - 
int k; - } intra_tsp_k_dissemination; - } ibarrier; - struct { - struct { - int tree_type; - int k; - int chunk_size; - } intra_tsp_tree; - struct { - int chunk_size; - } intra_tsp_ring; - struct { - int scatterv_k; - int allgatherv_k; - } intra_tsp_scatterv_allgatherv; - struct { - int scatterv_k; - } intra_tsp_scatterv_ring_allgatherv; - } ibcast; - struct { - struct { - int tree_type; - int k; - int is_non_blocking; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tree; - struct { - int tree_type; - int k; - int is_non_blocking; - int chunk_size; - int recv_pre_posted; - } intra_pipelined_tree; - } bcast; - struct { - struct { - int k; - } intra_k_brucks; - struct { - int k; - bool single_phase_recv; - } intra_recexch_doubling; - struct { - int k; - bool single_phase_recv; - } intra_recexch_halving; - } allgather; - struct { - struct { - int k; - } intra_k_brucks; - } alltoall; - struct { - struct { - int k; - } intra_tsp_tree; - } igather; - struct { - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tsp_tree; - struct { - int chunk_size; - int buffer_per_child; - } intra_tsp_ring; - } ireduce; - struct { - struct { - int k; - } intra_tsp_recexch; - } ireduce_scatter; - struct { - struct { - int k; - } intra_tsp_recexch; - } ireduce_scatter_block; - struct { - struct { - int k; - } intra_recursive_multiplying; - struct { - int tree_type; - int k; - int chunk_size; - int buffer_per_child; - int topo_overhead; - int topo_diff_groups; - int topo_diff_switches; - int topo_same_switches; - } intra_tree; - struct { - int k; - bool single_phase_recv; - } intra_recexch; - struct { - int k; - bool single_phase_recv; - } intra_k_reduce_scatter_allgather; - struct { - int ccl; - } intra_ccl; - } allreduce; - struct { - struct { - int k; - } intra_tsp_tree; - } iscatter; - } u; -}; - 
typedef int (*MPIR_Coll_algo_fn) (MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt); void MPIR_Init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig); diff --git a/src/include/mpir_misc.h b/src/include/mpir_misc.h index 6743412eff4..5c5d9c1c16d 100644 --- a/src/include/mpir_misc.h +++ b/src/include/mpir_misc.h @@ -14,19 +14,6 @@ #define MPIR_FINALIZE_CALLBACK_DEFAULT_PRIO 0 #define MPIR_FINALIZE_CALLBACK_MAX_PRIO 10 -/* Define values for collective attribute. Collective attributes pass - * down contexts including error flags. - */ -#define MPIR_COLL_ATTR_SYNC 0x1 /* It's an internal collective that focuses - * on synchronization rather than batch latency. - * In particular, advise netmod to avoid using - * injection send. */ -#define MPIR_ERR_PROC_FAILED 0x2 -#define MPIR_ERR_OTHER 0x4 -#define MPIR_COLL_ATTR_ERR_MASK 0x6 - -#define MPIR_COLL_ATTR_HAS_ERR(coll_attr) ((coll_attr) & MPIR_COLL_ATTR_ERR_MASK) - /*E MPIR_Lang_t - Known language bindings for MPI diff --git a/src/mpi/coll/include/coll_csel.h b/src/mpi/coll/include/coll_csel.h index 6a5a9385bf5..53767656143 100644 --- a/src/mpi/coll/include/coll_csel.h +++ b/src/mpi/coll/include/coll_csel.h @@ -8,18 +8,302 @@ #include "json.h" -int MPIR_Csel_create_from_file(const char *json_file, - void *(*create_container) (struct json_object *), void **csel); -int MPIR_Csel_create_from_buf(const char *json, - void *(*create_container) (struct json_object *), void **csel); +typedef struct csel_node { + MPIR_Csel_node_type_e type; + union { + struct { + MPIR_Csel_coll_type_e coll_type; + } collective; + struct { + bool negate; + int thresh; + } condition; + MPII_Csel_container_s *container; + } u; + struct csel_node *success; + struct csel_node *failure; +} MPIR_Csel_node_s; + +extern MPIR_Coll_algo_fn *MPIR_Coll_algo_table; +extern int *MPIR_Coll_cvar_table; +extern const char **MPIR_Coll_type_names; +extern const char **MPIR_Coll_algo_names; +extern const char **MPIR_Csel_condition_names; + +int 
MPIR_Csel_create_from_file(const char *json_file, void **csel); +int MPIR_Csel_create_from_buf(const char *json, void **csel); int MPIR_Csel_free(void *csel); -void *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); +MPII_Csel_container_s *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig); +void MPIR_Csel_print_tree(MPIR_Csel_node_s * node, int level); + +MPL_STATIC_INLINE_PREFIX int MPIR_Csel_comm_size(MPIR_Csel_coll_sig_s * coll_sig) +{ + /* FIXME: update when we have intercomm algorithms that need select on comm_size */ + MPIR_Assert(coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM); + return coll_sig->comm_ptr->local_size; +} + +#define COLL_TYPE_ALL_CASE(NAME) \ + case MPIR_CSEL_COLL_TYPE__INTRA_ ## NAME: \ + case MPIR_CSEL_COLL_TYPE__INTRA_I ## NAME: \ + case MPIR_CSEL_COLL_TYPE__INTER_ ## NAME: \ + case MPIR_CSEL_COLL_TYPE__INTER_I ## NAME + +MPL_STATIC_INLINE_PREFIX MPI_Aint MPIR_Csel_avg_msg_size(MPIR_Csel_coll_sig_s * coll_sig) +{ + MPI_Aint msgsize = 0; + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(ALLREDUCE): + MPIR_Datatype_get_size_macro(coll_sig->u.allreduce.datatype, msgsize); + msgsize *= coll_sig->u.allreduce.count; + break; + COLL_TYPE_ALL_CASE(BCAST): + MPIR_Datatype_get_size_macro(coll_sig->u.bcast.datatype, msgsize); + msgsize *= coll_sig->u.bcast.count; + break; + COLL_TYPE_ALL_CASE(REDUCE): + MPIR_Datatype_get_size_macro(coll_sig->u.reduce.datatype, msgsize); + msgsize *= coll_sig->u.reduce.count; + break; + COLL_TYPE_ALL_CASE(ALLTOALL): + MPIR_Datatype_get_size_macro(coll_sig->u.alltoall.sendtype, msgsize); + msgsize *= coll_sig->u.alltoall.sendcount; + break; + default: + fprintf(stderr, "avg_msg_size not defined for coll_type %d\n", coll_sig->coll_type); + MPIR_Assert(0); + break; + } + return msgsize; +} + +MPL_STATIC_INLINE_PREFIX MPI_Aint MPIR_Csel_total_msg_size(MPIR_Csel_coll_sig_s * coll_sig) +{ + MPI_Aint total_bytes = 0; + int comm_size = coll_sig->comm_ptr->local_size; + + switch 
(coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(ALLREDUCE): + MPIR_Datatype_get_size_macro(coll_sig->u.allreduce.datatype, total_bytes); + total_bytes *= coll_sig->u.allreduce.count * comm_size; + break; + COLL_TYPE_ALL_CASE(BCAST): + MPIR_Datatype_get_size_macro(coll_sig->u.bcast.datatype, total_bytes); + total_bytes *= coll_sig->u.bcast.count * comm_size; + break; + COLL_TYPE_ALL_CASE(REDUCE): + MPIR_Datatype_get_size_macro(coll_sig->u.reduce.datatype, total_bytes); + total_bytes *= coll_sig->u.reduce.count * comm_size; + break; + COLL_TYPE_ALL_CASE(ALLTOALL): + MPIR_Datatype_get_size_macro(coll_sig->u.alltoall.sendtype, total_bytes); + total_bytes *= coll_sig->u.alltoall.sendcount * comm_size; + break; + COLL_TYPE_ALL_CASE(ALLTOALLV): + MPIR_Datatype_get_size_macro(coll_sig->u.alltoallv.sendtype, total_bytes); + { + MPI_Aint count = 0; + for (int i = 0; i < comm_size; i++) { + count += coll_sig->u.alltoallv.sendcounts[i]; + } + total_bytes *= count; + } + break; + COLL_TYPE_ALL_CASE(ALLTOALLW): + for (int i = 0; i < comm_size; i++) { + MPI_Aint typesize; + MPIR_Datatype_get_size_macro(coll_sig->u.alltoallw.sendtypes[i], typesize); + total_bytes += (coll_sig->u.alltoallw.sendcounts[i] * typesize); + } + break; + COLL_TYPE_ALL_CASE(ALLGATHER): + MPIR_Datatype_get_size_macro(coll_sig->u.allgather.recvtype, total_bytes); + total_bytes *= coll_sig->u.allgather.recvcount * comm_size; + break; + COLL_TYPE_ALL_CASE(ALLGATHERV): + MPIR_Datatype_get_size_macro(coll_sig->u.allgatherv.recvtype, total_bytes); + { + MPI_Aint count = 0; + for (int i = 0; i < comm_size; i++) { + count += coll_sig->u.allgatherv.recvcounts[i]; + } + total_bytes *= count; + } + break; + COLL_TYPE_ALL_CASE(GATHER): + if (coll_sig->u.gather.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig->u.gather.recvtype, total_bytes); + /* use remote size for intercomm root */ + if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig->u.gather.recvcount * 
(coll_sig->comm_ptr->remote_size); + else + total_bytes = coll_sig->u.gather.recvcount * comm_size; + } else { + MPIR_Datatype_get_size_macro(coll_sig->u.gather.sendtype, total_bytes); + total_bytes = coll_sig->u.gather.sendcount * comm_size; + } + break; + COLL_TYPE_ALL_CASE(SCATTER): + if (coll_sig->u.scatter.root == MPI_ROOT) { + MPIR_Datatype_get_size_macro(coll_sig->u.scatter.sendtype, total_bytes); + /* use remote size for intercomm root */ + if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) + total_bytes = coll_sig->u.scatter.sendcount * (coll_sig->comm_ptr->remote_size); + else + total_bytes = coll_sig->u.scatter.sendcount * comm_size; + } else { + MPIR_Datatype_get_size_macro(coll_sig->u.scatter.recvtype, total_bytes); + total_bytes = coll_sig->u.scatter.recvcount * comm_size; + } + break; + COLL_TYPE_ALL_CASE(REDUCE_SCATTER): + MPIR_Datatype_get_size_macro(coll_sig->u.reduce_scatter.datatype, total_bytes); + { + MPI_Aint count = 0; + for (int i = 0; i < comm_size; i++) { + count += coll_sig->u.reduce_scatter.recvcounts[i]; + } + total_bytes *= count; + } + break; + COLL_TYPE_ALL_CASE(REDUCE_SCATTER_BLOCK): + MPIR_Datatype_get_size_macro(coll_sig->u.reduce_scatter_block.datatype, total_bytes); + total_bytes *= coll_sig->u.reduce_scatter_block.recvcount * comm_size; + break; + default: + MPIR_Assert(0); + break; + } + + return total_bytes; +} + +/* boolean csel checker_functions */ +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_comm_size_is_pof2(MPIR_Csel_coll_sig_s * coll_sig) +{ + return MPL_is_pof2(coll_sig->comm_ptr->local_size); +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_count_ge_pof2(MPIR_Csel_coll_sig_s * coll_sig) +{ + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(REDUCE): + return (coll_sig->u.reduce.count >= MPL_pof2(coll_sig->comm_ptr->local_size)); + COLL_TYPE_ALL_CASE(ALLREDUCE): + return (coll_sig->u.allreduce.count >= MPL_pof2(coll_sig->comm_ptr->local_size)); + default: + printf("MPIR_Csel_count_ge_pof2: unsupported 
coll_type: %s\n", + MPIR_Coll_type_names[coll_sig->coll_type]); + MPIR_Assert(0); + return false; + } +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_block_regular(MPIR_Csel_coll_sig_s * coll_sig) +{ + const MPI_Aint *counts = NULL; + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(REDUCE_SCATTER): + counts = coll_sig->u.reduce_scatter.recvcounts; + break; + default: + MPIR_Assert(0); + return false; + } + + for (int i = 1; i < coll_sig->comm_ptr->local_size; i++) { + if (counts[i] != counts[0]) { + return false; + } + } + return true; +} -void *MPII_Create_container(struct json_object *obj); +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_displs_ordered(MPIR_Csel_coll_sig_s * coll_sig) +{ + const MPI_Aint *counts = NULL; + const MPI_Aint *displs = NULL; + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(ALLGATHERV): + counts = coll_sig->u.allgatherv.recvcounts; + displs = coll_sig->u.allgatherv.displs; + break; + default: + return false; + } + + MPI_Aint pos = 0; + for (int i = 0; i < coll_sig->comm_ptr->local_size; i++) { + if (pos != displs[i]) + return false; + pos += counts[i]; + } + return true; +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_sendbuf_inplace(MPIR_Csel_coll_sig_s * coll_sig) +{ + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(ALLTOALL): + return (coll_sig->u.alltoall.sendbuf == MPI_IN_PLACE); + COLL_TYPE_ALL_CASE(ALLTOALLV): + return (coll_sig->u.alltoallv.sendbuf == MPI_IN_PLACE); + COLL_TYPE_ALL_CASE(ALLTOALLW): + return (coll_sig->u.alltoallw.sendbuf == MPI_IN_PLACE); + default: + fprintf(stderr, "is_sendbuf_inplace not defined for coll_type %d\n", + coll_sig->coll_type); + MPIR_Assert(0); + return false; + } +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_op_is_builtin(MPIR_Csel_coll_sig_s * coll_sig) +{ + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(ALLREDUCE): + return HANDLE_IS_BUILTIN(coll_sig->u.allreduce.op); + COLL_TYPE_ALL_CASE(REDUCE): + return HANDLE_IS_BUILTIN(coll_sig->u.reduce.op); + break; + default: + fprintf(stderr, 
"is_op_builtin not defined for coll_type %d\n", coll_sig->coll_type); + MPIR_Assert(0); + return false; + } +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_op_is_commutative(MPIR_Csel_coll_sig_s * coll_sig) +{ + switch (coll_sig->coll_type) { + COLL_TYPE_ALL_CASE(ALLREDUCE): + return MPIR_Op_is_commutative(coll_sig->u.allreduce.op); + COLL_TYPE_ALL_CASE(REDUCE): + return MPIR_Op_is_commutative(coll_sig->u.reduce.op); + COLL_TYPE_ALL_CASE(REDUCE_SCATTER): + return MPIR_Op_is_commutative(coll_sig->u.reduce_scatter.op); + COLL_TYPE_ALL_CASE(REDUCE_SCATTER_BLOCK): + return MPIR_Op_is_commutative(coll_sig->u.reduce_scatter_block.op); + default: + fprintf(stderr, "is_commutative not defined for coll_type %d\n", coll_sig->coll_type); + MPIR_Assert(0); + return false; + } +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_is_hierarchical(MPIR_Csel_coll_sig_s * coll_sig) +{ + return MPIR_Comm_is_parent_comm(coll_sig->comm_ptr); +} + +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_is_node_consecutive(MPIR_Csel_coll_sig_s * coll_sig) +{ + return MPII_Comm_is_node_consecutive(coll_sig->comm_ptr); +} -MPL_STATIC_INLINE_PREFIX void MPIR_init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig) +MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_is_node_canonical(MPIR_Csel_coll_sig_s * coll_sig) { - /* place holder for now */ + return MPII_Comm_is_node_canonical(coll_sig->comm_ptr); } #endif /* COLL_CSEL_H_INCLUDED */ diff --git a/src/mpi/coll/include/coll_impl.h b/src/mpi/coll/include/coll_impl.h index d618d22b68a..b38e9f509ca 100644 --- a/src/mpi/coll/include/coll_impl.h +++ b/src/mpi/coll/include/coll_impl.h @@ -58,8 +58,6 @@ int MPIR_Coll_safe_to_block(void); int MPII_Coll_finalize(void); -void MPIR_Coll_algo_init(void); -void MPIR_Coll_cvar_init(void); /* NOTE: MPIR_Coll_auto is one of the composition container functions. However, * MPIR_Coll_composition_auto is a gate function, thus does not take "cnt" parameter. 
*/ int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig); diff --git a/src/mpi/coll/src/Makefile.mk b/src/mpi/coll/src/Makefile.mk index 1dd1b78b204..91ee6b595b6 100644 --- a/src/mpi/coll/src/Makefile.mk +++ b/src/mpi/coll/src/Makefile.mk @@ -10,5 +10,4 @@ mpi_core_sources += \ src/mpi/coll/src/coll_impl.c \ src/mpi/coll/src/csel.c \ - src/mpi/coll/src/csel_container.c \ src/mpi/coll/src/csel_json_autogen.c diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 52dcb32c933..7efee5259af 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -123,6 +123,14 @@ void *MPIR_Csel_selection = NULL; /* table of all collective algorithms */ MPIR_Coll_algo_fn *MPIR_Coll_algo_table; +/* table of collective algorithm cvars */ +int *MPIR_Coll_cvar_table; + +/* string tables to facilitate parsing and debugging */ +const char **MPIR_Coll_type_names; +const char **MPIR_Coll_algo_names; +const char **MPIR_Csel_condition_names; + MPIR_Tree_type_t get_tree_type_from_string(const char *tree_str) { MPIR_Tree_type_t tree_type = MPIR_TREE_TYPE_KARY; @@ -172,9 +180,9 @@ int get_ccl_from_string(const char *ccl_str) #define LOAD_CSEL_JSON(csel_var, cvar_name, builtin_str) \ do { \ if (!strcmp(cvar_name, "")) { \ - mpi_errno = MPIR_Csel_create_from_buf(builtin_str, MPII_Create_container, &csel_var); \ + mpi_errno = MPIR_Csel_create_from_buf(builtin_str, &csel_var); \ } else { \ - mpi_errno = MPIR_Csel_create_from_file(cvar_name, MPII_Create_container, &csel_var); \ + mpi_errno = MPIR_Csel_create_from_file(cvar_name, &csel_var); \ } \ MPIR_ERR_CHECK(mpi_errno); \ } while (0) @@ -214,16 +222,24 @@ int MPII_Coll_init(void) mpi_errno = MPII_Recexchalgo_init(); MPIR_ERR_CHECK(mpi_errno); + /* FIXME: this is hackish. 
Define the "num" constants in coll_algos.h */ + MPIR_Coll_cvar_table = MPL_malloc(MPIR_CSEL_NUM_COLL_TYPES * sizeof(int), MPL_MEM_COLL); + MPIR_Coll_type_names = MPL_malloc(MPIR_CSEL_NUM_COLL_TYPES * sizeof(char *), MPL_MEM_COLL); + MPIR_Coll_algo_table = + MPL_malloc(MPIR_CSEL_NUM_ALGORITHMS * sizeof(MPIR_Coll_algo_fn), MPL_MEM_COLL); + MPIR_Coll_algo_names = MPL_malloc(MPIR_CSEL_NUM_ALGORITHMS * sizeof(char *), MPL_MEM_COLL); + MPIR_Csel_condition_names = MPL_malloc(MPIR_CSEL_NUM_CONDITIONS * sizeof(char *), MPL_MEM_COLL); + + MPII_Coll_type_init(); + MPII_Coll_algo_init(); + MPII_Csel_init_condition_names(); + /* initialize selection tree */ LOAD_CSEL_JSON(MPIR_Csel_composition, MPIR_CVAR_COLL_COMPOSITION_JSON_FILE, MPII_coll_composition_json); LOAD_CSEL_JSON(MPIR_Csel_selection, MPIR_CVAR_COLL_SELECTION_JSON_FILE, MPII_coll_selection_json); - MPIR_Coll_algo_table = MPL_malloc(MPII_CSEL_CONTAINER_TYPE__ALGORITHM__Algorithm_count * - sizeof(MPIR_Coll_algo_fn), MPL_MEM_COLL); - MPIR_Coll_algo_init(); - fn_exit: return mpi_errno; fn_fail: @@ -251,6 +267,10 @@ int MPII_Coll_finalize(void) MPIR_ERR_CHECK(mpi_errno); MPL_free(MPIR_Coll_algo_table); + MPL_free(MPIR_Coll_cvar_table); + MPL_free(MPIR_Coll_algo_names); + MPL_free(MPIR_Coll_type_names); + MPL_free(MPIR_Csel_condition_names); fn_exit: return mpi_errno; @@ -398,12 +418,6 @@ void MPIR_Coll_host_buffer_persist_set(void *host_sendbuf, void *host_recvbuf, v } } -void MPIR_Coll_algo_init(void) -{ - MPIR_COLL_SET_ALGO_TABLE(); - MPIR_Coll_algo_table[MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Coll_auto] = MPIR_Coll_auto; -} - int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig) { int mpi_errno = MPI_SUCCESS; @@ -426,6 +440,25 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me) { int mpi_errno = MPI_SUCCESS; + /* First check whether user has set an algorithm CVAR */ + int coll_type = coll_sig->coll_type; + int cvar_val = MPIR_Coll_cvar_table[coll_type]; + if (cvar_val) { + 
int algo_id = MPIR_Coll_cvar_to_algo_id(coll_type, cvar_val); + bool restriction_ok = MPIR_Coll_check_algo_restriction(coll_sig, algo_id); + + if (restriction_ok) { + MPII_Csel_container_s algo_cnt; + MPIR_Coll_init_algo_container(coll_sig, algo_id, &algo_cnt); + mpi_errno = MPIR_Coll_algo_table[algo_id] (coll_sig, &algo_cnt); + MPIR_ERR_CHECK(mpi_errno); + goto fn_exit; + } else { + /* Error or Fall-thru */ + } + } + + /* Search an algorithm by Csel */ MPII_Csel_container_s *cnt = MPIR_Csel_search(MPIR_Csel_selection, coll_sig); MPIR_ERR_CHKANDJUMP(!cnt, mpi_errno, MPI_ERR_OTHER, "**csel_noresult"); diff --git a/src/mpi/coll/src/csel.c b/src/mpi/coll/src/csel.c index 3804535d165..684307d9ece 100644 --- a/src/mpi/coll/src/csel.c +++ b/src/mpi/coll/src/csel.c @@ -11,318 +11,57 @@ #include #include -typedef enum { - /* global operator types */ - CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED = 0, - - /* comm-specific operator types */ - CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTRA, - CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTER, - - CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LE, - CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LT, - CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_NODE_COMM_SIZE, - CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_POW2, - - CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY, - CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE, - - CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LE, - CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LT, - - /* collective selection operator */ - CSEL_NODE_TYPE__OPERATOR__COLLECTIVE, - - /* message-specific operator types */ - CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LE, - CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LT, - CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LE, - CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LT, - - CSEL_NODE_TYPE__OPERATOR__COUNT_LE, - CSEL_NODE_TYPE__OPERATOR__COUNT_LT_POW2, - - CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE, - CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR, - CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE, - CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN, - - /* any - has to be 
the last branch in an array */ - CSEL_NODE_TYPE__OPERATOR__ANY, - - /* container type */ - CSEL_NODE_TYPE__CONTAINER, -} csel_node_type_e; - -typedef struct csel_node { - csel_node_type_e type; - - union { - /* global types */ - struct { - int val; - } is_multi_threaded; - - /* comm-specific operator types */ - struct { - int val; - } comm_size_le; - struct { - int val; - } comm_size_lt; - - /* collective selection operator */ - struct { - MPIR_Csel_coll_type_e coll_type; - } collective; - - /* message-specific operator types */ - struct { - int val; - } avg_msg_size_le; - struct { - int val; - } avg_msg_size_lt; - struct { - int val; - } total_msg_size_le; - struct { - int val; - } total_msg_size_lt; - struct { - int val; - } count_le; - struct { - bool val; - } is_commutative; - struct { - bool val; - } is_sbuf_inplace; - struct { - bool val; - } is_op_built_in; - struct { - bool val; - } is_block_regular; - struct { - bool val; - } is_node_consecutive; - struct { - int val; - } comm_avg_ppn_le; - struct { - int val; - } comm_avg_ppn_lt; - struct { - bool val; - } comm_hierarchy; - struct { - void *container; - } cnt; - } u; - - struct csel_node *success; - struct csel_node *failure; -} csel_node_s; - -static int nesting = -1; -#define nprintf(...) 
\ - do { \ - for (int i = 0; i < nesting; i++) \ - printf(" "); \ - printf(__VA_ARGS__); \ - } while (0) - -static void print_tree(csel_node_s * node) ATTRIBUTE((unused)); -static void print_tree(csel_node_s * node) -{ - nesting++; - - if (node == NULL) - return; - - switch (node->type) { - case CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED: - nprintf("MPI library is multithreaded\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTRA: - nprintf("comm_type is intra\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTER: - nprintf("comm_type is inter\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COLLECTIVE: - nprintf("collective: %d\n", node->u.collective.coll_type); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LE: - nprintf("comm_size <= %d\n", node->u.comm_size_le.val); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LT: - nprintf("comm_size < %d\n", node->u.comm_size_lt.val); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_POW2: - nprintf("comm_size is power-of-two\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_NODE_COMM_SIZE: - nprintf("comm_size is the same as node_comm_size\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LE: - nprintf("avg_msg_size <= %d\n", node->u.avg_msg_size_le.val); - break; - case CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LT: - nprintf("avg_msg_size < %d\n", node->u.avg_msg_size_lt.val); - break; - case CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LE: - nprintf("total_msg_size <= %d\n", node->u.total_msg_size_le.val); - break; - case CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LT: - nprintf("total_msg_size < %d\n", node->u.total_msg_size_lt.val); - break; - case CSEL_NODE_TYPE__CONTAINER: - nprintf("container\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COUNT_LE: - nprintf("count <= %d\n", node->u.count_le.val); - break; - case CSEL_NODE_TYPE__OPERATOR__COUNT_LT_POW2: - nprintf("count < nearest power-of-two less than comm size\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE: - 
nprintf("source buffer is MPI_IN_PLACE\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR: - nprintf("all blocks have the same count\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY: - if (node->u.comm_hierarchy.val) - nprintf("communicator has hierarchical structure\n"); - else - nprintf("communicator does not have hierarchical structure\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE: - nprintf("process ranks are consecutive on the node\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LE: - nprintf("communicator's avg ppn <= %d\n", node->u.comm_avg_ppn_le.val); - break; - case CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LT: - nprintf("communicator's avg ppn < %d\n", node->u.comm_avg_ppn_lt.val); - break; - case CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE: - if (node->u.is_commutative.val == true) - nprintf("operation is commutative\n"); - else - nprintf("operation is not commutative\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN: - nprintf("other operators\n"); - break; - case CSEL_NODE_TYPE__OPERATOR__ANY: - nprintf("any\n"); - break; - default: - nprintf("unknown operator\n"); - MPIR_Assert(0); - } - - if (node->type != CSEL_NODE_TYPE__CONTAINER) { - print_tree(node->success); - if (node->failure) { - nesting--; - print_tree(node->failure); - nesting++; - } - } - - nesting--; -} - -static void validate_tree(csel_node_s * node) -{ - static int coll = -1; - - /* if we reached a leaf node, we are done */ - if (node->type == CSEL_NODE_TYPE__CONTAINER) - return; - - /* if we see the collective type, store it */ - if (node->type == CSEL_NODE_TYPE__OPERATOR__COLLECTIVE) - coll = node->u.collective.coll_type; - - /* success path should never be NULL */ - if (node->success == NULL) { - fprintf(stderr, "unexpected NULL success path for coll %d\n", coll); - MPIR_Assert(0); - } else { - validate_tree(node->success); - } - - if (node->type == CSEL_NODE_TYPE__OPERATOR__ANY) { - /* for "ANY"-style operators, the 
failure path must be NULL */ - if (node->failure) { - fprintf(stderr, "unexpected non-NULL failure path for coll %d\n", coll); - MPIR_Assert(0); - } - } else if (node->type != CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED && - node->type != CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTRA && - node->type != CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTER && - node->type != CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY && - node->type != CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE && - node->type != CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE && - node->type != CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR && - node->type != CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE && - node->type != CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN && - node->type != CSEL_NODE_TYPE__OPERATOR__COLLECTIVE) { - /* for boolean types, the failure path might or might not be - * NULL, but for everything else, the failure path must not be - * NULL */ - if (node->failure == NULL) { - fprintf(stderr, "unexpected NULL failure path for coll %d\n", coll); - MPIR_Assert(0); - } - } - - if (node->success) - validate_tree(node->success); - if (node->failure) - validate_tree(node->failure); -} +static void free_tree(MPIR_Csel_node_s * node); static bool key_is_any(const char *ckey) { int len = strlen(ckey); - if (strcmp(ckey, "any") == 0 || strcmp(ckey + len - 4, "=any") == 0) { + if (strcmp(ckey, "any") == 0 || strcmp(ckey + len - 5, "(any)") == 0) { return true; } else { return false; } } -static csel_node_s *parse_json_tree(struct json_object *obj, - void *(*create_container) (struct json_object *)) +static MPIR_Csel_node_s *parse_json_tree(struct json_object *obj) { enum json_type type ATTRIBUTE((unused)); - csel_node_s *prevnode = NULL, *tmp, *node = NULL; + MPIR_Csel_node_s *prevnode = NULL, *tmp, *node = NULL; json_object_object_foreach(obj, key, val) { type = json_object_get_type(val); MPIR_Assert(type == json_type_object); - char *ckey = MPL_strdup_no_spaces(key); + const char *ckey = key; + + tmp = 
MPL_calloc(sizeof(MPIR_Csel_node_s), 1, MPL_MEM_COLL); + + if (!strncmp(ckey, "algorithm=", strlen("algorithm="))) { + const char *s = ckey + strlen("algorithm="); - tmp = MPL_malloc(sizeof(csel_node_s), MPL_MEM_COLL); + MPII_Csel_container_s *cnt = MPL_calloc(sizeof(MPII_Csel_container_s), 1, MPL_MEM_COLL); + cnt->id = MPIR_CSEL_NUM_ALGORITHMS; + for (int i = 0; i < MPIR_CSEL_NUM_ALGORITHMS; i++) { + if (!strcmp(s, MPIR_Coll_algo_names[i])) { + cnt->id = i; + break; + } + } + if (cnt->id == MPIR_CSEL_NUM_ALGORITHMS) { + printf("parse_json_tree: unrecognized algorithm %s\n", s); + return NULL; + } + + MPII_Csel_parse_container_params(val, cnt); - if (!strncmp(ckey, "composition=", strlen("composition=")) || - !strncmp(ckey, "algorithm=", strlen("algorithm="))) { tmp->type = CSEL_NODE_TYPE__CONTAINER; - tmp->u.cnt.container = create_container(obj); - MPL_free(ckey); + tmp->u.container = cnt; return tmp; } /* this node must be an operator type */ - tmp->success = parse_json_tree(json_object_object_get(obj, key), create_container); + tmp->success = parse_json_tree(json_object_object_get(obj, key)); tmp->failure = NULL; if (node == NULL) @@ -331,224 +70,55 @@ static csel_node_s *parse_json_tree(struct json_object *obj, prevnode->failure = tmp; prevnode = tmp; - if (!strcmp(ckey, "is_multi_threaded=yes")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED; - tmp->u.is_multi_threaded.val = true; - } else if (!strcmp(ckey, "is_multi_threaded=no")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED; - tmp->u.is_multi_threaded.val = false; - } else if (!strcmp(ckey, "comm_type=intra")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTRA; - } else if (!strcmp(ckey, "comm_type=inter")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTER; - } else if (!strncmp(ckey, "collective=", strlen("collective="))) { + if (!strncmp(ckey, "collective=", strlen("collective="))) { tmp->type = CSEL_NODE_TYPE__OPERATOR__COLLECTIVE; - - char *str = ckey + 
strlen("collective="); - if (!strcmp(str, "allgather")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ALLGATHER; - else if (!strcmp(str, "allgatherv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ALLGATHERV; - else if (!strcmp(str, "allreduce")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ALLREDUCE; - else if (!strcmp(str, "alltoall")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALL; - else if (!strcmp(str, "alltoallv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALLV; - else if (!strcmp(str, "alltoallw")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ALLTOALLW; - else if (!strcmp(str, "barrier")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__BARRIER; - else if (!strcmp(str, "bcast")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__BCAST; - else if (!strcmp(str, "exscan")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__EXSCAN; - else if (!strcmp(str, "gather")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__GATHER; - else if (!strcmp(str, "gatherv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__GATHERV; - else if (!strcmp(str, "iallgather")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IALLGATHER; - else if (!strcmp(str, "iallgatherv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IALLGATHERV; - else if (!strcmp(str, "iallreduce")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IALLREDUCE; - else if (!strcmp(str, "ialltoall")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IALLTOALL; - else if (!strcmp(str, "ialltoallv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IALLTOALLV; - else if (!strcmp(str, "ialltoallw")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IALLTOALLW; - else if (!strcmp(str, "ibarrier")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IBARRIER; - else if (!strcmp(str, "ibcast")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IBCAST; - else if (!strcmp(str, "iexscan")) - tmp->u.collective.coll_type = 
MPIR_CSEL_COLL_TYPE__IEXSCAN; - else if (!strcmp(str, "igather")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IGATHER; - else if (!strcmp(str, "igatherv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IGATHERV; - else if (!strcmp(str, "ineighbor_allgather")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHER; - else if (!strcmp(str, "ineighbor_allgatherv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLGATHERV; - else if (!strcmp(str, "ineighbor_alltoall")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALL; - else if (!strcmp(str, "ineighbor_alltoallv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLV; - else if (!strcmp(str, "ineighbor_alltoallw")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__INEIGHBOR_ALLTOALLW; - else if (!strcmp(str, "ireduce")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE; - else if (!strcmp(str, "ireduce_scatter")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER; - else if (!strcmp(str, "ireduce_scatter_block")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK; - else if (!strcmp(str, "iscan")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ISCAN; - else if (!strcmp(str, "iscatter")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ISCATTER; - else if (!strcmp(str, "iscatterv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__ISCATTERV; - else if (!strcmp(str, "neighbor_allgather")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHER; - else if (!strcmp(str, "neighbor_allgatherv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLGATHERV; - else if (!strcmp(str, "neighbor_alltoall")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALL; - else if (!strcmp(str, "neighbor_alltoallv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLV; - else if (!strcmp(str, "neighbor_alltoallw")) 
- tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__NEIGHBOR_ALLTOALLW; - else if (!strcmp(str, "reduce")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__REDUCE; - else if (!strcmp(str, "reduce_scatter")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER; - else if (!strcmp(str, "reduce_scatter_block")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK; - else if (!strcmp(str, "scan")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__SCAN; - else if (!strcmp(str, "scatter")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__SCATTER; - else if (!strcmp(str, "scatterv")) - tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__SCATTERV; - else { - MPIR_Assert(0); + const char *s = ckey + strlen("collective="); + tmp->u.collective.coll_type = MPIR_CSEL_COLL_TYPE__END; + for (int i = 0; i < MPIR_CSEL_NUM_COLL_TYPES; i++) { + if (!strcmp(s, MPIR_Coll_type_names[i])) { + tmp->u.collective.coll_type = i; + break; + } } - } else if (!strcmp(ckey, "comm_size=pow2")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_POW2; - } else if (!strcmp(ckey, "comm_size=node_comm_size")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_NODE_COMM_SIZE; - } else if (!strncmp(ckey, "comm_size<=", strlen("comm_size<="))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LE; - tmp->u.comm_size_le.val = atoi(ckey + strlen("comm_size<=")); - } else if (!strncmp(ckey, "comm_size<", strlen("comm_size<"))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LT; - tmp->u.comm_size_lt.val = atoi(ckey + strlen("comm_size<")); - } else if (!strncmp(ckey, "count<=", strlen("count<="))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COUNT_LE; - tmp->u.count_le.val = atoi(ckey + strlen("count<=")); - } else if (!strcmp(ckey, "counttype = CSEL_NODE_TYPE__OPERATOR__COUNT_LT_POW2; - } else if (!strncmp(ckey, "avg_msg_size<=", strlen("avg_msg_size<="))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LE; - tmp->u.avg_msg_size_le.val = atoi(ckey + 
strlen("avg_msg_size<=")); - } else if (!strncmp(ckey, "avg_msg_size<", strlen("avg_msg_size<"))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LT; - tmp->u.avg_msg_size_lt.val = atoi(ckey + strlen("avg_msg_size<")); - } else if (!strncmp(ckey, "total_msg_size<=", strlen("total_msg_size<="))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LE; - tmp->u.total_msg_size_le.val = atoi(ckey + strlen("total_msg_size<=")); - } else if (!strncmp(ckey, "total_msg_size<", strlen("total_msg_size<"))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LT; - tmp->u.total_msg_size_lt.val = atoi(ckey + strlen("total_msg_size<")); - } else if (!strcmp(ckey, "is_commutative=yes")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE; - tmp->u.is_commutative.val = true; - } else if (!strcmp(ckey, "is_commutative=no")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE; - tmp->u.is_commutative.val = false; - } else if (!strcmp(ckey, "is_sendbuf_inplace=yes")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE; - tmp->u.is_sbuf_inplace.val = true; - } else if (!strcmp(ckey, "is_sendbuf_inplace=no")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE; - tmp->u.is_sbuf_inplace.val = false; - } else if (!strcmp(ckey, "is_op_built_in=yes")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN; - tmp->u.is_op_built_in.val = true; - } else if (!strcmp(ckey, "is_op_built_in=no")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN; - tmp->u.is_op_built_in.val = false; - } else if (!strcmp(ckey, "is_block_regular=yes")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR; - tmp->u.is_block_regular.val = true; - } else if (!strcmp(ckey, "is_block_regular=no")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR; - tmp->u.is_block_regular.val = false; - } else if (!strcmp(ckey, "is_node_consecutive=yes")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE; - tmp->u.is_node_consecutive.val = true; - } else if 
(!strcmp(ckey, "is_node_consecutive=no")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE; - tmp->u.is_node_consecutive.val = false; - } else if (!strncmp(ckey, "comm_avg_ppn<=", strlen("comm_avg_ppn<="))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LE; - tmp->u.comm_avg_ppn_le.val = atoi(ckey + strlen("comm_avg_ppn<=")); - } else if (!strncmp(ckey, "comm_avg_ppn<", strlen("comm_avg_ppn<"))) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LT; - tmp->u.comm_avg_ppn_le.val = atoi(ckey + strlen("comm_avg_ppn<")); - } else if (!strcmp(ckey, "comm_hierarchy=parent")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY; - tmp->u.comm_hierarchy.val = true; - } else if (!strcmp(ckey, "comm_hierarchy=node_roots")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY; - tmp->u.comm_hierarchy.val = false; - } else if (!strcmp(ckey, "comm_hierarchy=node")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY; - tmp->u.comm_hierarchy.val = false; - } else if (!strcmp(ckey, "comm_hierarchy=flat")) { - tmp->type = CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY; - tmp->u.comm_hierarchy.val = false; + MPIR_Assert(tmp->u.collective.coll_type != MPIR_CSEL_COLL_TYPE__END); } else if (key_is_any(ckey)) { tmp->type = CSEL_NODE_TYPE__OPERATOR__ANY; } else { - fprintf(stderr, "unknown key %s\n", key); - fflush(stderr); - MPIR_Assert(0); + int mpi_errno = MPII_Csel_parse_operator(ckey, tmp); + if (mpi_errno != MPI_SUCCESS) { + printf("parse_json_tree: unknown key %s\n", ckey); + MPIR_Assert(0); + } } - - MPL_free(ckey); } return node; } -int MPIR_Csel_create_from_buf(const char *json, - void *(*create_container) (struct json_object *), void **csel_) +int MPIR_Csel_create_from_buf(const char *json, void **csel_) { + MPIR_Csel_node_s *csel_root = NULL; + struct json_object *tree; tree = json_tokener_parse(json); if (tree == NULL) goto fn_exit; - csel_node_s *csel_root = parse_json_tree(tree, create_container); - if (csel_root) { - 
validate_tree(csel_root); - } else { - MPIR_Assert(0); - } + csel_root = parse_json_tree(tree); + MPIR_Assert(csel_root); json_object_put(tree); fn_exit: + if (0 && MPIR_Process.rank == 0) { + printf("====\n"); + MPIR_Csel_print_tree(csel_root, 0); + } *csel_ = csel_root; return 0; } -int MPIR_Csel_create_from_file(const char *json_file, - void *(*create_container) (struct json_object *), void **csel_) +int MPIR_Csel_create_from_file(const char *json_file, void **csel_) { int mpi_errno = MPI_SUCCESS; @@ -563,85 +133,31 @@ int MPIR_Csel_create_from_file(const char *json_file, char *json = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - MPIR_Csel_create_from_buf(json, create_container, csel_); + MPIR_Csel_create_from_buf(json, csel_); fn_fail: return mpi_errno; } -static csel_node_s *prune_tree(csel_node_s * root, MPIR_Comm * comm_ptr) +int MPIR_Csel_free(void *csel_root) { - /* Do not prune tree based on CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED, as during init - * MPIR_IS_THREADED is set to 0 temporarily, which results in having incorrect pruned tree */ - for (csel_node_s * node = root; node;) { - switch (node->type) { - case CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTRA: - if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTER: - if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LE: - if (comm_ptr->local_size <= node->u.comm_size_le.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LT: - if (comm_ptr->local_size < node->u.comm_size_lt.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_NODE_COMM_SIZE: - if ((comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY) && comm_ptr->num_external == 1) - node = 
node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_POW2: - if (comm_ptr->local_size & (comm_ptr->local_size - 1)) - node = node->failure; - else - node = node->success; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY: - if (MPIR_Comm_is_parent_comm(comm_ptr) == node->u.comm_hierarchy.val) - node = node->success; - else - node = node->failure; - break; + int mpi_errno = MPI_SUCCESS; - case CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE: - if (MPII_Comm_is_node_consecutive(comm_ptr) == node->u.is_node_consecutive.val) - node = node->success; - else - node = node->failure; - break; + if (csel_root) { + free_tree(csel_root); + } - case CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LE: - if ((comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY) && - comm_ptr->local_size <= node->u.comm_avg_ppn_le.val * comm_ptr->num_external) - node = node->success; - else - node = node->failure; - break; + return mpi_errno; +} - case CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LT: - if ((comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY) && - comm_ptr->local_size < node->u.comm_avg_ppn_le.val * comm_ptr->num_external) +MPII_Csel_container_s *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s * coll_sig) +{ + MPIR_Assert(csel_); + MPIR_Csel_node_s *node = csel_; + while (node) { + switch (node->type) { + case CSEL_NODE_TYPE__OPERATOR__COLLECTIVE: + if (node->u.collective.coll_type == coll_sig->coll_type) node = node->success; else node = node->failure; @@ -651,17 +167,26 @@ static csel_node_s *prune_tree(csel_node_s * root, MPIR_Comm * comm_ptr) node = node->success; break; + case CSEL_NODE_TYPE__CONTAINER: + return node->u.container; + default: - return node; + node = MPII_Csel_run_condition(node, coll_sig); + MPIR_Assert(node); + break; } } - return root; + + MPIR_Assert(0 && "MPIR_Csel_search failed to find an algorithm"); + return NULL; } -static void free_tree(csel_node_s * node) +/* -- internal static routines -- */ + +static void 
free_tree(MPIR_Csel_node_s * node) { if (node->type == CSEL_NODE_TYPE__CONTAINER) { - MPL_free(node->u.cnt.container); + MPL_free(node->u.container); MPL_free(node); } else { if (node->success) @@ -672,626 +197,39 @@ static void free_tree(csel_node_s * node) } } -int MPIR_Csel_free(void *csel_root) -{ - int mpi_errno = MPI_SUCCESS; - - if (csel_root) { - free_tree(csel_root); - } - - return mpi_errno; -} - -static inline bool is_sendbuf_inplace(MPIR_Csel_coll_sig_s * coll_sig) -{ - bool sendbuf_inplace = false; - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__ALLTOALL: - sendbuf_inplace = (coll_sig->u.alltoall.sendbuf == MPI_IN_PLACE); - break; - case MPIR_CSEL_COLL_TYPE__IALLTOALL: - sendbuf_inplace = (coll_sig->u.ialltoall.sendbuf == MPI_IN_PLACE); - break; - case MPIR_CSEL_COLL_TYPE__ALLTOALLV: - sendbuf_inplace = (coll_sig->u.alltoallv.sendbuf == MPI_IN_PLACE); - break; - case MPIR_CSEL_COLL_TYPE__IALLTOALLV: - sendbuf_inplace = (coll_sig->u.ialltoallv.sendbuf == MPI_IN_PLACE); - break; - case MPIR_CSEL_COLL_TYPE__ALLTOALLW: - sendbuf_inplace = (coll_sig->u.alltoallw.sendbuf == MPI_IN_PLACE); - break; - case MPIR_CSEL_COLL_TYPE__IALLTOALLW: - sendbuf_inplace = (coll_sig->u.ialltoallw.sendbuf == MPI_IN_PLACE); - break; - default: - fprintf(stderr, "is_sendbuf_inplace not defined for coll_type %d\n", - coll_sig->coll_type); - MPIR_Assert(0); - break; - } - return sendbuf_inplace; -} - -static inline bool is_commutative(MPIR_Csel_coll_sig_s * coll_sig) -{ - bool commutative = false; - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - commutative = MPIR_Op_is_commutative(coll_sig->u.allreduce.op); - break; - case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - commutative = MPIR_Op_is_commutative(coll_sig->u.iallreduce.op); - break; - case MPIR_CSEL_COLL_TYPE__REDUCE: - commutative = MPIR_Op_is_commutative(coll_sig->u.reduce.op); - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE: - commutative = MPIR_Op_is_commutative(coll_sig->u.ireduce.op); - 
break; - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - commutative = MPIR_Op_is_commutative(coll_sig->u.reduce_scatter.op); - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - commutative = MPIR_Op_is_commutative(coll_sig->u.ireduce_scatter.op); - break; - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - commutative = MPIR_Op_is_commutative(coll_sig->u.reduce_scatter_block.op); - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - commutative = MPIR_Op_is_commutative(coll_sig->u.ireduce_scatter_block.op); - break; - default: - fprintf(stderr, "is_commutative not defined for coll_type %d\n", coll_sig->coll_type); - MPIR_Assert(0); - break; - } - return commutative; -} - -static inline bool is_op_built_in(MPIR_Csel_coll_sig_s * coll_sig) -{ - bool op_built_in = false; - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig->u.allreduce.op) == HANDLE_KIND_BUILTIN; - break; - case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig->u.iallreduce.op) == HANDLE_KIND_BUILTIN; - break; - case MPIR_CSEL_COLL_TYPE__REDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig->u.reduce.op) == HANDLE_KIND_BUILTIN; - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE: - op_built_in = HANDLE_GET_KIND(coll_sig->u.ireduce.op) == HANDLE_KIND_BUILTIN; - break; - default: - fprintf(stderr, "is_op_builtin not defined for coll_type %d\n", coll_sig->coll_type); - MPIR_Assert(0); - break; - } - return op_built_in; -} - -static inline bool is_block_regular(MPIR_Csel_coll_sig_s * coll_sig) -{ - bool is_regular = true; - int i = 0; - const MPI_Aint *recvcounts = NULL; - - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - recvcounts = coll_sig->u.reduce_scatter.recvcounts; - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - recvcounts = coll_sig->u.ireduce_scatter.recvcounts; - break; - default: - MPIR_Assert(0); - break; - } - for (i = 0; i < (coll_sig->comm_ptr->local_size - 1); ++i) { - if 
(recvcounts[i] != recvcounts[i + 1]) { - is_regular = false; - break; - } - } - return is_regular; -} - -static inline MPI_Aint get_avg_msgsize(MPIR_Csel_coll_sig_s * coll_sig) -{ - MPI_Aint msgsize = 0; - - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.allreduce.datatype, msgsize); - msgsize *= coll_sig->u.allreduce.count; - break; - - case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.iallreduce.datatype, msgsize); - msgsize *= coll_sig->u.iallreduce.count; - break; - - case MPIR_CSEL_COLL_TYPE__BCAST: - MPIR_Datatype_get_size_macro(coll_sig->u.bcast.datatype, msgsize); - msgsize *= coll_sig->u.bcast.count; - break; - - case MPIR_CSEL_COLL_TYPE__IBCAST: - MPIR_Datatype_get_size_macro(coll_sig->u.ibcast.datatype, msgsize); - msgsize *= coll_sig->u.ibcast.count; - break; - - case MPIR_CSEL_COLL_TYPE__REDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.reduce.datatype, msgsize); - msgsize *= coll_sig->u.reduce.count; - break; - - case MPIR_CSEL_COLL_TYPE__IREDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.ireduce.datatype, msgsize); - msgsize *= coll_sig->u.ireduce.count; - break; - - case MPIR_CSEL_COLL_TYPE__ALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig->u.alltoall.sendtype, msgsize); - msgsize *= coll_sig->u.alltoall.sendcount; - break; - - case MPIR_CSEL_COLL_TYPE__IALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig->u.ialltoall.sendtype, msgsize); - msgsize *= coll_sig->u.ialltoall.sendcount; - break; - - default: - fprintf(stderr, "avg_msg_size not defined for coll_type %d\n", coll_sig->coll_type); - MPIR_Assert(0); - break; - } - - return msgsize; -} - -static inline MPI_Aint get_count(MPIR_Csel_coll_sig_s * coll_sig) +void MPIR_Csel_print_tree(MPIR_Csel_node_s * node, int level) { - MPI_Aint count = 0; - int i = 0; - int comm_size = coll_sig->comm_ptr->local_size; - - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__BCAST: - count = 
coll_sig->u.bcast.count; - break; - case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - count = coll_sig->u.allreduce.count; - break; - case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - count = coll_sig->u.iallreduce.count; - break; - case MPIR_CSEL_COLL_TYPE__REDUCE: - count = coll_sig->u.reduce.count; - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE: - count = coll_sig->u.ireduce.count; - break; - case MPIR_CSEL_COLL_TYPE__ALLGATHER: - count = coll_sig->u.allgather.recvcount; - break; - case MPIR_CSEL_COLL_TYPE__ALLGATHERV: - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.allgatherv.recvcounts[i]; - break; - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - for (i = 0; i < comm_size; i++) - count += coll_sig->u.reduce_scatter.recvcounts[i]; - break; - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - count = coll_sig->u.reduce_scatter_block.recvcount; - break; - case MPIR_CSEL_COLL_TYPE__IALLGATHER: - count = coll_sig->u.iallgather.recvcount; - break; - case MPIR_CSEL_COLL_TYPE__IALLGATHERV: - for (i = 0; i < comm_size; i++) - count += coll_sig->u.iallgatherv.recvcounts[i]; - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - for (i = 0; i < comm_size; i++) - count += coll_sig->u.ireduce_scatter.recvcounts[i]; - break; - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - count = coll_sig->u.ireduce_scatter_block.recvcount; - break; - default: - MPIR_Assert(0); - break; + for (int i = 0; i < level; i++) { + printf(" "); } - return count; -} - -static inline MPI_Aint get_total_msgsize(MPIR_Csel_coll_sig_s * coll_sig) -{ - MPI_Aint total_bytes = 0, i = 0, count = 0, typesize = 0; - int comm_size = coll_sig->comm_ptr->local_size; - - switch (coll_sig->coll_type) { - case MPIR_CSEL_COLL_TYPE__ALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.allreduce.datatype, total_bytes); - total_bytes *= coll_sig->u.allreduce.count * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__BCAST: - MPIR_Datatype_get_size_macro(coll_sig->u.bcast.datatype, total_bytes); - total_bytes *= 
coll_sig->u.bcast.count * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__REDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.reduce.datatype, total_bytes); - total_bytes *= coll_sig->u.reduce.count * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__ALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig->u.alltoall.sendtype, total_bytes); - total_bytes *= coll_sig->u.alltoall.sendcount * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__ALLTOALLV: - MPIR_Datatype_get_size_macro(coll_sig->u.alltoallv.sendtype, total_bytes); - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.alltoallv.sendcounts[i]; - total_bytes *= count; - break; - - case MPIR_CSEL_COLL_TYPE__ALLTOALLW: - count = 0; - typesize = 0; - for (i = 0; i < comm_size; i++) { - MPIR_Datatype_get_size_macro(coll_sig->u.alltoallw.sendtypes[i], typesize); - count = coll_sig->u.alltoallw.sendcounts[i]; - total_bytes += (count * typesize); - } - break; - - case MPIR_CSEL_COLL_TYPE__ALLGATHER: - MPIR_Datatype_get_size_macro(coll_sig->u.allgather.recvtype, total_bytes); - total_bytes *= coll_sig->u.allgather.recvcount * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__ALLGATHERV: - MPIR_Datatype_get_size_macro(coll_sig->u.allgatherv.recvtype, total_bytes); - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.allgatherv.recvcounts[i]; - total_bytes *= count; - break; - - case MPIR_CSEL_COLL_TYPE__GATHER: - if (coll_sig->u.gather.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig->u.gather.recvtype, total_bytes); - /* use remote size for intercomm root */ - if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig->u.gather.recvcount * (coll_sig->comm_ptr->remote_size); - else - total_bytes = coll_sig->u.gather.recvcount * comm_size; - } else { - MPIR_Datatype_get_size_macro(coll_sig->u.gather.sendtype, total_bytes); - total_bytes = coll_sig->u.gather.sendcount * comm_size; - } - break; - - case MPIR_CSEL_COLL_TYPE__SCATTER: - if 
(coll_sig->u.scatter.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig->u.scatter.sendtype, total_bytes); - /* use remote size for intercomm root */ - if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig->u.scatter.sendcount * (coll_sig->comm_ptr->remote_size); - else - total_bytes = coll_sig->u.scatter.sendcount * comm_size; - } else { - MPIR_Datatype_get_size_macro(coll_sig->u.scatter.recvtype, total_bytes); - total_bytes = coll_sig->u.scatter.recvcount * comm_size; - } - break; - - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER: - MPIR_Datatype_get_size_macro(coll_sig->u.reduce_scatter.datatype, total_bytes); - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.reduce_scatter.recvcounts[i]; - total_bytes *= count; - break; - - case MPIR_CSEL_COLL_TYPE__REDUCE_SCATTER_BLOCK: - MPIR_Datatype_get_size_macro(coll_sig->u.reduce_scatter_block.datatype, total_bytes); - total_bytes *= coll_sig->u.reduce_scatter_block.recvcount * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IALLREDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.iallreduce.datatype, total_bytes); - total_bytes *= coll_sig->u.iallreduce.count * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IBCAST: - MPIR_Datatype_get_size_macro(coll_sig->u.ibcast.datatype, total_bytes); - total_bytes *= coll_sig->u.ibcast.count * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IREDUCE: - MPIR_Datatype_get_size_macro(coll_sig->u.ireduce.datatype, total_bytes); - total_bytes *= coll_sig->u.ireduce.count * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IALLTOALL: - MPIR_Datatype_get_size_macro(coll_sig->u.ialltoall.sendtype, total_bytes); - total_bytes *= coll_sig->u.ialltoall.sendcount * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IALLTOALLV: - MPIR_Datatype_get_size_macro(coll_sig->u.ialltoallv.sendtype, total_bytes); - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.ialltoallv.sendcounts[i]; - total_bytes *= count; - break; - - 
case MPIR_CSEL_COLL_TYPE__IALLTOALLW: - count = 0; - typesize = 0; - for (i = 0; i < comm_size; i++) { - MPIR_Datatype_get_size_macro(coll_sig->u.ialltoallw.sendtypes[i], typesize); - count = coll_sig->u.ialltoallw.sendcounts[i]; - total_bytes += (count * typesize); - } - break; - - case MPIR_CSEL_COLL_TYPE__IALLGATHER: - MPIR_Datatype_get_size_macro(coll_sig->u.iallgather.recvtype, total_bytes); - total_bytes *= coll_sig->u.iallgather.recvcount * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IALLGATHERV: - MPIR_Datatype_get_size_macro(coll_sig->u.iallgatherv.recvtype, total_bytes); - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.iallgatherv.recvcounts[i]; - total_bytes *= count; - break; - - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER: - MPIR_Datatype_get_size_macro(coll_sig->u.ireduce_scatter.datatype, total_bytes); - count = 0; - for (i = 0; i < comm_size; i++) - count += coll_sig->u.ireduce_scatter.recvcounts[i]; - total_bytes *= count; - break; - - case MPIR_CSEL_COLL_TYPE__IREDUCE_SCATTER_BLOCK: - MPIR_Datatype_get_size_macro(coll_sig->u.ireduce_scatter_block.datatype, total_bytes); - total_bytes = coll_sig->u.ireduce_scatter_block.recvcount * comm_size; - break; - - case MPIR_CSEL_COLL_TYPE__IGATHER: - if (coll_sig->u.igather.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig->u.igather.recvtype, total_bytes); - /* use remote size for intercomm root */ - if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = coll_sig->u.igather.recvcount * (coll_sig->comm_ptr->remote_size); - else - total_bytes = coll_sig->u.igather.recvcount * comm_size; + if (node->type == CSEL_NODE_TYPE__CONTAINER) { + printf("Algorithm: %s\n", MPIR_Coll_algo_names[node->u.container->id]); + } else if (node->type == CSEL_NODE_TYPE__OPERATOR__COLLECTIVE) { + printf("Collective: %s\n", MPIR_Coll_type_names[node->u.collective.coll_type]); + } else if (node->type == CSEL_NODE_TYPE__OPERATOR__ANY) { + printf("ANY\n"); + } else { + if 
(!node->u.condition.negate) { + if (!node->u.condition.thresh) { + printf("condition: %s\n", MPIR_Csel_condition_names[node->type]); } else { - MPIR_Datatype_get_size_macro(coll_sig->u.igather.sendtype, total_bytes); - total_bytes = coll_sig->u.igather.sendcount * comm_size; + printf("condition: %s(%d)\n", MPIR_Csel_condition_names[node->type], + node->u.condition.thresh); } - break; - - case MPIR_CSEL_COLL_TYPE__ISCATTER: - if (coll_sig->u.iscatter.root == MPI_ROOT) { - MPIR_Datatype_get_size_macro(coll_sig->u.iscatter.sendtype, total_bytes); - /* use remote size for intercomm root */ - if (coll_sig->comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - total_bytes = - coll_sig->u.iscatter.sendcount * (coll_sig->comm_ptr->remote_size); - else - total_bytes = coll_sig->u.iscatter.sendcount * comm_size; + } else { + if (!node->u.condition.thresh) { + printf("condition: !%s\n", MPIR_Csel_condition_names[node->type]); } else { - MPIR_Datatype_get_size_macro(coll_sig->u.iscatter.recvtype, total_bytes); - total_bytes = coll_sig->u.iscatter.recvcount * comm_size; + printf("condition: !%s(%d)\n", MPIR_Csel_condition_names[node->type], + node->u.condition.thresh); } - break; - - default: - MPIR_Assert(0); - break; - } - - return total_bytes; -} - -void *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s * coll_sig) -{ - MPIR_Comm *comm_ptr = coll_sig->comm_ptr; - - MPIR_Assert(csel_); - - csel_node_s *root = csel_; - - for (csel_node_s * node = root; node;) { - switch (node->type) { - case CSEL_NODE_TYPE__OPERATOR__IS_MULTI_THREADED: - if (MPIR_IS_THREADED == node->u.is_multi_threaded.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTRA: - if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_TYPE_INTER: - if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) - node = node->success; - else - node = 
node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LE: - if (comm_ptr->local_size <= node->u.comm_size_le.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_LT: - if (comm_ptr->local_size <= node->u.comm_size_lt.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_NODE_COMM_SIZE: - if ((comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY) && comm_ptr->num_external == 1) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_SIZE_POW2: - if (!(comm_ptr->local_size & (comm_ptr->local_size - 1))) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COLLECTIVE: - if (node->u.collective.coll_type == coll_sig->coll_type) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LE: - if (get_avg_msgsize(coll_sig) <= node->u.avg_msg_size_le.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__AVG_MSG_SIZE_LT: - if (get_avg_msgsize(coll_sig) < node->u.avg_msg_size_lt.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LE: - if (get_total_msgsize(coll_sig) <= node->u.total_msg_size_le.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__TOTAL_MSG_SIZE_LT: - if (get_total_msgsize(coll_sig) < node->u.total_msg_size_lt.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COUNT_LE: - if (get_count(coll_sig) <= node->u.count_le.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COUNT_LT_POW2: - if (get_count(coll_sig) < MPL_pof2(coll_sig->comm_ptr->local_size)) - node = node->success; - else - node = node->failure; - break; - - case 
CSEL_NODE_TYPE__OPERATOR__IS_COMMUTATIVE: - if (is_commutative(coll_sig) == node->u.is_commutative.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__IS_SBUF_INPLACE: - if (is_sendbuf_inplace(coll_sig) == node->u.is_sbuf_inplace.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__IS_OP_BUILT_IN: - if (is_op_built_in(coll_sig) == node->u.is_op_built_in.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__IS_BLOCK_REGULAR: - if (is_block_regular(coll_sig) == node->u.is_block_regular.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__IS_NODE_CONSECUTIVE: - if (MPII_Comm_is_node_consecutive(coll_sig->comm_ptr) == - node->u.is_node_consecutive.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LE: - if ((comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY) && - comm_ptr->local_size <= node->u.comm_avg_ppn_le.val * comm_ptr->num_external) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_AVG_PPN_LT: - if ((comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY) && - comm_ptr->local_size < node->u.comm_avg_ppn_le.val * comm_ptr->num_external) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__COMM_HIERARCHY: - if (MPIR_Comm_is_parent_comm(comm_ptr) == node->u.comm_hierarchy.val) - node = node->success; - else - node = node->failure; - break; - - case CSEL_NODE_TYPE__OPERATOR__ANY: - node = node->success; - break; - - case CSEL_NODE_TYPE__CONTAINER: - goto fn_exit; - - default: - MPIR_Assert(0); } } - MPIR_Assert(node == NULL); - - fn_exit: - return node ? 
node->u.cnt.container : NULL; + if (node->success) { + MPIR_Csel_print_tree(node->success, level + 1); + } + if (node->failure) { + MPIR_Csel_print_tree(node->failure, level + 1); + } } diff --git a/src/mpi/coll/src/csel_container.c b/src/mpi/coll/src/csel_container.c deleted file mode 100644 index 2c54f0218d7..00000000000 --- a/src/mpi/coll/src/csel_container.c +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpiimpl.h" -#include "coll_impl.h" -#include "mpl.h" -#include "coll_csel.h" - -static void parse_container_params(struct json_object *obj, MPII_Csel_container_s * cnt) -{ - MPIR_Assert(obj != NULL); - char *ckey; - - switch (cnt->id) { - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_tree: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.ibcast.intra_tsp_tree.chunk_size = - atoi(ckey + strlen("chunk_size=")); - else if (!strncmp(ckey, "tree_type=", strlen("tree_type="))) - cnt->u.ibcast.intra_tsp_tree.tree_type = - get_tree_type_from_string(ckey + strlen("tree_type=")); - else if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.ibcast.intra_tsp_tree.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_ring: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.ibcast.intra_tsp_ring.chunk_size = - atoi(ckey + strlen("chunk_size=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_tree: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "tree_type=", strlen("tree_type="))) - cnt->u.bcast.intra_tree.tree_type = - get_tree_type_from_string(ckey + strlen("tree_type=")); 
- else if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.bcast.intra_tree.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "is_non_blocking=", strlen("is_non_blocking="))) - cnt->u.bcast.intra_tree.is_non_blocking = - atoi(ckey + strlen("is_non_blocking=")); - else if (!strncmp(ckey, "topo_overhead=", strlen("topo_overhead="))) - cnt->u.bcast.intra_tree.topo_overhead = - atoi(ckey + strlen("topo_overhead=")); - else if (!strncmp(ckey, "topo_diff_groups=", strlen("topo_diff_groups="))) - cnt->u.bcast.intra_tree.topo_diff_groups = - atoi(ckey + strlen("topo_diff_groups=")); - else if (!strncmp(ckey, "topo_diff_switches=", strlen("topo_diff_switches="))) - cnt->u.bcast.intra_tree.topo_diff_switches = - atoi(ckey + strlen("topo_diff_switches=")); - else if (!strncmp(ckey, "topo_same_switches=", strlen("topo_same_switches="))) - cnt->u.bcast.intra_tree.topo_same_switches = - atoi(ckey + strlen("topo_same_switches=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Bcast_intra_pipelined_tree: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "tree_type=", strlen("tree_type="))) - cnt->u.bcast.intra_pipelined_tree.tree_type = - get_tree_type_from_string(ckey + strlen("tree_type=")); - else if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.bcast.intra_pipelined_tree.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "is_non_blocking=", strlen("is_non_blocking="))) - cnt->u.bcast.intra_pipelined_tree.is_non_blocking = - atoi(ckey + strlen("is_non_blocking=")); - else if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.bcast.intra_pipelined_tree.chunk_size = - atoi(ckey + strlen("chunk_size=")); - else if (!strncmp(ckey, "recv_pre_posted=", strlen("recv_pre_posted="))) - cnt->u.bcast.intra_pipelined_tree.recv_pre_posted = - atoi(ckey + strlen("recv_pre_posted=")); - MPL_free(ckey); - } - } - break; - - case 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_tree: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "buffer_per_child=", strlen("buffer_per_child="))) - cnt->u.ireduce.intra_tsp_tree.buffer_per_child = - atoi(ckey + strlen("buffer_per_child=")); - else if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.ireduce.intra_tsp_tree.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "tree_type=", strlen("tree_type="))) - cnt->u.ireduce.intra_tsp_tree.tree_type = - get_tree_type_from_string(ckey + strlen("tree_type=")); - else if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.ireduce.intra_tsp_tree.chunk_size = - atoi(ckey + strlen("chunk_size=")); - else if (!strncmp(ckey, "topo_overhead=", strlen("topo_overhead="))) - cnt->u.ireduce.intra_tsp_tree.topo_overhead = - atoi(ckey + strlen("topo_overhead=")); - else if (!strncmp(ckey, "topo_diff_groups=", strlen("topo_diff_groups="))) - cnt->u.ireduce.intra_tsp_tree.topo_diff_groups = - atoi(ckey + strlen("topo_diff_groups=")); - else if (!strncmp(ckey, "topo_diff_switches=", strlen("topo_diff_switches="))) - cnt->u.ireduce.intra_tsp_tree.topo_diff_switches = - atoi(ckey + strlen("topo_diff_switches=")); - else if (!strncmp(ckey, "topo_same_switches=", strlen("topo_same_switches="))) - cnt->u.ireduce.intra_tsp_tree.topo_same_switches = - atoi(ckey + strlen("topo_same_switches=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ireduce_intra_tsp_ring: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "buffer_per_child=", strlen("buffer_per_child="))) - cnt->u.ireduce.intra_tsp_ring.buffer_per_child = - atoi(ckey + strlen("buffer_per_child=")); - else if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.ireduce.intra_tsp_tree.chunk_size = - atoi(ckey + strlen("chunk_size=")); - MPL_free(ckey); - } - } - break; - - case 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_tree: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "buffer_per_child=", strlen("buffer_per_child="))) - cnt->u.iallreduce.intra_tsp_tree.buffer_per_child = - atoi(ckey + strlen("buffer_per_child=")); - else if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.iallreduce.intra_tsp_tree.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "tree_type=", strlen("tree_type="))) - cnt->u.iallreduce.intra_tsp_tree.tree_type = - get_tree_type_from_string(ckey + strlen("tree_type=")); - else if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.iallreduce.intra_tsp_tree.chunk_size = - atoi(ckey + strlen("chunk_size=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recursive_multiplying: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allreduce.intra_recursive_multiplying.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_tree: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "buffer_per_child=", strlen("buffer_per_child="))) - cnt->u.allreduce.intra_tree.buffer_per_child = - atoi(ckey + strlen("buffer_per_child=")); - else if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allreduce.intra_tree.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "tree_type=", strlen("tree_type="))) - cnt->u.allreduce.intra_tree.tree_type = - get_tree_type_from_string(ckey + strlen("tree_type=")); - else if (!strncmp(ckey, "chunk_size=", strlen("chunk_size="))) - cnt->u.allreduce.intra_tree.chunk_size = atoi(ckey + strlen("chunk_size=")); - else if (!strncmp(ckey, "topo_overhead=", strlen("topo_overhead="))) - cnt->u.allreduce.intra_tree.topo_overhead = - atoi(ckey + 
strlen("topo_overhead=")); - else if (!strncmp(ckey, "topo_diff_groups=", strlen("topo_diff_groups="))) - cnt->u.allreduce.intra_tree.topo_diff_groups = - atoi(ckey + strlen("topo_diff_groups=")); - else if (!strncmp(ckey, "topo_diff_switches=", strlen("topo_diff_switches="))) - cnt->u.allreduce.intra_tree.topo_diff_switches = - atoi(ckey + strlen("topo_diff_switches=")); - else if (!strncmp(ckey, "topo_same_switches=", strlen("topo_same_switches="))) - cnt->u.allreduce.intra_tree.topo_same_switches = - atoi(ckey + strlen("topo_same_switches=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_recexch: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allreduce.intra_recexch.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "single_phase_recv=", strlen("single_phase_recv="))) - cnt->u.allreduce.intra_recexch.single_phase_recv = - atoi(ckey + strlen("single_phase_recv=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_k_reduce_scatter_allgather: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allreduce.intra_k_reduce_scatter_allgather.k = - atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "single_phase_recv=", strlen("single_phase_recv="))) - cnt->u.allreduce.intra_k_reduce_scatter_allgather.single_phase_recv = - atoi(ckey + strlen("single_phase_recv=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allreduce_intra_ccl: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "ccl=", strlen("ccl="))) - cnt->u.allreduce.intra_ccl.ccl = get_ccl_from_string(ckey + strlen("ccl=")); - MPL_free(ckey); - } - } - break; - - case 
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Ibcast_intra_tsp_scatterv_allgatherv: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "scatterv_k=", strlen("scatterv_k="))) - cnt->u.ibcast.intra_tsp_scatterv_allgatherv.scatterv_k = - atoi(ckey + strlen("scatterv_k=")); - else if (!strncmp(ckey, "allgatherv_k=", strlen("allgatherv_k="))) - cnt->u.ibcast.intra_tsp_scatterv_allgatherv.allgatherv_k = - atoi(ckey + strlen("allgatherv_k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_single_buffer: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.iallreduce.intra_tsp_recexch_single_buffer.k = - atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.iallreduce.intra_tsp_recexch.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Iallreduce_intra_tsp_recexch_reduce_scatter_recexch_allgatherv: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.iallreduce.intra_tsp_recexch_reduce_scatter_recexch_allgatherv.k = - atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_k_brucks: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allgather.intra_k_brucks.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_doubling: - { - json_object_object_foreach(obj, key, 
val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allgather.intra_recexch_doubling.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "single_phase_recv=", strlen("single_phase_recv="))) - cnt->u.allgather.intra_recexch_doubling.single_phase_recv = - atoi(ckey + strlen("single_phase_recv=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Allgather_intra_recexch_halving: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.allgather.intra_recexch_halving.k = atoi(ckey + strlen("k=")); - else if (!strncmp(ckey, "single_phase_recv=", strlen("single_phase_recv="))) - cnt->u.allgather.intra_recexch_halving.single_phase_recv = - atoi(ckey + strlen("single_phase_recv=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoall_intra_k_brucks: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.alltoall.intra_k_brucks.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.barrier.intra_k_dissemination.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - case MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch: - { - json_object_object_foreach(obj, key, val) { - ckey = MPL_strdup_no_spaces(key); - if (!strncmp(ckey, "k=", strlen("k="))) - cnt->u.barrier.intra_recexch.k = atoi(ckey + strlen("k=")); - MPL_free(ckey); - } - } - break; - - default: - /* Algorithm does not have parameters */ - break; - } -} - -void *MPII_Create_container(struct json_object *obj) -{ - MPII_Csel_container_s *cnt = 
MPL_malloc(sizeof(MPII_Csel_container_s), MPL_MEM_COLL); - - json_object_object_foreach(obj, key, val) { - char *ckey = MPL_strdup_no_spaces(key); - - MPIR_COLL_SET_CONTAINER_ID(); - - MPL_free(ckey); - } - - /* process algorithm parameters */ - parse_container_params(json_object_object_get(obj, key), cnt); - - return (void *) cnt; -} From 12f3f8af8525547ec754b50cb4b678027294f002 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 11:49:14 -0500 Subject: [PATCH 39/47] ---- START HERE ---- --- dummy | 1 + 1 file changed, 1 insertion(+) create mode 100644 dummy diff --git a/dummy b/dummy new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dummy @@ -0,0 +1 @@ +1 From 3aa7619dee097e045181b3f333b2a7f036222710 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 13:32:36 -0500 Subject: [PATCH 40/47] TEMP: fixup coll/csel: update csel --- src/include/mpir_coll.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index 44ee599961b..ab8763aef7b 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -32,12 +32,6 @@ typedef struct MPII_Csel_container MPII_Csel_container_s; #define MPIR_COLL_ATTR_HAS_ERR(coll_attr) ((coll_attr) & MPIR_COLL_ATTR_ERR_MASK) -/* bit 8-31, MPIR-layer */ -#define MPIR_COLL_ATTR__inplace 0x00000100 -#define MPIR_COLL_ATTR__pof2 0x00000200 -#define MPIR_COLL_ATTR__commutative 0x00000400 -#define MPIR_COLL_ATTR__builtin_op 0x00000800 - struct MPIR_Csel_coll_sig { MPIR_Csel_coll_type_e coll_type; MPIR_Comm *comm_ptr; From 78a8b7d395886ff33110dc04065b4564b3868712 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 19:37:34 -0500 Subject: [PATCH 41/47] coll/gen_coll: add "inline" and "func_name" directives In coll_algorithms.txt, add "inline" attribute to skip add prototype for the corresponding algorithm function since it is inlined in the headers. Add "func_name" to directly specify algorithm function name. 
Add "macro_guard" to specify a preproc condition for the algorithm function. For example, the ch4 posix algorithm function needs to be protected by "#if defined(MPIDI_CH4_SHM_POSIX)" (to be defined). --- maint/gen_coll.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index 846d82eed6f..c414ebbc0c3 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -126,6 +126,8 @@ def dump_algo_prep(algo): add_prototype(decl) dump_split(0, decl) dump_open('{') + if 'macro_guard' in algo: + G.out.append("#if %s" % algo['macro_guard']) G.out.append("int mpi_errno = MPI_SUCCESS;") G.out.append("") dump_algo_prep(algo) @@ -136,6 +138,10 @@ def dump_algo_prep(algo): G.out.append("return mpi_errno;") G.out.append("fn_fail:") G.out.append("goto fn_exit;") + if 'macro_guard' in algo: + G.out.append("#else") + G.out.append("return MPI_ERR_OTHER;") + G.out.append("#endif") dump_close('}') G.out.append("") @@ -159,6 +165,9 @@ def get_algo_params(algo): return params for algo in G.algo_list: + if 'inline' in algo: + # inline functions are already defined in headers, do not need prototypes + continue if algo['func-commkind'] == 'general': decl = "int %s(%s)" % (algo['name'], "MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt") add_prototype(decl) @@ -798,6 +807,8 @@ def get_algo_coll_name(algo): return func_name def get_algo_funcname(algo): + if "func_name" in algo: + return algo['func_name'] if algo["func-commkind"] == "general": return algo['name'] @@ -983,7 +994,7 @@ def dump_c_file(f, lines): print(" %s:" % RE.m.group(1), file=Out) else: # print the line with correct indentations - if indent > 0 and not RE.match(r'#(if|endif)', l): + if indent > 0 and not RE.match(r'#(if|else|endif)', l): print(" " * indent, end='', file=Out) print(l, file=Out) From 58e03082c3ca4e17072cc413ae804461a95181e4 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 21:11:03 -0500 Subject: [PATCH 42/47] coll/gen_coll: 
add conditional condition Add conditional condition - the condition function only can be called inside preprocess macro guard. We need generate another header file, coll_autogen.h, that are loaded after mpidpos.h. "coll_algos.h" goes into mpir_coll.h, which is included in between mpidpre.h and mpidpost.h. Refactor a bit so all the conditions parsing logics are wrapped in functions such as get_conditon_name, get_condition_func, etc. and they are defined together. --- maint/gen_coll.py | 156 +++++++++++++++++++------------ src/mpi/coll/coll_algorithms.txt | 2 + src/mpi/coll/include/coll_csel.h | 2 + 3 files changed, 98 insertions(+), 62 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index c414ebbc0c3..eb8d312b3f2 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -24,7 +24,8 @@ def main(): G.algo_list = collect_algo_list() G.out = [] # output to C file - G.out2 = [] # output to header + G.out2 = [] # output to a header that is included in mpir_coll.h, between mpidpre.h and mpidpost.h + G.out3 = [] # output to a header that is included in coll_csel.h, after mpidpost.h G.prototypes = [] G.out.append("#include \"mpiimpl.h\"") G.out.append("#include \"coll_csel.h\"") @@ -72,10 +73,12 @@ def main(): dump_MPIR_Csel_node_type_e() # algorithm container struct dump_MPII_Csel_container() - G.out2.append("") + # conditional conditions (conditions with #if-guard) + dump_conditional_conditions(G.out3) dump_c_file("src/mpi/coll/mpir_coll.c", G.out) dump_coll_algos_h("src/mpi/coll/include/coll_algos.h", G.prototypes, G.out2) + dump_coll_autogen_h("src/mpi/coll/include/coll_autogen.h", G.out3) def collect_algo_list(): algo_list = [] @@ -216,11 +219,7 @@ def dump_MPII_Csel_init_condition_names(): G.out.append(decl) dump_open('{') for a in G.conditions: - if RE.match(r'(.+)\(thresh\)', a): - cond = RE.m.group(1) - else: - cond = a - G.out.append("MPIR_Csel_condition_names[%s] = \"%s\";" % (condition_id(cond), cond)) + G.out.append("MPIR_Csel_condition_names[%s] 
= \"%s\";" % (condition_id(a), get_condition_name(a))) dump_close('}') def dump_MPII_Coll_algo_init(): @@ -317,20 +316,17 @@ def dump_MPII_Csel_parse_operator(): if_clase = "if" for a in G.conditions: - cond = a - has_thresh = False - if RE.match(r'(.+)\(thresh\)', a): - cond = RE.m.group(1) - has_thresh = True - n = len(cond) + name = get_condition_name(a) + has_thresh = condition_has_thresh(a) + n = len(name) if has_thresh: - G.out.append("%s (strncmp(ckey, \"%s\", %d) == 0) {" % (if_clase, cond, n)) - G.out.append(" csel_node->type = %s;" % condition_id(cond)) + G.out.append("%s (strncmp(ckey, \"%s\", %d) == 0) {" % (if_clase, name, n)) + G.out.append(" csel_node->type = %s;" % condition_id(a)) G.out.append(" MPIR_Assert(ckey[%d] == '(');" % n) G.out.append(" csel_node->u.condition.thresh = atoi(ckey + %d);" % (n + 1)) else: - G.out.append("%s (strcmp(ckey, \"%s\") == 0) {" % (if_clase, cond)) - G.out.append(" csel_node->type = %s;" % condition_id(cond)) + G.out.append("%s (strcmp(ckey, \"%s\") == 0) {" % (if_clase, name)) + G.out.append(" csel_node->type = %s;" % condition_id(a)) if_clase = "} else if" G.out.append("} else {") G.out.append(" return MPI_ERR_OTHER;") @@ -349,16 +345,13 @@ def dump_MPII_Csel_run_condition(): G.out.append("bool cond;") dump_open("switch(node->type) {") for a in G.conditions: - cond = a - has_thresh = False - if RE.match(r'(.+)\(thresh\)', a): - cond = RE.m.group(1) - has_thresh = True - dump_open("case %s:" % condition_id(cond)) + has_thresh = condition_has_thresh(a) + cond_func = get_condition_function(a) + dump_open("case %s:" % condition_id(a)) if has_thresh: - G.out.append("cond = (node->u.condition.thresh <= %s(coll_sig));" % G.conditions[a]) + G.out.append("cond = (node->u.condition.thresh <= %s(coll_sig));" % cond_func) else: - G.out.append("cond = %s(coll_sig);" % G.conditions[a]) + G.out.append("cond = %s(coll_sig);" % cond_func) G.out.append("if (node->u.condition.negate) cond = !cond;") G.out.append("return cond ? 
node->success : node->failure;") dump_close("") @@ -383,20 +376,17 @@ def dump_MPII_Csel_parse_operator(): if_clase = "if" for a in G.conditions: - cond = a - has_thresh = False - if RE.match(r'(.+)\(thresh\)', a): - cond = RE.m.group(1) - has_thresh = True - n = len(cond) + name = get_condition_name(a) + has_thresh = condition_has_thresh(a) + n = len(name) if has_thresh: - G.out.append("%s (strncmp(ckey, \"%s\", %d) == 0) {" % (if_clase, cond, n)) - G.out.append(" csel_node->type = %s;" % condition_id(cond)) + G.out.append("%s (strncmp(ckey, \"%s\", %d) == 0) {" % (if_clase, name, n)) + G.out.append(" csel_node->type = %s;" % condition_id(a)) G.out.append(" MPIR_Assert(ckey[%d] == '(');" % n) G.out.append(" csel_node->u.condition.thresh = atoi(ckey + %d);" % (n + 1)) else: - G.out.append("%s (strcmp(ckey, \"%s\") == 0) {" % (if_clase, cond)) - G.out.append(" csel_node->type = %s;" % condition_id(cond)) + G.out.append("%s (strcmp(ckey, \"%s\") == 0) {" % (if_clase, name)) + G.out.append(" csel_node->type = %s;" % condition_id(a)) if_clase = "} else if" G.out.append("} else {") G.out.append(" MPIR_Assert(0);") @@ -416,16 +406,13 @@ def dump_MPII_Csel_run_condition(): G.out.append("bool cond;") dump_open("switch(node->type) {") for a in G.conditions: - cond = a - has_thresh = False - if RE.match(r'(.+)\(thresh\)', a): - cond = RE.m.group(1) - has_thresh = True - dump_open("case %s:" % condition_id(cond)) + has_thresh = condition_has_thresh(a) + cond_func = get_condition_function(a) + dump_open("case %s:" % condition_id(a)) if has_thresh: - G.out.append("cond = (node->u.condition.thresh <= %s(coll_sig));" % G.conditions[a]) + G.out.append("cond = (node->u.condition.thresh <= %s(coll_sig));" % cond_func) else: - G.out.append("cond = %s(coll_sig);" % G.conditions[a]) + G.out.append("cond = %s(coll_sig);" % cond_func) G.out.append("if (node->u.condition.negate) cond = !cond;") G.out.append("return cond ? 
node->success : node->failure;") dump_close("") @@ -519,16 +506,16 @@ def dump_check_restriction(restriction): if RE.match(r'.*\(.*\)', r): raise Exception("Threshold condition %s cannot be used as a restriction" % r) - cond = None if r in G.conditions: - cond = "%s(coll_sig)" % G.conditions[r] + # We assume we can directly call conditional condition since we are inside the algorithm macro_guard + cond = "%s(coll_sig)" % get_condition_function(r) + if negate: + G.out.append(" if (%s) return false;" % cond) + else: + G.out.append(" if (!%s) return false;" % cond) else: raise Exception("Restriction %s not listed" % restriction) - if negate: - G.out.append(" if (%s) return false;" % cond) - else: - G.out.append(" if (!%s) return false;" % cond) decl = "bool MPIR_Coll_check_algo_restriction(MPIR_Csel_coll_sig_s * coll_sig, int algo_id)" add_prototype(decl) @@ -658,6 +645,13 @@ def dump_algo_params(): G.out2.append(" } u;") G.out2.append("} MPII_Csel_container_s;") +def dump_conditional_conditions(out): + out.append("") + out.append("/* conditional CSEL conditions */") + for a in G.conditions: + if condition_need_wrapper(a): + dump_condition_wrapper(a, out) + #---------------------------------------- def add_prototype(l): G.prototypes.append(l) @@ -674,7 +668,7 @@ def load_coll_algos(algo_txt): algo_list = [] G.algos[func_commkind] = algo_list elif func_commkind == "conditions": - if RE.match(r'\s+([\w()-]+):\s*(\w+)', line): + if RE.match(r'\s+([\w()-]+):\s*(\w+.*)', line): G.conditions[RE.m.group(1)] = RE.m.group(2) elif func_commkind: if RE.match(r'\s+(\w+)\s*$', line): @@ -949,12 +943,6 @@ def algo_id(algo_funcname): def algo_id_END(): return "MPII_CSEL_CONTAINER_TYPE__ALGORITHM__END" -def condition_id(name): - prefix = "CSEL_NODE_TYPE__OPERATOR__" - a = re.sub(r'-', '_', name) - a = re.sub(r'\(thresh\)$', '', a) - return prefix + a - def algo_struct_name(algo): algo_funcname = get_algo_funcname(algo) struct_name = re.sub(r'MPIR_', '', algo_funcname).lower() @@ 
-963,17 +951,48 @@ def algo_struct_name(algo): def algo_id_END(): return "MPII_CSEL_CONTAINER_TYPE__ALGORITHM__END" -def condition_id(name): - prefix = "CSEL_NODE_TYPE__OPERATOR__" - a = re.sub(r'-', '_', name) - a = re.sub(r'\(thresh\)$', '', a) - return prefix + a - def algo_struct_name(algo): algo_funcname = get_algo_funcname(algo) struct_name = re.sub(r'MPIR_', '', algo_funcname).lower() return struct_name +def get_condition_name(a): + name = re.sub(r'\(thresh\)$', '', a) + return name + +def get_condition_function(a): + if RE.match(r'(\w+)\s*$', G.conditions[a]): + return RE.m.group(1) + else: + return "MPIR_CSEL_check_" + get_condition_name(a) + +def condition_has_thresh(a): + return a.endswith('(thresh)') + +def condition_need_wrapper(a): + return re.match(r'(\w+) #if (.+)', G.conditions[a]) + +def dump_condition_wrapper(a, out): + if RE.match(r'(\w+) #if (.+)', G.conditions[a]): + (actual_func, macro_guard) = RE.m.group(1, 2) + cond_func = get_condition_function(a) + out.append("") + out.append("MPL_STATIC_INLINE_PREFIX bool %s(MPIR_Csel_coll_sig_s * coll_sig)" % cond_func) + out.append("{") + out.append("#if " + macro_guard) + out.append(" return %s(coll_sig);" % actual_func) + out.append("#else") + # always false, i.e. 
never select this algorithm/subtree + out.append(" return false;") + out.append("#endif") + out.append("}") + else: + raise Exception("Does not need condition wrapper") + +def condition_id(a): + prefix = "CSEL_NODE_TYPE__OPERATOR__" + return prefix + get_condition_name(a) + # ---------------------- def dump_c_file(f, lines): print(" --> [%s]" % f) @@ -1011,12 +1030,25 @@ def dump_coll_algos_h(f, prototypes, lines): for l in lines: print(l, file=Out) + print("\n/* function prototypes */\n", file=Out) for l in prototypes: lines = split_line_with_break(l + ';', '', 80) for l2 in lines: print(l2, file=Out) print("#endif /* COLL_ALGOS_H_INCLUDED */", file=Out) +def dump_coll_autogen_h(f, lines): + print(" --> [%s]" % f) + with open(f, "w") as Out: + for l in G.copyright_c: + print(l, file=Out) + print("#ifndef COLL_AUTOGEN_H_INCLUDED", file=Out) + print("#define COLL_AUTOGEN_H_INCLUDED", file=Out) + print("", file=Out) + for l in lines: + print(l, file=Out) + print("#endif /* COLL_AUTOGEN_H_INCLUDED */", file=Out) + def dump_open(line): G.out.append(line) G.out.append("INDENT") diff --git a/src/mpi/coll/coll_algorithms.txt b/src/mpi/coll/coll_algorithms.txt index 75acf0989f5..770a22d5089 100644 --- a/src/mpi/coll/coll_algorithms.txt +++ b/src/mpi/coll/coll_algorithms.txt @@ -26,6 +26,8 @@ # - condition_name(thresh): func # This condition calls a query function that returns a value: int (*func)(coll_sig) # The thresh marks the upper limit (inclusive) of the condition. +# - func can optionally have a macro-guard, e.g. func #if defined(MPIDI_CH4_SHM_POSIX). +# A wrapper checker function will be generated that wraps the "func" within macro-guard. # The conditions are used in specifying both algorithm restrictions and CSEL conditions. # Example usage in restriction list or JSON file: inplace, !inplace, avg_msg_size(1024), etc. # Most of the checker functions should be inlined to minimize function call overhead. 
diff --git a/src/mpi/coll/include/coll_csel.h b/src/mpi/coll/include/coll_csel.h index 53767656143..e773bad56b5 100644 --- a/src/mpi/coll/include/coll_csel.h +++ b/src/mpi/coll/include/coll_csel.h @@ -306,4 +306,6 @@ MPL_STATIC_INLINE_PREFIX bool MPIR_Csel_is_node_canonical(MPIR_Csel_coll_sig_s * return MPII_Comm_is_node_canonical(coll_sig->comm_ptr); } +#include "coll_autogen.h" + #endif /* COLL_CSEL_H_INCLUDED */ From 0307432ccfa313b6904cfa14ad5c0aea9fdb368b Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 13:35:28 -0500 Subject: [PATCH 43/47] coll: add is_cvar to struct MPIR_Csel_coll_sig Sometimes we may want to behave differently between restriction-check and condition check. For example, an algorithm like release_gather normally gets selected only after the user calls the collective a certain number of times. But if the user selects the algorithm by CVAR, it won't make sense to do this repeat check in the restriction-check. --- src/include/mpir_coll.h | 2 ++ src/mpi/coll/src/coll_impl.c | 1 + 2 files changed, 3 insertions(+) diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index ab8763aef7b..7b327e8cf65 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -37,7 +37,9 @@ struct MPIR_Csel_coll_sig { MPIR_Comm *comm_ptr; void *sched; enum MPIR_sched_type sched_type; + /* TODO: consider compress bool flags into "int attr" */ bool is_persistent; + bool is_cvar; /* if the algorithm is selected by a CVAR */ struct { bool is_gpu; diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 7efee5259af..1431aa2c7d0 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -444,6 +444,7 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me) int coll_type = coll_sig->coll_type; int cvar_val = MPIR_Coll_cvar_table[coll_type]; if (cvar_val) { + coll_sig->is_cvar = true; int algo_id = MPIR_Coll_cvar_to_algo_id(coll_type, cvar_val); bool restriction_ok = 
MPIR_Coll_check_algo_restriction(coll_sig, algo_id); From f949b6b8f0489c37481996e4747e9653325481a7 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 15:28:54 -0500 Subject: [PATCH 44/47] coll: add coll_sig->flags for boolean flags Rather than add individual boolean flags, use bit mask "flags" instead. It is easier to make sure we zero-initialize all the flags that way. --- maint/gen_coll.py | 5 ++--- src/include/mpir_coll.h | 8 +++++--- src/mpi/coll/include/coll_impl.h | 6 ++++-- src/mpi/coll/src/coll_impl.c | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index eb8d312b3f2..ae923f46484 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -703,10 +703,9 @@ def dump_coll_impl(name, blocking_type): G.out.append("coll_sig.coll_type = MPIR_CSEL_COLL_TYPE__INTER_%s;" % NAME) dump_close('}') G.out.append("coll_sig.comm_ptr = comm_ptr;") + G.out.append("coll_sig.flags = 0;") if blocking_type == "persistent": - G.out.append("coll_sig.is_persistent = true;") - else: - G.out.append("coll_sig.is_persistent = false;") + G.out.append("coll_sig.flags |= MPIR_COLL_SIG_FLAG__PERSISTENT;") G.out.append("coll_sig.sched = NULL;") G.out.append("memset(&coll_sig.cache, 0, sizeof(coll_sig.cache));"); diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h index 7b327e8cf65..d58835e9d17 100644 --- a/src/include/mpir_coll.h +++ b/src/include/mpir_coll.h @@ -32,14 +32,16 @@ typedef struct MPII_Csel_container MPII_Csel_container_s; #define MPIR_COLL_ATTR_HAS_ERR(coll_attr) ((coll_attr) & MPIR_COLL_ATTR_ERR_MASK) +/* Define bits for coll_sig->flags */ +#define MPIR_COLL_SIG_FLAG__PERSISTENT 0x1 +#define MPIR_COLL_SIG_FLAG__CVAR 0x2 + struct MPIR_Csel_coll_sig { MPIR_Csel_coll_type_e coll_type; MPIR_Comm *comm_ptr; void *sched; enum MPIR_sched_type sched_type; - /* TODO: consider compress bool flags into "int attr" */ - bool is_persistent; - bool is_cvar; /* if the algorithm is selected by a CVAR */ + uint32_t 
flags; struct { bool is_gpu; diff --git a/src/mpi/coll/include/coll_impl.h b/src/mpi/coll/include/coll_impl.h index b38e9f509ca..6110b420a7e 100644 --- a/src/mpi/coll/include/coll_impl.h +++ b/src/mpi/coll/include/coll_impl.h @@ -90,7 +90,8 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt) do { \ if (coll_sig->sched == NULL) { \ coll_sig->sched_type = MPIR_SCHED_GENTRAN; \ - MPIR_TSP_sched_create(&coll_sig->sched, coll_sig->is_persistent); \ + bool is_persistent = (coll_sig)->flags & MPIR_COLL_SIG_FLAG__PERSISTENT; \ + MPIR_TSP_sched_create(&coll_sig->sched, is_persistent); \ } else { \ MPIR_Assert(coll_sig->sched_type = MPIR_SCHED_GENTRAN); \ } \ @@ -101,7 +102,8 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt) if (coll_sig->sched == NULL) { \ MPIR_Sched_t s = MPIR_SCHED_NULL; \ enum MPIR_Sched_kind sched_kind = MPIR_SCHED_KIND_REGULAR; \ - if (coll_sig->is_persistent) { \ + bool is_persistent = (coll_sig)->flags & MPIR_COLL_SIG_FLAG__PERSISTENT; \ + if (is_persistent) { \ sched_kind = MPIR_SCHED_KIND_PERSISTENT; \ } \ mpi_errno = MPIR_Sched_create(&s, sched_kind); \ diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 1431aa2c7d0..d950958e997 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -444,7 +444,7 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me) int coll_type = coll_sig->coll_type; int cvar_val = MPIR_Coll_cvar_table[coll_type]; if (cvar_val) { - coll_sig->is_cvar = true; + coll_sig->flags |= MPIR_COLL_SIG_FLAG__CVAR; int algo_id = MPIR_Coll_cvar_to_algo_id(coll_type, cvar_val); bool restriction_ok = MPIR_Coll_check_algo_restriction(coll_sig, algo_id); From 20eb0bd71135e612c3d3984805f28914879b940d Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 15:03:21 -0500 Subject: [PATCH 45/47] coll: add MPIR_CVAR_DUMP_COLL_ALGO_COUNTERS Provide a simple mechanism for a rank to dump collective 
algorithm counters. Set MPIR_CVAR_DUMP_COLL_ALGO_COUNTERS to the global rank of the process that we want it to dump since it is undesirable for every process to dump yet it does not always makes sense for rank 0 to dump especially when we don't always use comm world. It is counted in the CSEL framework so internal collectives are not counted when we internally use _fallback algorithms. --- maint/gen_coll.py | 2 ++ src/mpi/coll/include/coll_csel.h | 1 + src/mpi/coll/src/coll_impl.c | 33 ++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/maint/gen_coll.py b/maint/gen_coll.py index ae923f46484..4140cb792c5 100644 --- a/maint/gen_coll.py +++ b/maint/gen_coll.py @@ -120,6 +120,7 @@ def dump_algo_prep(algo): else: G.out.append("MPII_CSEL_CREATE_SCHED(coll_sig);") + G.out.append("") for algo in G.algo_list: if algo["func-commkind"] != 'general': coll_name = get_algo_coll_name(algo) @@ -134,6 +135,7 @@ def dump_algo_prep(algo): G.out.append("int mpi_errno = MPI_SUCCESS;") G.out.append("") dump_algo_prep(algo) + G.out.append("MPIR_Coll_algo_counters[%s]++;" % algo_id(algo_funcname)) dump_split(1, "mpi_errno = %s(%s);" % (algo_funcname, algo_args)) G.out.append("MPIR_ERR_CHECK(mpi_errno);") G.out.append("") diff --git a/src/mpi/coll/include/coll_csel.h b/src/mpi/coll/include/coll_csel.h index e773bad56b5..f3b8121d103 100644 --- a/src/mpi/coll/include/coll_csel.h +++ b/src/mpi/coll/include/coll_csel.h @@ -29,6 +29,7 @@ extern int *MPIR_Coll_cvar_table; extern const char **MPIR_Coll_type_names; extern const char **MPIR_Coll_algo_names; extern const char **MPIR_Csel_condition_names; +extern int *MPIR_Coll_algo_counters; int MPIR_Csel_create_from_file(const char *json_file, void **csel); int MPIR_Csel_create_from_buf(const char *json, void **csel); diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index d950958e997..0bc416dcf6f 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -103,6 +103,17 @@ 
categories : If set to true, rank 0 will dump the network coordinates to a file named "coords" in the current folder. If set to false, the network coordinates will not be dumped. + - name : MPIR_CVAR_DUMP_COLL_ALGO_COUNTERS + category : COLLECTIVE + type : int + default : -1 + class : none + verbosity : MPI_T_VERBOSITY_USER_BASIC + scope : MPI_T_SCOPE_ALL_EQ + description : >- + Set MPIR_CVAR_DUMP_COLL_ALGO_COUNTERS to a global rank number (including 0) for that rank to dump collective + algorithm counters. + === END_MPI_T_CVAR_INFO_BLOCK === */ @@ -131,6 +142,9 @@ const char **MPIR_Coll_type_names; const char **MPIR_Coll_algo_names; const char **MPIR_Csel_condition_names; +/* algorithm counters */ +int *MPIR_Coll_algo_counters; + MPIR_Tree_type_t get_tree_type_from_string(const char *tree_str) { MPIR_Tree_type_t tree_type = MPIR_TREE_TYPE_KARY; @@ -230,6 +244,8 @@ int MPII_Coll_init(void) MPIR_Coll_algo_names = MPL_malloc(MPIR_CSEL_NUM_ALGORITHMS * sizeof(char *), MPL_MEM_COLL); MPIR_Csel_condition_names = MPL_malloc(MPIR_CSEL_NUM_CONDITIONS * sizeof(char *), MPL_MEM_COLL); + MPIR_Coll_algo_counters = MPL_calloc(MPIR_CSEL_NUM_ALGORITHMS, sizeof(int), MPL_MEM_COLL); + MPII_Coll_type_init(); MPII_Coll_algo_init(); MPII_Csel_init_condition_names(); @@ -250,10 +266,26 @@ void MPIR_Init_coll_sig(MPIR_Csel_coll_sig_s * coll_sig) { } +static void dump_coll_algo_counters(void) +{ + printf("==== Dump collective algorithm counters ====\n"); + for (int i = 0; i < MPIR_CSEL_NUM_ALGORITHMS; i++) { + if (MPIR_Coll_algo_counters[i] > 0) { + printf("%10d %s\n", MPIR_Coll_algo_counters[i], MPIR_Coll_algo_names[i]); + } + } + printf("==== END collective algorithm counters ====\n"); +} + int MPII_Coll_finalize(void) { int mpi_errno = MPI_SUCCESS; + if (MPIR_CVAR_DUMP_COLL_ALGO_COUNTERS >= 0 && + MPIR_Process.rank == MPIR_CVAR_DUMP_COLL_ALGO_COUNTERS) { + dump_coll_algo_counters(); + } + /* deregister non blocking collectives progress hook */ 
MPIR_Progress_hook_deregister(MPIR_Nbc_progress_hook_id); @@ -271,6 +303,7 @@ int MPII_Coll_finalize(void) MPL_free(MPIR_Coll_algo_names); MPL_free(MPIR_Coll_type_names); MPL_free(MPIR_Csel_condition_names); + MPL_free(MPIR_Coll_algo_counters); fn_exit: return mpi_errno; From 6c28e119528f9c460dca8141eebb3942ce1280dc Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Thu, 25 Sep 2025 12:14:44 -0500 Subject: [PATCH 46/47] coll/ch4: activate CH4 POSIX release_gather bcast Enable CVARs and JSONs to select ch4-posix layer release_gather algorithms. Select MPIDI_POSIX_mpi_bcast_release_gather if it passes MPIDI_CH4_release_gather condition check, which only passes if comm is a posix intranode comm. --- src/mpi/coll/coll_algorithms.txt | 8 +++ src/mpi/coll/coll_selection.json | 4 ++ src/mpi/coll/cvars.txt | 1 + .../ch4/shm/posix/posix_coll_release_gather.h | 59 +++++++++++++------ src/mpid/ch4/shm/posix/posix_pre.h | 4 ++ .../shm/posix/release_gather/release_gather.c | 1 + 6 files changed, 59 insertions(+), 18 deletions(-) diff --git a/src/mpi/coll/coll_algorithms.txt b/src/mpi/coll/coll_algorithms.txt index 770a22d5089..cd4564d3533 100644 --- a/src/mpi/coll/coll_algorithms.txt +++ b/src/mpi/coll/coll_algorithms.txt @@ -70,6 +70,9 @@ conditions: avg_msg_size(thresh): MPIR_Csel_avg_msg_size total_msg_size(thresh): MPIR_Csel_total_msg_size + # conditional conditions - only call the condition function under macro_guard + MPIDI_CH4_release_gather: MPIDI_POSIX_check_release_gather #if defined(MPIDI_CH4_SHM_POSIX) + # ---- general: MPIR_Coll_auto @@ -110,6 +113,11 @@ bcast-intra: pipelined_tree extra_params: tree_type, k, is_non_blocking, chunk_size, recv_pre_posted cvar_params: TREE_TYPE, TREE_KVAL, IS_NON_BLOCKING, TREE_PIPELINE_CHUNK_SIZE, RECV_PRE_POST + release_gather + func_name: MPIDI_POSIX_mpi_bcast_release_gather + inline: 1 + macro_guard: defined(MPIDI_CH4_SHM_POSIX) + restrictions: MPIDI_CH4_release_gather ibcast-intra: sched_binomial sched_smp diff --git 
a/src/mpi/coll/coll_selection.json b/src/mpi/coll/coll_selection.json index f2084e2aa51..c2921fdc70a 100644 --- a/src/mpi/coll/coll_selection.json +++ b/src/mpi/coll/coll_selection.json @@ -1,6 +1,10 @@ { "collective=bcast-intra": { + "MPIDI_CH4_release_gather": + { + "algorithm=MPIDI_POSIX_mpi_bcast_release_gather":{} + }, "comm_size(8)": { "algorithm=MPIR_Bcast_intra_binomial":{} diff --git a/src/mpi/coll/cvars.txt b/src/mpi/coll/cvars.txt index c54a6a36d04..88ea69c2bfd 100644 --- a/src/mpi/coll/cvars.txt +++ b/src/mpi/coll/cvars.txt @@ -202,6 +202,7 @@ cvars: scatter_ring_allgather - Force Scatter Ring pipelined_tree - Force tree-based pipelined algorithm tree - Force tree-based algorithm + release_gather - Force CH4 POSIX release_gather algorithm - name : MPIR_CVAR_BCAST_TREE_KVAL category : COLLECTIVE diff --git a/src/mpid/ch4/shm/posix/posix_coll_release_gather.h b/src/mpid/ch4/shm/posix/posix_coll_release_gather.h index 40a0612b256..44247315a35 100644 --- a/src/mpid/ch4/shm/posix/posix_coll_release_gather.h +++ b/src/mpid/ch4/shm/posix/posix_coll_release_gather.h @@ -29,6 +29,47 @@ #include "algo_common.h" #include "release_gather.h" +MPL_STATIC_INLINE_PREFIX bool MPIDI_POSIX_check_release_gather(MPIR_Csel_coll_sig_s * coll_sig) +{ + if (MPIR_IS_THREADED) { + return false; + } + + /* Check whether comm is an intranode comm */ + MPIR_Comm *comm_ptr = coll_sig->comm_ptr; + MPIR_Assert(comm_ptr->attr & MPIR_COMM_ATTR__HIERARCHY); + if (comm_ptr->num_external > 1) { + return false; + } + + /* check coll_type */ + MPIDI_POSIX_release_gather_opcode_t opcode; + switch (coll_sig->coll_type) { + case MPIR_CSEL_COLL_TYPE__INTRA_BCAST: + opcode = MPIDI_POSIX_RELEASE_GATHER_OPCODE_BCAST; + break; + default: + return false; + } + + /* Check repeats if the algorithm CVAR is not set */ + if (!(coll_sig->flags & MPIR_COLL_SIG_FLAG__CVAR)) { + MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++; + if (MPIDI_POSIX_COMM(comm_ptr, 
release_gather).num_collective_calls < + MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) { + return false; + } + } + + /* Lazy initialization of release_gather specific struct */ + int mpi_errno = MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, opcode); + if (mpi_errno != MPI_SUCCESS) { + return false; + } + + return true; +} + /* Intra-node bcast is implemented as a release step followed by gather step in release_gather * framework. The actual data movement happens in release step. Gather step makes sure that * the shared bcast buffer can be reused for next bcast call. Release gather framework has @@ -59,20 +100,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_release_gather(void *buffer, goto fn_exit; } - MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++; - if (MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls < - MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) { - /* Fallback to pt2pt algorithms if the total number of release_gather collective calls is - * less than the specified threshold */ - goto fallback; - } - - /* Lazy initialization of release_gather specific struct */ - mpi_errno = - MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, MPIDI_POSIX_RELEASE_GATHER_OPCODE_BCAST); - MPII_COLLECTIVE_FALLBACK_CHECK(MPIR_Comm_rank(comm_ptr), !mpi_errno, mpi_errno, - "release_gather bcast cannot create more shared memory. 
Falling back to pt2pt algorithms.\n"); - my_rank = MPIR_Comm_rank(comm_ptr); MPIR_Type_get_extent_impl(datatype, &lb, &extent); MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); @@ -150,10 +177,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast_release_gather(void *buffer, return mpi_errno; fn_fail: goto fn_exit; - fallback: - /* FIXME: proper error */ - mpi_errno = MPI_ERR_OTHER; - goto fn_exit; } /* Intra-node reduce is implemented as a release step followed by gather step in release_gather diff --git a/src/mpid/ch4/shm/posix/posix_pre.h b/src/mpid/ch4/shm/posix/posix_pre.h index 0bbda3a783a..478340994e7 100644 --- a/src/mpid/ch4/shm/posix/posix_pre.h +++ b/src/mpid/ch4/shm/posix/posix_pre.h @@ -9,6 +9,10 @@ #include #include "release_gather_types.h" +/* define a macro to signify that we have CH4 POSIX. An example usage is + * to macro-guard posix collective algorithms */ +#define MPIDI_CH4_SHM_POSIX 1 + #define MPIDI_POSIX_MAX_AM_HDR_SIZE 800 /* constrained by MPIDI_POSIX_AM_HDR_POOL_CELL_SIZE */ #define MPIDI_POSIX_AM_MSG_HEADER_SIZE (sizeof(MPIDI_POSIX_am_header_t)) #define MPIDI_POSIX_MAX_IOV_NUM (3) /* am_hdr, [padding], payload */ diff --git a/src/mpid/ch4/shm/posix/release_gather/release_gather.c b/src/mpid/ch4/shm/posix/release_gather/release_gather.c index 15c12b80237..9de12c5fe14 100644 --- a/src/mpid/ch4/shm/posix/release_gather/release_gather.c +++ b/src/mpid/ch4/shm/posix/release_gather/release_gather.c @@ -481,6 +481,7 @@ int MPIDI_POSIX_mpi_release_gather_comm_init(MPIR_Comm * comm_ptr, if (mpi_errno != MPI_SUCCESS) { MPIDI_POSIX_mpi_release_gather_comm_free(comm_ptr); RELEASE_GATHER_FIELD(comm_ptr, is_initialized) = 0; + /* TODO: set a flag so next time we skip the try */ } return mpi_errno; fn_fail: From 9db18809009130b741e8f3c8b33ba15170437905 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 26 Sep 2025 16:27:40 -0500 Subject: [PATCH 47/47] coll/ch4: activate release_gather for reduce/allreduce/barrier Extend the previous 
commit to activate release_gather algorithm for reduce, allreduce, and barrier. --- src/mpi/coll/coll_algorithms.txt | 15 +++ src/mpi/coll/coll_selection.json | 19 +++- src/mpi/coll/cvars.txt | 3 + .../ch4/shm/posix/posix_coll_release_gather.h | 98 ++++++------------- 4 files changed, 66 insertions(+), 69 deletions(-) diff --git a/src/mpi/coll/coll_algorithms.txt b/src/mpi/coll/coll_algorithms.txt index cd4564d3533..b16d820f487 100644 --- a/src/mpi/coll/coll_algorithms.txt +++ b/src/mpi/coll/coll_algorithms.txt @@ -88,6 +88,11 @@ barrier-intra: cvar_params: RECEXCH_KVAL, RECEXCH_SINGLE_PHASE_RECV smp restrictions: hierarchical + release_gather + func_name: MPIDI_POSIX_mpi_barrier_release_gather + inline: 1 + macro_guard: defined(MPIDI_CH4_SHM_POSIX) + restrictions: MPIDI_CH4_release_gather barrier-inter: bcast ibarrier-intra: @@ -354,6 +359,11 @@ reduce-intra: restrictions: commutative, hierarchical reduce_scatter_gather restrictions: count_ge_pof2, builtin_op + release_gather + func_name: MPIDI_POSIX_mpi_reduce_release_gather + inline: 1 + macro_guard: defined(MPIDI_CH4_SHM_POSIX) + restrictions: MPIDI_CH4_release_gather reduce-inter: local_reduce_remote_send ireduce-intra: @@ -397,6 +407,11 @@ allreduce-intra: ccl extra_params: ccl cvar_params: CCL + release_gather + func_name: MPIDI_POSIX_mpi_allreduce_release_gather + inline: 1 + macro_guard: defined(MPIDI_CH4_SHM_POSIX) + restrictions: MPIDI_CH4_release_gather allreduce-inter: reduce_exchange_bcast iallreduce-intra: diff --git a/src/mpi/coll/coll_selection.json b/src/mpi/coll/coll_selection.json index c2921fdc70a..f09dcd3fa61 100644 --- a/src/mpi/coll/coll_selection.json +++ b/src/mpi/coll/coll_selection.json @@ -42,6 +42,10 @@ }, "collective=allreduce-intra": { + "MPIDI_CH4_release_gather": + { + "algorithm=MPIDI_POSIX_mpi_allreduce_release_gather":{} + }, "avg_msg_size(8)": { "algorithm=MPIR_Allreduce_intra_recursive_doubling":{} @@ -129,6 +133,10 @@ }, "collective=reduce-intra": { + 
"MPIDI_CH4_release_gather": + { + "algorithm=MPIDI_POSIX_mpi_reduce_release_gather":{} + }, "avg_msg_size(2048)": { "algorithm=MPIR_Reduce_intra_binomial":{} @@ -277,8 +285,15 @@ }, "collective=barrier-intra": { - "algorithm=MPIR_Barrier_intra_k_dissemination":{ - "k=2": {} + "MPIDI_CH4_release_gather": + { + "algorithm=MPIDI_POSIX_mpi_barrier_release_gather":{} + }, + "any": + { + "algorithm=MPIR_Barrier_intra_k_dissemination":{ + "k=2": {} + } } }, "collective=barrier-inter": diff --git a/src/mpi/coll/cvars.txt b/src/mpi/coll/cvars.txt index 88ea69c2bfd..4955dd613bd 100644 --- a/src/mpi/coll/cvars.txt +++ b/src/mpi/coll/cvars.txt @@ -21,6 +21,7 @@ cvars: smp - Force smp algorithm k_dissemination - Force high radix dissemination algorithm recexch - Force recursive exchange algorithm + release_gather - Force CH4 POSIX release_gather algorithm - name : MPIR_CVAR_BARRIER_INTER_ALGORITHM category : COLLECTIVE @@ -1205,6 +1206,7 @@ cvars: nb - Force nonblocking algorithm smp - Force smp algorithm reduce_scatter_gather - Force reduce scatter gather algorithm + release_gather - Force CH4 POSIX release_gather algorithm - name : MPIR_CVAR_REDUCE_INTER_ALGORITHM category : COLLECTIVE @@ -1405,6 +1407,7 @@ cvars: ring - Force ring algorithm k_reduce_scatter_allgather - Force reduce scatter allgather algorithm ccl - Force CCL algorithm + release_gather - Force CH4 POSIX release_gather algorithm - name : MPIR_CVAR_ALLREDUCE_RECURSIVE_MULTIPLYING_KVAL category : COLLECTIVE diff --git a/src/mpid/ch4/shm/posix/posix_coll_release_gather.h b/src/mpid/ch4/shm/posix/posix_coll_release_gather.h index 44247315a35..c2b25d60f4e 100644 --- a/src/mpid/ch4/shm/posix/posix_coll_release_gather.h +++ b/src/mpid/ch4/shm/posix/posix_coll_release_gather.h @@ -44,14 +44,45 @@ MPL_STATIC_INLINE_PREFIX bool MPIDI_POSIX_check_release_gather(MPIR_Csel_coll_si /* check coll_type */ MPIDI_POSIX_release_gather_opcode_t opcode; + MPI_Datatype datatype_for_reduce = MPI_DATATYPE_NULL; + MPI_Op 
op_for_reduce; switch (coll_sig->coll_type) { case MPIR_CSEL_COLL_TYPE__INTRA_BCAST: opcode = MPIDI_POSIX_RELEASE_GATHER_OPCODE_BCAST; break; + case MPIR_CSEL_COLL_TYPE__INTRA_REDUCE: + opcode = MPIDI_POSIX_RELEASE_GATHER_OPCODE_REDUCE; + datatype_for_reduce = coll_sig->u.reduce.datatype; + op_for_reduce = coll_sig->u.reduce.op; + break; + case MPIR_CSEL_COLL_TYPE__INTRA_ALLREDUCE: + opcode = MPIDI_POSIX_RELEASE_GATHER_OPCODE_ALLREDUCE; + datatype_for_reduce = coll_sig->u.allreduce.datatype; + op_for_reduce = coll_sig->u.allreduce.op; + break; + case MPIR_CSEL_COLL_TYPE__INTRA_BARRIER: + opcode = MPIDI_POSIX_RELEASE_GATHER_OPCODE_BARRIER; + break; default: return false; } + if (datatype_for_reduce != MPI_DATATYPE_NULL) { + MPI_Aint type_size, dummy_lb, extent, true_extent; + MPIR_Datatype_get_size_macro(datatype_for_reduce, type_size); + MPIR_Type_get_extent_impl(datatype_for_reduce, &dummy_lb, &extent); + MPIR_Type_get_true_extent_impl(datatype_for_reduce, &dummy_lb, &true_extent); + extent = MPL_MAX(extent, true_extent); + if (MPL_MAX(type_size, extent) >= + MPIR_CVAR_REDUCE_INTRANODE_BUFFER_TOTAL_SIZE / MPIR_CVAR_REDUCE_INTRANODE_NUM_CELLS) { + return false; + } + + if (!MPIR_Op_is_commutative(op_for_reduce)) { + return false; + } + } + /* Check repeats if the algorithm CVAR is not set */ if (!(coll_sig->flags & MPIR_COLL_SIG_FLAG__CVAR)) { MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++; @@ -217,25 +248,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_reduce_release_gather(const void *s MPIR_Type_get_extent_impl(datatype, &lb, &extent); MPIR_Type_get_true_extent_impl(datatype, &lb, &true_extent); extent = MPL_MAX(extent, true_extent); - if (MPL_MAX(type_size, extent) >= - MPIR_CVAR_REDUCE_INTRANODE_BUFFER_TOTAL_SIZE / MPIR_CVAR_REDUCE_INTRANODE_NUM_CELLS) { - goto fallback; - } - - MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++; - if (MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls < - 
MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) { - /* Fallback to pt2pt algorithms if the total number of release_gather collective calls is - * less than the specified threshold */ - goto fallback; - } - - /* Lazy initialization of release_gather specific struct */ - mpi_errno = - MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, - MPIDI_POSIX_RELEASE_GATHER_OPCODE_REDUCE); - MPII_COLLECTIVE_FALLBACK_CHECK(MPIR_Comm_rank(comm_ptr), !mpi_errno, mpi_errno, - "release_gather reduce cannot create more shared memory. Falling back to pt2pt algorithms.\n"); if (sendbuf == MPI_IN_PLACE) { sendbuf = recvbuf; @@ -271,10 +283,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_reduce_release_gather(const void *s return mpi_errno; fn_fail: goto fn_exit; - fallback: - /* FIXME: proper error */ - mpi_errno = MPI_ERR_OTHER; - goto fn_exit; } /* Intra-node allreduce is implemented as a gather step followed by a release step in release_gather @@ -309,25 +317,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allreduce_release_gather(const void MPIR_Type_get_extent_impl(datatype, &lb, &extent); MPIR_Type_get_true_extent_impl(datatype, &lb, &true_extent); extent = MPL_MAX(extent, true_extent); - if (MPL_MAX(type_size, extent) >= - MPIR_CVAR_REDUCE_INTRANODE_BUFFER_TOTAL_SIZE / MPIR_CVAR_REDUCE_INTRANODE_NUM_CELLS) { - goto fallback; - } - - MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++; - if (MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls < - MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) { - /* Fallback to pt2pt algorithms if the total number of release_gather collective calls is - * less than the specified threshold */ - goto fallback; - } - - /* Lazy initialization of release_gather specific struct */ - mpi_errno = - MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, - MPIDI_POSIX_RELEASE_GATHER_OPCODE_ALLREDUCE); - MPII_COLLECTIVE_FALLBACK_CHECK(MPIR_Comm_rank(comm_ptr), !mpi_errno, mpi_errno, - "release_gather allreduce cannot create more shared memory. 
Falling back to pt2pt algorithms.\n"); if (sendbuf == MPI_IN_PLACE) { sendbuf = recvbuf; @@ -365,11 +354,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_allreduce_release_gather(const void fn_fail: goto fn_exit; - - fallback: - /* FIXME: proper error */ - mpi_errno = MPI_ERR_OTHER; - goto fn_exit; } /* Intra-node barrier is implemented as a gather step followed by a release step in release_gather @@ -382,21 +366,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_barrier_release_gather(MPIR_Comm * MPIR_FUNC_ENTER; - MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls++; - if (MPIDI_POSIX_COMM(comm_ptr, release_gather).num_collective_calls < - MPIR_CVAR_POSIX_NUM_COLLS_THRESHOLD) { - /* Fallback to pt2pt algorithms if the total number of release_gather collective calls is - * less than the specified threshold */ - goto fallback; - } - - /* Lazy initialization of release_gather specific struct */ - mpi_errno = - MPIDI_POSIX_mpi_release_gather_comm_init(comm_ptr, - MPIDI_POSIX_RELEASE_GATHER_OPCODE_BARRIER); - MPII_COLLECTIVE_FALLBACK_CHECK(MPIR_Comm_rank(comm_ptr), !mpi_errno, mpi_errno, - "release_gather barrier cannot create more shared memory. Falling back to pt2pt algorithms.\n"); - mpi_errno = MPIDI_POSIX_mpi_release_gather_gather(NULL, NULL, 0, MPI_DATATYPE_NULL, MPI_OP_NULL, 0, comm_ptr, coll_attr, @@ -414,11 +383,6 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_barrier_release_gather(MPIR_Comm * fn_fail: goto fn_exit; - - fallback: - /* FIXME: proper error */ - mpi_errno = MPI_ERR_OTHER; - goto fn_exit; } #endif /* POSIX_COLL_RELEASE_GATHER_H_INCLUDED */