From 863bd46ce7f1cf7d5459e50743ac1b09fac65c4c Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Wed, 7 Jan 2026 19:51:56 +0100 Subject: [PATCH] use poet dispatcher --- CMakeLists.txt | 1 + cmake/setupPOET.cmake | 14 ++++ include/cufinufft/utils.h | 3 +- include/finufft_common/utils.h | 115 --------------------------------- makefile | 67 +++++++++++++------ src/CMakeLists.txt | 2 +- src/cuda/CMakeLists.txt | 3 +- src/spreadinterp.cpp | 11 ++-- 8 files changed, 72 insertions(+), 144 deletions(-) create mode 100644 cmake/setupPOET.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 06b6eacf5..c74f87b96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,6 +78,7 @@ mark_as_advanced( ) include(setupCPM) +include(setupPOET) if(FINUFFT_USE_CPU) add_subdirectory(src) diff --git a/cmake/setupPOET.cmake b/cmake/setupPOET.cmake new file mode 100644 index 000000000..7b1162d84 --- /dev/null +++ b/cmake/setupPOET.cmake @@ -0,0 +1,14 @@ +CPMAddPackage( + NAME + POET + GIT_REPOSITORY + https://github.com/DiamonDinoia/poet.git + GIT_TAG + main + EXCLUDE_FROM_ALL + YES + GIT_SHALLOW + YES + OPTIONS + "POET_BUILD_TESTS OFF" +) diff --git a/include/cufinufft/utils.h b/include/cufinufft/utils.h index 93646e07c..65cbafa42 100644 --- a/include/cufinufft/utils.h +++ b/include/cufinufft/utils.h @@ -13,6 +13,7 @@ #include // for std::forward #include +#include #ifndef _USE_MATH_DEFINES #define _USE_MATH_DEFINES @@ -187,7 +188,7 @@ template auto launch_dispatch_ns(Func &&func, int target_ns, Args &&...args) { using NsSeq = make_range; auto params = std::make_tuple(DispatchParam{target_ns}); - return dispatch(std::forward(func), params, std::forward(args)...); + return poet::dispatch(std::forward(func), params, std::forward(args)...); } /** diff --git a/include/finufft_common/utils.h b/include/finufft_common/utils.h index 008c1e1da..b0ad40c1f 100644 --- a/include/finufft_common/utils.h +++ b/include/finufft_common/utils.h @@ -33,120 +33,5 @@ template struct DispatchParam { using seq_type = Seq; }; -// Cartesian product over integer sequences. -// Invokes f.template operator()<...>() for each combination of values. -// The functor F must provide a templated call operator. -// Adapted upon suggestion from Nils Wentzell: godbolt.org/z/GM94xb1j4 -// -namespace detail { - -template struct Product; - -// Recursive case: at least two sequences remaining -template -struct Product, Seq2, Rest...> { - template static void apply(F &f) { - (Product::template apply(f), ...); - } -}; - -// Base case: single sequence left -template struct Product> { - template static void apply(F &f) { - (f.template operator()(), ...); - } -}; - -template void product(F &f, Seq...) { - Product::template apply<>(f); -} - -// Helper functor invoked for each combination to check runtime values -template -struct DispatcherCaller { - Func &func; - const std::array &vals; - ArgTuple &args; - std::conditional_t, char, ResultType> result{}; - template void operator()() { - static constexpr std::array p{Params...}; - if (p == vals) { - if constexpr (std::is_void_v) { - std::apply( - [&](auto &&...a) { - func.template operator()(std::forward(a)...); - }, - args); - } else { - result = std::apply( - [&](auto &&...a) { - return func.template operator()(std::forward(a)...); - }, - args); - } - } - } -}; - -template struct seq_first; -template -struct seq_first> : std::integral_constant { -}; - -template -auto extract_vals_impl(const Tuple &t, std::index_sequence) { - return std::array{std::get(t).runtime_val...}; -} -template auto extract_vals(const Tuple &t) { - using T = std::remove_reference_t; - return extract_vals_impl(t, std::make_index_sequence>{}); -} - -template -auto extract_seqs_impl(const Tuple &t, std::index_sequence) { - using T = std::remove_reference_t; - return std::make_tuple(typename std::tuple_element_t::seq_type{}...); -} -template auto extract_seqs(const Tuple &t) { - using T = std::remove_reference_t; - return extract_seqs_impl(t, std::make_index_sequence>{}); -} - -template -struct dispatch_result_helper { - template - static auto test(std::index_sequence) - -> decltype(std::declval().template operator()::value...>( - std::get(std::declval())...)); - using type = decltype(test(std::make_index_sequence>{})); -}; -template struct dispatch_result; -template -struct dispatch_result> { - using type = typename dispatch_result_helper::type; -}; -template -using dispatch_result_t = typename dispatch_result::type; - -} // namespace detail - -// Generic dispatcher mapping runtime ints to template parameters. -// params is a tuple of DispatchParam holding runtime values and sequences. -// When a match is found, the functor is invoked with those template parameters -// and its result returned. Otherwise, the default-constructed result is returned. -template -decltype(auto) dispatch(Func &&func, ParamTuple &¶ms, Args &&...args) { - using tuple_t = std::remove_reference_t; - constexpr std::size_t N = std::tuple_size_v; - auto vals = detail::extract_vals(params); - auto seqs = detail::extract_seqs(params); - auto arg_tuple = std::forward_as_tuple(std::forward(args)...); - using result_t = detail::dispatch_result_t; - detail::DispatcherCaller caller{func, vals, - arg_tuple}; - std::apply([&](auto &&...s) { detail::product(caller, s...); }, seqs); - if constexpr (!std::is_void_v) return caller.result; -} - } // namespace common } // namespace finufft diff --git a/makefile b/makefile index 383515722..c3a20cac6 100644 --- a/makefile +++ b/makefile @@ -71,6 +71,11 @@ XSIMD_URL := https://github.com/xtensor-stack/xsimd.git XSIMD_VERSION := 14.0.0 XSIMD_DIR := $(DEPS_ROOT)/xsimd +# POET header-only dependency repo (fetched like xsimd) +POET_URL := https://github.com/DiamonDinoia/poet.git +POET_VERSION := main +POET_DIR := $(DEPS_ROOT)/poet + # DUCC sources optional dependency repo DUCC_URL := https://github.com/mreineck/ducc.git DUCC_VERSION := ducc0_0_39_1 @@ -92,7 +97,7 @@ FINUFFT = $(dir $(realpath $(firstword $(MAKEFILE_LIST)))) # Now come flags that should be added, whatever user overrode in make.inc. # -fPIC (position-indep code) needed to build dyn lib (.so) # Also, we force return (via :=) to the land of simply-expanded variables... -INCL = -Iinclude -I$(XSIMD_DIR)/include +INCL = -Iinclude -I$(XSIMD_DIR)/include -I$(POET_DIR)/include # single-thread total list of math and FFT libs (now both precisions)... # (Note: finufft tests use LIBSFFT; spread & util tests only need LIBS) LIBSFFT := $(LIBS) @@ -205,13 +210,13 @@ fortran/%.o: fortran/%.cpp $(HEADERS) %.o: %.f $(FC) -c $(FFLAGS) $< -o $@ -# spreadinterp include auto-generated code, xsimd header-only dependency; +# spreadinterp include auto-generated code, xsimd and POET header-only dependencies; # if FFT=DUCC also setup ducc with fft.h dependency on $(DUCC_SETUP)... # Note src/spreadinterp.cpp includes finufft/finufft_core.h which includes finufft/fft.h # so fftw/ducc header needed for spreadinterp, though spreadinterp should not # depend on fftw/ducc directly? include/finufft/fft.h: $(DUCC_SETUP) -SHEAD = $(wildcard src/*.h) $(XSIMD_DIR)/include/xsimd/xsimd.hpp +SHEAD = $(wildcard src/*.h) $(XSIMD_DIR)/include/xsimd/xsimd.hpp $(POET_DIR)/include/poet/poet.hpp src/spreadinterp.o: $(SHEAD) # we need xsimd functionality in finufft_core.h, which is included by many other @@ -480,23 +485,31 @@ docker-wheel: # ================== SETUP/COMPILE OF EXTERNAL DEPENDENCIES =============== define clone_repo - @if [ ! -d "$(3)" ]; then \ - echo "Cloning repository $(1) at tag $(2) into directory $(3)"; \ - git clone --no-checkout $(1) $(3) && \ - cd $(3) && \ - git fetch origin tag $(2) --force && \ - git -c advice.detachedHead=false checkout $(2); \ - else \ - cd $(3) && \ - CURRENT_VERSION=$$(git describe --tags --abbrev=0 2>/dev/null || echo ""); \ - if [ "$$CURRENT_VERSION" = "$(2)" ]; then \ - echo "Directory $(3) already exists and is at the correct version $(2)."; \ - else \ - echo "Directory $(3) exists but is at version $$CURRENT_VERSION. Checking out the correct version $(2)."; \ - git fetch origin tag $(2) --force && \ - git -c advice.detachedHead=false checkout $(2) || { echo "Error: Failed to checkout version $(2) in $(3)."; exit 1; }; \ - fi; \ - fi +@if [ ! -d "$(3)" ]; then \ + echo "Cloning repository $(1) at ref $(2) into directory $(3)"; \ + git clone --no-checkout $(1) $(3) && \ + cd $(3) && \ + git fetch origin --prune >/dev/null 2>&1 || true; \ + if git ls-remote --tags origin | grep -q "refs/tags/$(2)$$"; then \ + git fetch origin tag $(2) --force && git -c advice.detachedHead=false checkout $(2) || { echo "Error: Failed to checkout tag $(2) in $(3)."; exit 1; }; \ + elif git ls-remote --heads origin | grep -q "refs/heads/$(2)$$"; then \ + git fetch origin $(2) --force && git -c advice.detachedHead=false checkout -B $(2) origin/$(2) || { echo "Error: Failed to checkout branch $(2) in $(3)."; exit 1; }; \ + else \ + git fetch origin --prune >/dev/null 2>&1 || true; \ + git -c advice.detachedHead=false checkout $(2) || { echo "Error: Failed to checkout ref $(2) in $(3)."; exit 1; }; \ + fi; \ +else \ + cd $(3) && \ + git fetch origin --prune >/dev/null 2>&1 || true; \ + if git ls-remote --tags origin | grep -q "refs/tags/$(2)$$"; then \ + git fetch origin tag $(2) --force && git -c advice.detachedHead=false checkout $(2) && git reset --hard $(2) || { echo "Error: Failed to checkout tag $(2) in $(3)."; exit 1; }; \ + elif git ls-remote --heads origin | grep -q "refs/heads/$(2)$$"; then \ + git fetch origin $(2) --force && git -c advice.detachedHead=false checkout -B $(2) origin/$(2) && git reset --hard origin/$(2) || { echo "Error: Failed to checkout branch $(2) in $(3)."; exit 1; }; \ + else \ + git fetch origin --prune >/dev/null 2>&1 || true; \ + git -c advice.detachedHead=false checkout $(2) || git checkout --force $(2) || { echo "Error: Failed to checkout ref $(2) in $(3)."; exit 1; }; \ + fi; \ +fi endef @@ -507,6 +520,18 @@ $(XSIMD_DIR)/include/xsimd/xsimd.hpp: $(call clone_repo,$(XSIMD_URL),$(XSIMD_VERSION),$(XSIMD_DIR)) @echo "xsimd installed in deps/xsimd" +# download: POET header-only dependency (fetched like xsimd) +$(POET_DIR)/include/poet/poet.hpp: + mkdir -p $(DEPS_ROOT) + @echo "Checking POET external dependency..." + $(call clone_repo,$(POET_URL),$(POET_VERSION),$(POET_DIR)) + @# ensure branch/tag/commit is present and init submodules + @git -C $(POET_DIR) fetch --prune >/dev/null 2>&1 || true + @echo "Attempting to checkout $(POET_VERSION) in $(POET_DIR)..." + @git -C $(POET_DIR) -c advice.detachedHead=false checkout $(POET_VERSION) >/dev/null 2>&1 || true + @git -C $(POET_DIR) submodule update --init --recursive >/dev/null 2>&1 || true + @echo "POET installed in deps/poet" + # download DUCC... (an empty target just used to track if installed) $(DUCC_COOKIE): mkdir -p $(DEPS_ROOT) @@ -521,7 +546,7 @@ $(DUCC_SRCS): %.cc: $(DUCC_SETUP) $(DUCC_OBJS): %.o: %.cc $(CXX) -c $(DUCC_CXXFLAGS) $(DUCC_INCL) $< -o $@ -setup: $(XSIMD_DIR)/include/xsimd/xsimd.hpp $(DUCC_SETUP) +setup: $(XSIMD_DIR)/include/xsimd/xsimd.hpp $(POET_DIR)/include/poet/poet.hpp $(DUCC_SETUP) setupclean: rm -rf $(DEPS_ROOT) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d257b0db4..a1d740b96 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,7 +47,7 @@ if(FINUFFT_USE_DUCC0) target_compile_definitions(finufft PRIVATE FINUFFT_USE_DUCC0) endif() -target_link_libraries(finufft PRIVATE $) +target_link_libraries(finufft PRIVATE $) if(FINUFFT_USE_OPENMP) target_link_libraries(finufft PRIVATE OpenMP::OpenMP_CXX) if(NOT FINUFFT_STATIC_LINKING) diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index 92825eeb5..8a760024d 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -81,7 +81,8 @@ if(FINUFFT_SHARED_LINKING) endif() endif() -target_link_libraries(cufinufft PRIVATE CUDA::cudart CUDA::cufft $) +target_link_libraries(cufinufft PRIVATE CUDA::cudart CUDA::cufft) +target_link_libraries(cufinufft PUBLIC $ poet::poet) # Expose only when not doing fully static linking if(NOT FINUFFT_STATIC_LINKING) target_link_libraries(cufinufft PUBLIC CUDA::cudart CUDA::cufft) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index d965c9c34..ef408b729 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -15,7 +16,7 @@ using namespace std; using namespace finufft::utils; // access to timer -using namespace finufft::common; // access to constants and dispatch +using namespace finufft::common; // access to constants using namespace finufft::kernel; // access to kernel evaluation funcs namespace finufft::spreadinterp { @@ -911,7 +912,7 @@ static void spread_subproblem_1d(BIGINT off1, UBIGINT size1, T *du, UBIGINT M, T using NcSeq = make_range; auto params = std::make_tuple(DispatchParam{opts.nspread}, DispatchParam{nc}); - dispatch(caller, params); + poet::dispatch(caller, params); } template @@ -1053,7 +1054,7 @@ static void spread_subproblem_2d( using NcSeq = make_range; auto params = std::make_tuple(DispatchParam{opts.nspread}, DispatchParam{nc}); - dispatch(caller, params); + poet::dispatch(caller, params); } template @@ -1179,7 +1180,7 @@ du (size size1*size2*size3) is uniform complex output array using NcSeq = make_range; auto params = std::make_tuple(DispatchParam{opts.nspread}, DispatchParam{nc}); - dispatch(caller, params); + poet::dispatch(caller, params); } template @@ -1870,7 +1871,7 @@ static int interpSorted( using NcSeq = make_range; auto params = std::make_tuple(DispatchParam{opts.nspread}, DispatchParam{nc}); - return dispatch(caller, params); + return poet::dispatch(caller, params); } template