diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
new file mode 100644
index 000000000..02549289d
--- /dev/null
+++ b/.github/workflows/codspeed.yml
@@ -0,0 +1,49 @@
+name: CodSpeed Benchmarks
+
+on:
+  push:
+    branches:
+      - "master"
+  pull_request:
+
+permissions:
+  contents: read
+  id-token: write
+
+jobs:
+  benchmarks:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Cpp
+        uses: aminya/setup-cpp@v1
+        with:
+          cmake: true
+          ninja: true
+
+      - name: Detect CPU and Configure CMake
+        run: |
+          # On AVX-512 hosts build for x86-64-v3 instead of native, presumably
+          # because the simulation runner cannot execute AVX-512 code (confirm).
+          if grep -q avx512 /proc/cpuinfo; then
+            march="x86-64-v3"
+          else
+            march="native"
+          fi
+          cmake -S . -B ./build -G Ninja \
+            -DFINUFFT_BUILD_TESTS=ON \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DFINUFFT_ARCH_FLAGS="-march=$march" \
+            -DFINUFFT_USE_DUCC0=ON \
+            -DCODSPEED_MODE=instrumentation
+
+      - name: Build
+        run: cmake --build ./build --target codspeed_benchmark -j
+
+      - name: Run benchmarks
+        uses: CodSpeedHQ/action@v4
+        with:
+          mode: simulation
+          run: ./build/perftest/codspeed_benchmark
diff --git a/README.md b/README.md
index 29ffa5219..893946e06 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
 [![PyPI - finufft](https://img.shields.io/pypi/dm/finufft?label=finufft%20(CPU))](https://pypi.org/project/finufft)
 [![PyPI - cufinufft](https://img.shields.io/pypi/dm/cufinufft?label=cufinufft%20(GPU))](https://pypi.org/project/cufinufft)
 [![Star Graph](https://img.shields.io/badge/GitHub-star%20history-blue?logo=github)](https://www.star-history.com/#flatironinstitute/finufft&Date)
+[![CodSpeed](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json&repository=flatironinstitute/finufft)](https://codspeed.io/flatironinstitute/finufft)
 
 Principal author **Alex H.
Barnett** (abarnett@flatironinstitute.org),
diff --git a/perftest/CMakeLists.txt b/perftest/CMakeLists.txt
index c5c98aaa3..cf06eb6f1 100644
--- a/perftest/CMakeLists.txt
+++ b/perftest/CMakeLists.txt
@@ -26,3 +26,42 @@ if(HAVE_GETOPT_H)
 endif()
 finufft_link_test(perftest)
 endif()
+
+if(CODSPEED_MODE)
+  # CodSpeed compatibility layer (wraps Google Benchmark with instrumentation).
+  # The instrument-hooks C file requires Clang; use this path only in CI.
+  CPMAddPackage(
+    NAME
+    google_benchmark
+    GIT_REPOSITORY
+    https://github.com/CodSpeedHQ/codspeed-cpp
+    GIT_TAG
+    v2.1.0
+    SOURCE_SUBDIR
+    google_benchmark
+    OPTIONS
+    "BENCHMARK_ENABLE_TESTING OFF"
+    "BENCHMARK_ENABLE_INSTALL OFF"
+    "BENCHMARK_ENABLE_WERROR OFF"
+  )
+else()
+  # Upstream Google Benchmark for local development (works with any compiler).
+  CPMAddPackage(
+    NAME
+    google_benchmark
+    GIT_REPOSITORY
+    https://github.com/google/benchmark.git
+    GIT_TAG
+    v1.9.5
+    OPTIONS
+    "BENCHMARK_ENABLE_TESTING OFF"
+    "BENCHMARK_ENABLE_INSTALL OFF"
+  )
+endif()
+
+add_executable(codspeed_benchmark codspeed_benchmark.cpp)
+if(FINUFFT_USE_DUCC0)
+  target_compile_definitions(codspeed_benchmark PRIVATE FINUFFT_USE_DUCC0)
+endif()
+target_link_libraries(codspeed_benchmark PRIVATE benchmark::benchmark)
+finufft_link_test(codspeed_benchmark)
diff --git a/perftest/codspeed_benchmark.cpp b/perftest/codspeed_benchmark.cpp
new file mode 100644
index 000000000..1aaace938
--- /dev/null
+++ b/perftest/codspeed_benchmark.cpp
@@ -0,0 +1,175 @@
+#include <complex>
+#include <cstdint>
+#include <ostream>
+#include <random>
+#include <sstream>
+#include <tuple>
+#include <type_traits>
+#include <typeinfo>
+#include <vector>
+
+// NOTE(review): the include targets in this file were reconstructed from the
+// names it uses; confirm the list against the original patch.
+#include <benchmark/benchmark.h>
+#include <finufft.h>
+#ifndef FINUFFT_USE_DUCC0
+// NOTE(review): the header guarded here was lost in this copy of the patch
+// (presumably the FFTW header) -- restore it from the original.
+#endif
+
+static const double PI = 3.141592653589793238462643383279502884;
+static const auto BENCH_NAME = "perftest/benchmarks/test_benchmark::FINUFFT";
+
+// One benchmark configuration: mode counts (N1, N2, N3), number of transforms,
+// number of nonuniform points M, and the requested tolerance.
+struct Params {
+  std::tuple<int, int, int> Nd;
+  int ntransf;
+  int64_t M;
+  double tol;
+
+  Params(std::tuple<int, int, int> Nd, int ntransf, int64_t M, double tol)
+      : Nd(Nd), ntransf(ntransf), M(M), tol(tol) {}
+
+  // Streams the configuration in the form embedded in benchmark names.
+  friend std::ostream &operator<<(std::ostream &outs, const Params &params) {
+    return outs << " N1 = " << std::get<0>(params.Nd)
+                << " N2 = " << std::get<1>(params.Nd)
+                << " N3 = " << std::get<2>(params.Nd) << " ntransf = " << params.ntransf
+                << " M = " << params.M << " tol = " << params.tol;
+  }
+};
+
+static int types[] = {1, 2, 3};
+static double sigmas[] = {1.25, 2.00};
+
+static Params float_benchmarks[] = {
+    {std::make_tuple(10000, 1, 1), 1, static_cast<int64_t>(1e7), 1e-4},
+    {std::make_tuple(320, 320, 1), 1, static_cast<int64_t>(1e7), 1e-5},
+};
+static Params double_benchmarks[] = {
+    {std::make_tuple(10000, 1, 1), 1, static_cast<int64_t>(1e7), 1e-9},
+    {std::make_tuple(320, 320, 1), 1, static_cast<int64_t>(1e7), 1e-9},
+};
+
+// Registers one benchmark per (type, sigma) pair for `test_opts` at precision T
+// (float or double). Every timed iteration covers makeplan, setpts, execute
+// and destroy; input generation is outside the timed loop.
+template<typename T> void register_benchmark(Params test_opts) {
+  for (auto &type : types) {
+    for (auto &sigma : sigmas) {
+      std::stringstream benchmark_name;
+      benchmark_name << BENCH_NAME << test_opts << " type = " << type
+                     << " sigma = " << sigma << " prec = " << typeid(T).name();
+      benchmark::RegisterBenchmark(benchmark_name.str(), [=](benchmark::State &state) {
+        const int ntransf = test_opts.ntransf;
+        const int64_t M = test_opts.M;
+        // int64_t rather than long: stays 64-bit where long is 32 bits, which
+        // is what the FINUFFT plan interface documents for mode counts.
+        const int64_t Nd[3] = {std::get<0>(test_opts.Nd), std::get<1>(test_opts.Nd),
+                               std::get<2>(test_opts.Nd)};
+        const int64_t N = Nd[0] * Nd[1] * Nd[2];
+        const int dim = Nd[2] > 1 ? 3 : Nd[1] > 1 ? 2 : 1;
+        constexpr int iflag = 1;
+        const double tol = test_opts.tol;
+
+        std::vector<T> x(M * ntransf), y(M * ntransf), z(M * ntransf);
+        std::vector<T> s(N * ntransf), t(N * ntransf), u(N * ntransf);
+        std::vector<std::complex<T>> c(M * ntransf), fk(N * ntransf);
+
+        // Fixed seed: every run benchmarks the same inputs.
+        std::default_random_engine eng{42};
+        std::uniform_real_distribution<T> dist11(-1, 1);
+        auto randm11 = [&eng, &dist11]() {
+          return dist11(eng);
+        };
+
+        for (int64_t i = 0; i < M; i++) {
+          x[i] = PI * randm11();
+          y[i] = PI * randm11();
+          z[i] = PI * randm11();
+        }
+        // Repeat the first M points for any additional transforms.
+        for (int64_t i = M; i < M * ntransf; ++i) {
+          int64_t j = i % M;
+          x[i] = x[j];
+          y[i] = y[j];
+          z[i] = z[j];
+        }
+
+        // 64-bit indices: M * ntransf and N * ntransf are int64_t products.
+        if (type == 1) {
+          for (int64_t i = 0; i < M * ntransf; i++) {
+            c[i].real(randm11());
+            c[i].imag(randm11());
+          }
+        } else if (type == 2) {
+          for (int64_t i = 0; i < N * ntransf; i++) {
+            fk[i].real(randm11());
+            fk[i].imag(randm11());
+          }
+        } else if (type == 3) {
+          for (int64_t i = 0; i < M * ntransf; i++) {
+            c[i].real(randm11());
+            c[i].imag(randm11());
+          }
+          for (int64_t i = 0; i < N * ntransf; i++) {
+            s[i] = PI * randm11();
+            t[i] = PI * randm11();
+            u[i] = PI * randm11();
+          }
+        }
+
+        T *x_p = dim >= 1 ? x.data() : nullptr;
+        T *y_p = dim >= 2 ? y.data() : nullptr;
+        T *z_p = dim == 3 ? z.data() : nullptr;
+        T *s_p = type == 3 && dim >= 1 ? s.data() : nullptr;
+        T *t_p = type == 3 && dim >= 2 ? t.data() : nullptr;
+        T *u_p = type == 3 && dim == 3 ? u.data() : nullptr;
+        finufft_opts opts;
+        finufft_default_opts(&opts);
+        opts.upsampfac = sigma;
+        opts.nthreads = 1;
+        opts.showwarn = 0;
+        for (auto _ : state) {
+          int ier = 0;
+          if constexpr (std::is_same_v<T, double>) {
+            finufft_plan_s *plan{nullptr};
+            ier = finufft_makeplan(type, dim, Nd, iflag, ntransf, tol, &plan, &opts);
+            if (ier <= 1) ier = finufft_setpts(plan, M, x_p, y_p, z_p, N, s_p, t_p, u_p);
+            if (ier <= 1) ier = finufft_execute(plan, c.data(), fk.data());
+            finufft_destroy(plan);
+          } else if constexpr (std::is_same_v<T, float>) {
+            finufftf_plan_s *plan{nullptr};
+            ier = finufftf_makeplan(type, dim, Nd, iflag, ntransf, tol, &plan, &opts);
+            if (ier <= 1) ier = finufftf_setpts(plan, M, x_p, y_p, z_p, N, s_p, t_p, u_p);
+            if (ier <= 1) ier = finufftf_execute(plan, c.data(), fk.data());
+            finufftf_destroy(plan);
+          }
+          benchmark::ClobberMemory();
+          // Per the FINUFFT error-code docs, 1 is a warning and larger values
+          // are errors; a failed transform must not be reported as a timing.
+          if (ier > 1) {
+            state.SkipWithError("FINUFFT call failed");
+            break;
+          }
+        }
+        // Report the total over all iterations, once, after the timed loop.
+        state.SetItemsProcessed(state.iterations() * (N + M));
+      });
+    }
+  }
+}
+
+int main(int argc, char **argv) {
+  benchmark::Initialize(&argc, argv);
+
+  for (auto &test_opts : float_benchmarks) {
+    register_benchmark<float>(test_opts);
+  }
+  for (auto &test_opts : double_benchmarks) {
+    register_benchmark<double>(test_opts);
+  }
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+}