Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions .github/workflows/cmake_sanitizers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ jobs:
fail-fast: false
matrix:
include:
- { os: ubuntu-22.04, toolchain: gcc-13 }
- { os: macos-14, toolchain: llvm }
- { os: ubuntu-22.04, toolchain: gcc-13, sanitizer: ON }
- { os: ubuntu-22.04, toolchain: gcc-13, sanitizer: TSAN }
- { os: macos-14, toolchain: llvm, sanitizer: ON }

steps:
- name: Show CPU info (Linux)
Expand Down Expand Up @@ -136,15 +137,21 @@ jobs:
for arch in "${arch_flags[@]}"; do
rm -rf $build_dir

ctest_args=(--output-on-failure -j)
# Keep the TSAN job focused on the concurrency-sensitive coverage.
if [[ "${{ matrix.sanitizer }}" == "TSAN" ]]; then
ctest_args=(--output-on-failure -R '^(run_testutils|run_threadsafe_execute)$')
fi

cmake -E make_directory "$build_dir"
cmake -S . -B "$build_dir" \
-DCMAKE_BUILD_TYPE=$build_type \
-DFINUFFT_ARCH_FLAGS="$arch" \
-DFINUFFT_BUILD_EXAMPLES=ON \
-DFINUFFT_BUILD_TESTS=ON \
-DFINUFFT_USE_DUCC0=ON \
-DFINUFFT_USE_SANITIZERS=ON
-DFINUFFT_USE_SANITIZERS="${{ matrix.sanitizer }}"

cmake --build "$build_dir" --config "$build_type"
ctest --test-dir "$build_dir" --output-on-failure -j
ctest --test-dir "$build_dir" "${ctest_args[@]}"
done
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ If not stated, FINUFFT is assumed (old cuFINUFFT <=1.3 is listed separately).

v2.6.0-dev

* Added `threadsafe_execute` regression test verifying concurrent `execute()`
calls on the same plan produce correct results. Added sanitizer mode selection
via `FINUFFT_USE_SANITIZERS=OFF|ON|MEMSAN|TSAN`, and extended the sanitizer
GitHub workflow to run a focused Linux TSAN job. (Barbone)
* SIMD-vectorized bin sort with parallel prefix sum: uint32_t bin counts,
ndims dispatch for vectorized coordinate binning, std::exclusive_scan for
parallel prefix sum of offsets, restored single-threaded variant as
Expand Down
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,19 @@ option(FINUFFT_USE_CUDA "Whether to build CUDA accelerated FINUFFT library (libc
option(FINUFFT_USE_DUCC0 "Whether to use DUCC0 (instead of FFTW) for CPU FFTs" OFF)
option(FINUFFT_USE_IWYU "Set CXX_INCLUDE_WHAT_YOU_USE on target (checker-only)" OFF)
option(FINUFFT_USE_OPENMP "Whether to use OpenMP for parallelization. If disabled, the finufft library will be single threaded. This does not affect the choice of FFTW library." ON)
option(FINUFFT_USE_SANITIZERS "Whether to enable sanitizers, only effective for Debug configuration." OFF)
set(
FINUFFT_USE_SANITIZERS
"OFF"
CACHE STRING
"Sanitizer mode for Debug/RelWithDebInfo builds. Supported values: OFF, ON, MEMSAN, TSAN. ON and MEMSAN both select the default address/undefined/bounds bundle."
)
# if FINUFFT_USE_DUCC0 is ON, the following options are ignored
set(FINUFFT_FFTW_LIBRARIES "DEFAULT" CACHE STRING "Specify a custom FFTW library")
set(FINUFFT_FFTW_SUFFIX "DEFAULT" CACHE STRING "Suffix for FFTW libraries (e.g. OpenMP, Threads etc.) defaults to empty string if OpenMP is disabled, else uses OpenMP. Ignored if DUCC0 is used.")
# if FINUFFT_USE_CPU is OFF, the following options are ignored
set(FINUFFT_ARCH_FLAGS "native" CACHE STRING "Compiler flags for specifying target architecture, defaults to -march=native")
# sphinx tag (don't remove): @cmake_opts_end
# Advertise the accepted sanitizer modes to cmake-gui/ccmake as a drop-down.
# The STRINGS property is only a GUI hint: it does not validate the value.
set_property(CACHE FINUFFT_USE_SANITIZERS PROPERTY STRINGS OFF ON MEMSAN TSAN)
cmake_dependent_option(FINUFFT_ENABLE_INSTALL "Disable installation in the case of python builds" ON "NOT FINUFFT_BUILD_PYTHON" OFF)
cmake_dependent_option(FINUFFT_STATIC_LINKING "Disable static libraries in the case of python builds" ON "NOT FINUFFT_BUILD_PYTHON" OFF)
cmake_dependent_option(FINUFFT_SHARED_LINKING "Shared should be the opposite of static linking" ON "NOT FINUFFT_STATIC_LINKING" OFF)
Expand Down
14 changes: 13 additions & 1 deletion cmake/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,26 @@ endif()

# ---- Sanitizers ---------------------------------------------------------------
set(FINUFFT_SANITIZER_FLAGS)
if(FINUFFT_USE_SANITIZERS)
# Resolve the requested sanitizer mode case-insensitively.
string(TOUPPER "${FINUFFT_USE_SANITIZERS}" FINUFFT_USE_SANITIZERS_MODE)

# FINUFFT_USE_SANITIZERS was previously a boolean option(), so existing scripts
# and presets may still pass other CMake boolean spellings. Map them onto
# OFF/ON instead of rejecting a configuration that used to work.
if("${FINUFFT_USE_SANITIZERS_MODE}" STREQUAL "" OR FINUFFT_USE_SANITIZERS_MODE MATCHES "^(0|FALSE|NO|N)$")
  set(FINUFFT_USE_SANITIZERS_MODE "OFF")
elseif(FINUFFT_USE_SANITIZERS_MODE MATCHES "^(1|TRUE|YES|Y)$")
  set(FINUFFT_USE_SANITIZERS_MODE "ON")
endif()

if(FINUFFT_USE_SANITIZERS_MODE STREQUAL "OFF")
  # Sanitizers disabled: no flags are added in this branch.
elseif(FINUFFT_USE_SANITIZERS_MODE STREQUAL "ON" OR FINUFFT_USE_SANITIZERS_MODE STREQUAL "MEMSAN")
  # Default bundle (address/undefined/bounds). Both GCC/Clang-style and
  # MSVC-style spellings are listed in the same candidate list.
  set(FINUFFT_SANITIZER_FLAGS
    -fsanitize=address
    -fsanitize=undefined
    -fsanitize=bounds-strict
    /fsanitize=address
    /RTC1
  )
elseif(FINUFFT_USE_SANITIZERS_MODE STREQUAL "TSAN")
  # ThreadSanitizer is selected on its own, without the address/undefined bundle.
  set(FINUFFT_SANITIZER_FLAGS -fsanitize=thread)
else()
  # Fail at configure time on anything else so a typo cannot silently disable
  # the sanitizers.
  message(
    FATAL_ERROR
    "Unsupported FINUFFT_USE_SANITIZERS value '${FINUFFT_USE_SANITIZERS}'. Use one of: OFF, ON, MEMSAN, TSAN."
  )
endif()

if(FINUFFT_SANITIZER_FLAGS)
filter_supported_compiler_flags(FINUFFT_SANITIZER_FLAGS FINUFFT_SANITIZER_FLAGS)
set(FINUFFT_SANITIZER_FLAGS $<$<CONFIG:Debug,RelWithDebInfo>:${FINUFFT_SANITIZER_FLAGS}>)
endif()
Expand Down
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ target_compile_features(testutils PRIVATE cxx_std_17)
finufft_link_test(testutils)
add_test(NAME run_testutils COMMAND testutils WORKING_DIRECTORY ${CMAKE_BINARY_DIR})

add_executable(threadsafe_execute threadsafe_execute.cpp)
finufft_link_test(threadsafe_execute)
add_test(NAME run_threadsafe_execute COMMAND threadsafe_execute WORKING_DIRECTORY ${CMAKE_BINARY_DIR})

if(NOT FINUFFT_USE_DUCC0 AND FINUFFT_USE_OPENMP)
find_package(OpenMP COMPONENTS CXX REQUIRED)
add_executable(fftw_lock_test fftw_lock_test.cpp)
Expand Down
77 changes: 77 additions & 0 deletions test/threadsafe_execute.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include <finufft.h>
#include <finufft_common/constants.h>
#include <finufft_opts.h>

#include <algorithm>
#include <cmath>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

#include "utils/dirft1d.hpp"
#include "utils/norms.hpp"

int main() {
constexpr int nthreads = 4;
constexpr int nreps = 16;
constexpr int M = 400;
constexpr int64_t N1 = 2048;
constexpr double tol = 1e-12;

finufft_opts opts;
finufft_default_opts(&opts);
opts.nthreads = 1; // crucial: parallelism is across concurrent plan executes
opts.debug = 0;

std::vector<double> x(M);
std::vector<std::complex<double>> c(M), ref(N1);
for (int j = 0; j < M; ++j) {
double t = static_cast<double>(j) / M;
x[j] = -finufft::common::PI + 2.0 * finufft::common::PI * t;
c[j] = std::complex<double>(0.5 * std::cos(13.0 * t) + 0.25 * std::sin(7.0 * t),
0.75 * std::sin(11.0 * t) - 0.2 * std::cos(5.0 * t));
}

int64_t Ns[3] = {N1, 1, 1};
finufft_plan plan;
int ier = finufft_makeplan(1, 1, Ns, +1, 1, tol, &plan, &opts);
if (ier != 0) {
std::fprintf(stderr, "finufft_makeplan failed: ier=%d\n", ier);
return ier;
}
ier = finufft_setpts(plan, M, x.data(), nullptr, nullptr, 0, nullptr, nullptr, nullptr);
if (ier != 0) {
std::fprintf(stderr, "finufft_setpts failed: ier=%d\n", ier);
finufft_destroy(plan);
return ier;
}

dirft1d1<int64_t>(M, x, c, +1, N1, ref);

std::vector<int> failures(nthreads, 0);

std::vector<std::thread> workers;
workers.reserve(nthreads);
for (int tid = 0; tid < nthreads; ++tid) {
workers.emplace_back([&, tid]() {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have never tried this. Is emplace_back non-blocking here, so that after workers.reserve(nthreads) with nthreads==4 the loop runs 4 finufft_execute calls simultaneously? And is that how you test the thread safety of parallel execution?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A brief explanation here may be useful. workers.emplace_back(...) constructs each std::thread directly in the vector. The new thread begins running the lambda immediately, and the thread constructor returns without waiting for completion, so the loop launches all threads without blocking.

After that, I call join() on each thread to wait for them all to finish. So yes, for nthreads == 4, this is intended to execute 4 finufft_execute calls concurrently and test thread safety.

std::vector<std::complex<double>> out(N1);
for (int rep = 0; rep < nreps; ++rep) {
int local_ier = finufft_execute(plan, c.data(), out.data());
double relerr = relerrtwonorm(N1, ref.data(), out.data());
if (local_ier != 0 || relerr > 10.0 * tol) {
failures[tid] = 1;
std::fprintf(stderr, "thread %d rep %d failed: ier=%d relerr=%.3g\n", tid, rep,
local_ier, relerr);
return;
}
}
});
}

for (auto &worker : workers) worker.join();

finufft_destroy(plan);
return *std::max_element(failures.begin(), failures.end());
}
Loading