diff --git a/include/nvector/nvector_kokkos.hpp b/include/nvector/nvector_kokkos.hpp index 92287a4e8f..9f43b547eb 100644 --- a/include/nvector/nvector_kokkos.hpp +++ b/include/nvector/nvector_kokkos.hpp @@ -409,6 +409,15 @@ sunrealtype N_VMinQuotient_Kokkos(N_Vector num, N_Vector denom) }, Kokkos::Min(gpu_result)); + // Kokkos 5 (possibly earlier) ignores the initial value of gpu_result and + // uses the default for a min reduction, +infinity, so in the case where all + // denominators are zero this returns infinity instead of the max real + if (gpu_result == std::numeric_limits::infinity()) + { + // Align return result with the documentation + gpu_result = std::numeric_limits::max(); + } + return gpu_result; } diff --git a/test/unit_tests/nvector/kokkos/test_nvector_kokkos.cpp b/test/unit_tests/nvector/kokkos/test_nvector_kokkos.cpp index a628befd98..2551525c44 100644 --- a/test/unit_tests/nvector/kokkos/test_nvector_kokkos.cpp +++ b/test/unit_tests/nvector/kokkos/test_nvector_kokkos.cpp @@ -15,6 +15,16 @@ * SUNDIALS Copyright End * ----------------------------------------------------------------------------- * This is the testing routine for the NVector implementation using Kokkos. + * + * Three construction scenarios are exercised: + * + * A. Allocating constructor -- Vector(length, sunctx) + * B. Managed view constructor -- Vector(managed_view, sunctx) + * C. Unmanaged view constructor -- Vector(unmanaged_view, sunctx) + * + * Additional vectors are obtained via N_VClone, which always returns a managed + * Vector, to mirror the pattern in SUNDIALS packages where the user supplies + * an initial vector and the integrator clones it to create internal workspace. * ---------------------------------------------------------------------------*/ #include @@ -39,12 +49,71 @@ using ExecSpace = Kokkos::OpenMP; using ExecSpace = Kokkos::Serial; #endif -using VecType = sundials::kokkos::Vector; +using VecType = sundials::kokkos::Vector; +using UnmanagedVecType = + sundials::kokkos::Vector; using SizeType = VecType::size_type; -/* ---------------------------------------------------------------------- +/* ----------------------------------------------------------------------------- + * Run the vector operation tests + * ---------------------------------------------------------------------------*/ +static int run_all_tests(N_Vector X, N_Vector Y, N_Vector Z, sunindextype length) +{ + int fails{0}; + + printf("\nTesting standard vector operations:\n\n"); + + fails += Test_N_VAbs(X, Z, length, 0); + fails += Test_N_VAddConst(X, Z, length, 0); + fails += Test_N_VCompare(X, Z, length, 0); + fails += Test_N_VConst(X, length, 0); + fails += Test_N_VConstrMask(X, Y, Z, length, 0); + fails += Test_N_VDiv(X, Y, Z, length, 0); + fails += Test_N_VDotProd(X, Y, length, 0); + fails += Test_N_VInv(X, Z, length, 0); + fails += Test_N_VInvTest(X, Z, length, 0); + fails += Test_N_VL1Norm(X, length, 0); + fails += Test_N_VLinearSum(X, Y, Z, length, 0); + fails += Test_N_VMaxNorm(X, length, 0); + fails += Test_N_VMin(X, length, 0); + fails += Test_N_VMinQuotient(X, Y, length, 0); + fails += Test_N_VProd(X, Y, Z, length, 0); + fails += Test_N_VScale(X, Z, length, 0); + fails += Test_N_VWL2Norm(X, Y, length, 0); + fails += Test_N_VWrmsNorm(X, Y, length, 0); + fails += Test_N_VWrmsNormMask(X, Y, Z, length, 0); + + printf("\nTesting fused and vector array operations (disabled):\n\n"); + + fails += Test_N_VLinearCombination(X, length, 0); + fails += Test_N_VScaleAddMulti(X, length, 0); + fails += Test_N_VDotProdMulti(X, length, 0); + fails += Test_N_VLinearSumVectorArray(X, length, 0); + fails += Test_N_VScaleVectorArray(X, length, 0); + fails += Test_N_VConstVectorArray(X, length, 0); + fails += Test_N_VWrmsNormVectorArray(X, length, 0); + fails += Test_N_VWrmsNormMaskVectorArray(X, length, 0); + fails += Test_N_VScaleAddMultiVectorArray(X, length, 0); + fails += Test_N_VLinearCombinationVectorArray(X, length, 0); + + printf("\nTesting local reduction operations:\n\n"); + + fails += Test_N_VDotProdLocal(X, Y, length, 0); + fails += Test_N_VMaxNormLocal(X, length, 0); + fails += Test_N_VMinLocal(X, length, 0); + fails += Test_N_VL1NormLocal(X, length, 0); + fails += Test_N_VWSqrSumLocal(X, Y, length, 0); + fails += Test_N_VWSqrSumMaskLocal(X, Y, Z, length, 0); + fails += Test_N_VInvTestLocal(X, Z, length, 0); + fails += Test_N_VConstrMaskLocal(X, Y, Z, length, 0); + fails += Test_N_VMinQuotientLocal(X, Y, length, 0); + + return fails; +} + +/* ----------------------------------------------------------------------------- * Main NVector Testing Routine - * --------------------------------------------------------------------*/ + * ---------------------------------------------------------------------------*/ int main(int argc, char* argv[]) { int fails{0}; /* counter for test failures */ @@ -70,86 +139,171 @@ int main(int argc, char* argv[]) print_timing = atoi(argv[2]); SetTiming(print_timing, 0); - printf("Testing KOKKOS N_Vector \n"); - - printf("Vector length %ld \n\n", (long int)length); - Kokkos::initialize(argc, argv); { - VecType X{static_cast(length), sunctx}; - - /* Check vector ID */ - fails += Test_N_VGetVectorID(X, SUNDIALS_NVEC_KOKKOS, 0); - - /* Test clone functions */ - fails += Test_N_VClone(X, length, 0); - fails += Test_N_VCloneVectorArray(5, X, length, 0); - - /* Check vector length */ - fails += Test_N_VGetLength(X, 0); - - /* Check vector communicator */ - fails += Test_N_VGetCommunicator(X, SUN_COMM_NULL, 0); - - /* Clone additional vectors for testing */ - VecType Y{X}; - VecType Z{X}; - - /* Standard vector operation tests */ - printf("\nTesting standard vector operations:\n\n"); - - fails += Test_N_VAbs(X, Z, length, 0); - fails += Test_N_VAddConst(X, Z, length, 0); - fails += Test_N_VCompare(X, Z, length, 0); - fails += Test_N_VConst(X, length, 0); - fails += Test_N_VConstrMask(X, Y, Z, length, 0); - fails += Test_N_VDiv(X, Y, Z, length, 0); - fails += Test_N_VDotProd(X, Y, length, 0); - fails += Test_N_VInv(X, Z, length, 0); - fails += Test_N_VInvTest(X, Z, length, 0); - fails += Test_N_VL1Norm(X, length, 0); - fails += Test_N_VLinearSum(X, Y, Z, length, 0); - fails += Test_N_VMaxNorm(X, length, 0); - fails += Test_N_VMin(X, length, 0); - fails += Test_N_VMinQuotient(X, Y, length, 0); - fails += Test_N_VProd(X, Y, Z, length, 0); - fails += Test_N_VScale(X, Z, length, 0); - fails += Test_N_VWL2Norm(X, Y, length, 0); - fails += Test_N_VWrmsNorm(X, Y, length, 0); - fails += Test_N_VWrmsNormMask(X, Y, Z, length, 0); - - /* Fused and vector array operations tests (disabled) */ - printf("\nTesting fused and vector array operations (disabled):\n\n"); - - /* create vector and test vector array operations */ - VecType U{X}; - - /* fused operations */ - fails += Test_N_VLinearCombination(U, length, 0); - fails += Test_N_VScaleAddMulti(U, length, 0); - fails += Test_N_VDotProdMulti(U, length, 0); - - /* vector array operations */ - fails += Test_N_VLinearSumVectorArray(U, length, 0); - fails += Test_N_VScaleVectorArray(U, length, 0); - fails += Test_N_VConstVectorArray(U, length, 0); - fails += Test_N_VWrmsNormVectorArray(U, length, 0); - fails += Test_N_VWrmsNormMaskVectorArray(U, length, 0); - fails += Test_N_VScaleAddMultiVectorArray(U, length, 0); - fails += Test_N_VLinearCombinationVectorArray(U, length, 0); - - /* local reduction operations */ - printf("\nTesting local reduction operations:\n\n"); - - fails += Test_N_VDotProdLocal(X, Y, length, 0); - fails += Test_N_VMaxNormLocal(X, length, 0); - fails += Test_N_VMinLocal(X, length, 0); - fails += Test_N_VL1NormLocal(X, length, 0); - fails += Test_N_VWSqrSumLocal(X, Y, length, 0); - fails += Test_N_VWSqrSumMaskLocal(X, Y, Z, length, 0); - fails += Test_N_VInvTestLocal(X, Z, length, 0); - fails += Test_N_VConstrMaskLocal(X, Y, Z, length, 0); - fails += Test_N_VMinQuotientLocal(X, Y, length, 0); + /* ------------------------------------------------------------------------- + * Scenario A: allocating constructor + * + * X owns its memory. Y and Z are managed clones of X. All three vectors + * share the same VecType specialisation so there is no managed/unmanaged + * mixing. + * -----------------------------------------------------------------------*/ + printf("====================================================\n"); + printf("Testing KOKKOS N_Vector: allocating constructor\n"); + printf("Vector length %ld\n", (long int)length); + printf("====================================================\n"); + + { + VecType X{static_cast(length), sunctx}; + + fails += Test_N_VGetVectorID(X, SUNDIALS_NVEC_KOKKOS, 0); + fails += Test_N_VClone(X, length, 0); + fails += Test_N_VCloneVectorArray(5, X, length, 0); + fails += Test_N_VGetLength(X, 0); + fails += Test_N_VGetCommunicator(X, SUN_COMM_NULL, 0); + + N_Vector Y_nv = N_VClone(X); + N_Vector Z_nv = N_VClone(X); + + fails += run_all_tests(X, Y_nv, Z_nv, length); + + N_VDestroy(Y_nv); + N_VDestroy(Z_nv); + } + + /* ------------------------------------------------------------------------- + * Scenario B: managed view constructor + * + * X is constructed from a pre-existing managed Kokkos view. The view is + * shared between the caller and the Vector (reference counted). Y and Z are + * managed clones. All three vectors are the same VecType specialisation; no + * managed/unmanaged mixing occurs. + * + * This exercises the view constructor path and verifies that constructing + * from a managed view does not allocate a second buffer. + * -----------------------------------------------------------------------*/ + printf("\n====================================================\n"); + printf("Testing KOKKOS N_Vector: managed view constructor\n"); + printf("Vector length %ld\n", (long int)length); + printf("====================================================\n"); + + { + VecType::view_type managed_view("managed_view", + static_cast(length)); + VecType X{managed_view, sunctx}; + + /* Verify zero-copy: X wraps the same allocation, no copy occurred */ + if (X.View().data() != managed_view.data()) + { + printf("FAILED: managed view constructor: data pointer mismatch -- " + "Vector did not wrap the provided view\n"); + fails++; + } + else + { + printf("PASSED: managed view constructor: data pointer matches " + "provided view\n"); + } + + fails += Test_N_VGetVectorID(X, SUNDIALS_NVEC_KOKKOS, 0); + fails += Test_N_VClone(X, length, 0); + fails += Test_N_VCloneVectorArray(5, X, length, 0); + fails += Test_N_VGetLength(X, 0); + fails += Test_N_VGetCommunicator(X, SUN_COMM_NULL, 0); + + N_Vector Y_nv = N_VClone(X); + N_Vector Z_nv = N_VClone(X); + + fails += run_all_tests(X, Y_nv, Z_nv, length); + + N_VDestroy(Y_nv); + N_VDestroy(Z_nv); + } + +#if KOKKOS_VERSION / 10000 > 4 +#warning "Unmanaged views are not currently supported with Kokkos 5+" +#else + /* ------------------------------------------------------------------------- + * Scenario C: unmanaged view constructor + * + * X is constructed from an unmanaged view wrapping a raw pointer. Y and Z + * are managed clones produced by N_VClone, which always returns a managed + * VecType regardless of the type of the vector being cloned. + * + * The user supplies X and the integrator clones it to create managed + * internal workspace vectors. Operations such as N_VScale(c, X, Z) then + * dispatch through Z->ops (managed), operating on X (unmanaged) and Z + * (managed) simultaneously. + * -----------------------------------------------------------------------*/ + printf("\n====================================================\n"); + printf("Testing KOKKOS N_Vector: unmanaged view constructor\n"); + printf("Vector length %ld\n", (long int)length); + printf("====================================================\n"); + + /* Raw pointer allocated in the correct memory space for the execution + * backend. Lifetime spans the op tests so that it outlives the Vector + * constructed over it. */ + sunrealtype* unmanaged_ptr = static_cast( + Kokkos::kokkos_malloc("unmanaged_storage", + static_cast(length) * + sizeof(sunrealtype))); + + { + UnmanagedVecType::view_type unmanaged_view(unmanaged_ptr, + static_cast(length)); + UnmanagedVecType X{unmanaged_view, sunctx}; + + /* Verify zero-copy: X wraps the raw pointer without copying */ + if (X.View().data() != unmanaged_ptr) + { + printf("FAILED: unmanaged view constructor: data pointer mismatch -- " + "Vector did not wrap the provided view\n"); + fails++; + } + else + { + printf("PASSED: unmanaged view constructor: data pointer matches " + "unmanaged storage\n"); + } + + fails += Test_N_VGetVectorID(X, SUNDIALS_NVEC_KOKKOS, 0); + + /* N_VClone on an unmanaged vector must return a managed clone that + * owns separate memory -- it must not alias the raw pointer. */ + { + N_Vector clone = N_VClone(X); + auto clone_vec{sundials::kokkos::GetVec(clone)}; + if (clone_vec->View().data() == unmanaged_ptr) + { + printf("FAILED: N_VClone of unmanaged vector aliased the original " + "unmanaged storage -- clone must own independent memory\n"); + fails++; + } + else + { + printf("PASSED: N_VClone of unmanaged vector owns independent " + "memory\n"); + } + N_VDestroy(clone); + } + + fails += Test_N_VCloneVectorArray(5, X, length, 0); + fails += Test_N_VGetLength(X, 0); + fails += Test_N_VGetCommunicator(X, SUN_COMM_NULL, 0); + + /* Y and Z are managed clones of the unmanaged X. Vector operations will + * mix unmanaged (X) and managed views (Y, Z), exercising the type-mixing + * code path. */ + N_Vector Y_nv = N_VClone(X); + N_Vector Z_nv = N_VClone(X); + + fails += run_all_tests(X, Y_nv, Z_nv, length); + + N_VDestroy(Y_nv); + N_VDestroy(Z_nv); + } + Kokkos::kokkos_free(unmanaged_ptr); +#endif } Kokkos::finalize();