diff --git a/src/opflow/model/power_bal_hiop/pbpolhiopkernels.cpp b/src/opflow/model/power_bal_hiop/pbpolhiopkernels.cpp index c1f6e70c..607cd264 100644 --- a/src/opflow/model/power_bal_hiop/pbpolhiopkernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolhiopkernels.cpp @@ -1050,7 +1050,7 @@ PetscErrorCode OPFLOWComputeDenseInequalityConstraintJacobian_PBPOLHIOP( PetscFunctionReturn(0); } -PetscErrorCode OPFLOWComputeDenseEqualityConstraintHessian_PBPOLHIOP( +PetscErrorCode OPFLOWComputeDenseEqualityConstraintsHessian_PBPOLHIOP( OPFLOW opflow, const double *x, const double *lambda, double *HDD) { PetscErrorCode ierr; PBPOLHIOP pbpolhiop = (PBPOLHIOP)opflow->model; @@ -1379,7 +1379,7 @@ PetscErrorCode OPFLOWComputeDenseEqualityConstraintHessian_PBPOLHIOP( PetscFunctionReturn(0); } -PetscErrorCode OPFLOWComputeDenseInequalityConstraintHessian_PBPOLHIOP( +PetscErrorCode OPFLOWComputeDenseInequalityConstraintsHessian_PBPOLHIOP( OPFLOW opflow, const double *x, const double *lambda, double *HDD) { int i; PBPOLHIOP pbpolhiop = (PBPOLHIOP)opflow->model; @@ -1821,12 +1821,12 @@ PetscErrorCode OPFLOWComputeDenseHessian_PBPOLHIOP(OPFLOW opflow, HDD[i] = 0.0; /* Equality constraint Hessian */ - ierr = OPFLOWComputeDenseEqualityConstraintHessian_PBPOLHIOP(opflow, x, - lambda, HDD); + ierr = OPFLOWComputeDenseEqualityConstraintsHessian_PBPOLHIOP(opflow, x, + lambda, HDD); CHKERRQ(ierr); if (opflow->nconineq) { - ierr = OPFLOWComputeDenseInequalityConstraintHessian_PBPOLHIOP( + ierr = OPFLOWComputeDenseInequalityConstraintsHessian_PBPOLHIOP( opflow, x, lambda + opflow->nconeq, HDD); CHKERRQ(ierr); } diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopkernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopkernels.cpp index 37772894..c79c572a 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopkernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopkernels.cpp @@ -1204,7 +1204,7 @@ PetscErrorCode OPFLOWComputeDenseInequalityConstraintJacobian_PBPOLRAJAHIOP( /** * @param[inout] HDD_dev Hessian matrix with size nxdense x nxdense */ -PetscErrorCode OPFLOWComputeDenseEqualityConstraintHessian_PBPOLRAJAHIOP( +PetscErrorCode OPFLOWComputeDenseEqualityConstraintsHessian_PBPOLRAJAHIOP( OPFLOW opflow, const double *x_dev, const double *lambda_dev, double *HDD_dev) { PbpolModelRajaHiop *pbpolrajahiop = @@ -1594,7 +1594,7 @@ PetscErrorCode OPFLOWComputeDenseEqualityConstraintHessian_PBPOLRAJAHIOP( /** * @param[inout] HDD_dev Hessian matrix with size nxdense x nxdense */ -PetscErrorCode OPFLOWComputeDenseInequalityConstraintHessian_PBPOLRAJAHIOP( +PetscErrorCode OPFLOWComputeDenseInequalityConstraintsHessian_PBPOLRAJAHIOP( OPFLOW opflow, const double *x_dev, const double *lambda_dev, double *HDD_dev) { PbpolModelRajaHiop *pbpolrajahiop = @@ -2085,12 +2085,12 @@ PetscErrorCode OPFLOWComputeDenseHessian_PBPOLRAJAHIOP(OPFLOW opflow, RAJA_LAMBDA(RAJA::Index_type i) { HDD_dev[i] = 0.0; }); /* Equality constraint Hessian */ - ierr = OPFLOWComputeDenseEqualityConstraintHessian_PBPOLRAJAHIOP( + ierr = OPFLOWComputeDenseEqualityConstraintsHessian_PBPOLRAJAHIOP( opflow, x_dev, lambda_dev, HDD_dev); CHKERRQ(ierr); if (opflow->nconineq) { - ierr = OPFLOWComputeDenseInequalityConstraintHessian_PBPOLRAJAHIOP( + ierr = OPFLOWComputeDenseInequalityConstraintsHessian_PBPOLRAJAHIOP( opflow, x_dev, lambda_dev + opflow->nconeq, HDD_dev); CHKERRQ(ierr); } diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index b052d727..06eebef5 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -1,29 +1,7 @@ add_subdirectory(opflow) +add_subdirectory(pflow) add_subdirectory(ps) -if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_CUDA) - set_source_files_properties(test_acopf.cpp PROPERTIES LANGUAGE CUDA) -endif() - -add_executable(test_acopf test_acopf.cpp utils/test_acopf_utils.cpp) -target_link_libraries(test_acopf ExaGO::OPFLOW) -target_include_directories(test_acopf PRIVATE ./utils) - -add_executable(test_pflow test_pflow.cpp utils/test_acopf_utils.cpp) -target_link_libraries(test_pflow ExaGO::PFLOW) -target_include_directories(test_pflow PRIVATE ./utils) - -if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIOP_SPARSE) - if(EXAGO_ENABLE_CUDA) - set_source_files_properties(test_ineqjac_gpu.cpp PROPERTIES LANGUAGE CUDA) - endif() - add_executable(test_ineqjac_gpu test_ineqjac_gpu.cpp) - target_link_libraries(test_ineqjac_gpu ExaGO::OPFLOW) - target_include_directories( - test_ineqjac_gpu PRIVATE ${CMAKE_SOURCE_DIR}/src/opflow - ) -endif() - add_executable(test_error_handler test_error_handler.cpp) target_link_libraries(test_error_handler ExaGO::UTILS) @@ -31,51 +9,6 @@ add_executable(test_logger test_logger.cpp) target_link_libraries(test_logger ExaGO::UTILS) if(EXAGO_RUN_TESTS) - # Unit tests - exago_add_test( - NAME - "UNIT_TESTS_OPFLOW" - DEPENDS - HIOP - COMMAND - ${RUNCMD} - $ - -opflow_genbusvoltage - VARIABLE_WITHIN_BOUNDS - -validation_dir - ${CMAKE_SOURCE_DIR}/datafiles/test_validation - NETFILES - ${network_files} - ) - - exago_add_test( - NAME - "UNIT_TESTS_PFLOW" - COMMAND - ${RUNCMD} - $ - -validation_dir - ${CMAKE_SOURCE_DIR}/datafiles/test_validation - NETFILES - ${network_files} - ) - - if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIOP_SPARSE) - exago_add_test( - NAME - "UNIT_TEST_INEQJAC_GPU" - DEPENDS - HIOP - COMMAND - ${RUNCMD} - $ - -opflow_genbusvoltage - VARIABLE_WITHIN_BOUNDS - NETFILES - ${network_files} - ) - endif() - exago_add_test( NAME UNIT_TESTS_UTILS COMMAND $ ) @@ -92,5 +25,4 @@ if(EXAGO_RUN_TESTS) DEPENDS LOGGING ) - endif() diff --git a/tests/unit/opflow/CMakeLists.txt b/tests/unit/opflow/CMakeLists.txt index a312cf31..13673dd7 100644 --- a/tests/unit/opflow/CMakeLists.txt +++ b/tests/unit/opflow/CMakeLists.txt @@ -1,2 +1,83 @@ +# Tests with "analytical" solutions add_subdirectory(objective) add_subdirectory(constraint_jacobian) + +# Tests using PETSc solution as reference +if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_CUDA) + set_source_files_properties(test_acopf.cpp PROPERTIES LANGUAGE CUDA) + if(EXAGO_ENABLE_HIOP_SPARSE) + set_source_files_properties(test_eqjac_gpu.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(test_ineqjac_gpu.cpp PROPERTIES LANGUAGE CUDA) + endif() +endif() + +add_executable( + test_acopf test_acopf.cpp + ${CMAKE_SOURCE_DIR}/tests/unit/utils/test_acopf_utils.cpp +) +target_link_libraries(test_acopf ExaGO::OPFLOW) +target_include_directories( + test_acopf PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/utils +) + +if(EXAGO_ENABLE_HIOP_SPARSE AND EXAGO_ENABLE_GPU) + add_executable(test_ineqjac_gpu test_ineqjac_gpu.cpp) + target_link_libraries(test_ineqjac_gpu ExaGO::OPFLOW) + target_include_directories( + test_ineqjac_gpu PRIVATE ${CMAKE_SOURCE_DIR}/src/opflow + ) + + add_executable(test_eqjac_gpu test_eqjac_gpu.cpp) + target_link_libraries(test_eqjac_gpu ExaGO::OPFLOW) + target_include_directories( + test_eqjac_gpu PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/opflow + ) +endif() + +if(EXAGO_RUN_TESTS) + exago_add_test( + NAME + "UNIT_TESTS_OPFLOW" + DEPENDS + HIOP + COMMAND + ${RUNCMD} + $ + -opflow_genbusvoltage + VARIABLE_WITHIN_BOUNDS + -validation_dir + ${CMAKE_SOURCE_DIR}/datafiles/test_validation + NETFILES + ${network_files} + ) + + exago_add_test( + NAME + "UNIT_TESTS_OPFLOW_EQUALITY_CONSTRAINT_JACOBIAN_GPU" + DEPENDS + GPU + HIOP_SPARSE + RAJA + COMMAND + ${RUNCMD} + $ + NETFILES + ${network_files} + ) + + exago_add_test( + NAME + "UNIT_TESTS_OPFLOW_INEQUALITY_CONSTRAINT_JACOBIAN_GPU" + DEPENDS + GPU + HIOP_SPARSE + RAJA + COMMAND + ${RUNCMD} + $ + -opflow_genbusvoltage + VARIABLE_WITHIN_BOUNDS + NETFILES + ${network_files} + ) +endif() diff --git a/tests/unit/opflow/constraint_jacobian/equality/CMakeLists.txt b/tests/unit/opflow/constraint_jacobian/equality/CMakeLists.txt index 0131169b..f16c9af1 100644 --- a/tests/unit/opflow/constraint_jacobian/equality/CMakeLists.txt +++ b/tests/unit/opflow/constraint_jacobian/equality/CMakeLists.txt @@ -1,28 +1,6 @@ if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_CUDA) set_source_files_properties(jac_eq_acopf.cpp PROPERTIES LANGUAGE CUDA) - set_source_files_properties(test_eqjac_compare.cpp PROPERTIES LANGUAGE CUDA) endif() -if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIP) - set_source_files_properties(test_eqjac_compare.cpp PROPERTIES LANGUAGE HIP) -endif() - -add_executable(test_eqjac_compare test_eqjac_compare.cpp) -target_link_libraries(test_eqjac_compare ExaGO::OPFLOW) -target_include_directories( - test_eqjac_compare PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/opflow -) - -if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIP) - set_source_files_properties(test_eqjac_perf.cpp PROPERTIES LANGUAGE HIP) -endif() -if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_CUDA) - set_source_files_properties(test_eqjac_perf.cpp PROPERTIES LANGUAGE CUDA) -endif() -add_executable(test_eqjac_perf test_eqjac_perf.cpp) -target_link_libraries(test_eqjac_perf ExaGO::OPFLOW) -target_include_directories( - test_eqjac_perf PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/opflow -) add_executable( jac_eq_acopf jac_eq_acopf.cpp @@ -76,23 +54,6 @@ if(EXAGO_ENABLE_RAJA) list(APPEND dependencies HIOP) endif() -if(EXAGO_RUN_TESTS) - if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_HIOP) - set(prefix_net ${CMAKE_SOURCE_DIR}/datafiles/unit/opflow/objective/) - exago_add_test( - NAME - UNIT_TESTS_EQJAC_GPU_VS_PETSC_testx3 - DEPENDS - HIOP_SPARSE - COMMAND - ${RUNCMD} - $ - -netfile - ${prefix_net}testx3.m - ) - endif() -endif() - if(EXAGO_RUN_TESTS) foreach( model @@ -108,7 +69,7 @@ if(EXAGO_RUN_TESTS) foreach(network num IN ZIP_LISTS obj_network_files num_copies) get_filename_component(net ${network} NAME) set(testname - "UNIT_TESTS_EQUALITY_CONSTRAINT_JACOBIAN_${net}_${solver}_${model}" + "UNIT_TESTS_OPFLOW_EQUALITY_CONSTRAINT_JACOBIAN_${net}_${solver}_${model}" ) exago_add_test( NAME diff --git a/tests/unit/opflow/constraint_jacobian/equality/test_eqjac_perf.cpp b/tests/unit/opflow/constraint_jacobian/equality/test_eqjac_perf.cpp deleted file mode 100644 index f42223a2..00000000 --- a/tests/unit/opflow/constraint_jacobian/equality/test_eqjac_perf.cpp +++ /dev/null @@ -1,241 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include - -#if defined(EXAGO_ENABLE_RAJA) -#include -#include -#include -#endif - -using Clock = std::chrono::high_resolution_clock; -using Ms = std::chrono::duration; - -static double benchmarkPETSc(OPFLOW opflow, Vec X, int niters) { - PetscErrorCode ierr; - PetscScalar *x_arr; - - ierr = VecGetArray(X, &x_arr); - auto t0 = Clock::now(); - for (int iter = 0; iter < niters; iter++) { - ierr = (*opflow->modelops.computeequalityconstraintjacobian)( - opflow, X, opflow->Jac_Ge); - } - auto t1 = Clock::now(); - ierr = VecRestoreArray(X, &x_arr); - - return Ms(t1 - t0).count() / niters; -} - -int main(int argc, char **argv) { - PetscErrorCode ierr; - PetscBool flg; - char file_c_str[PETSC_MAX_PATH_LEN]; - std::string file; - char appname[] = "opflow"; - MPI_Comm comm = MPI_COMM_WORLD; - int niters = 100; - - char help[] = "Benchmark PETSc vs GPU equality constraint Jacobian\n"; - - ierr = ExaGOInitialize(comm, &argc, &argv, appname, help); - if (ierr) - return ierr; - - ierr = PetscOptionsGetString(NULL, NULL, "-netfile", file_c_str, - PETSC_MAX_PATH_LEN, &flg); - if (!flg) - file = "../datafiles/case9/case9mod.m"; - else - file.assign(file_c_str); - - ierr = PetscOptionsGetInt(NULL, NULL, "-niters", &niters, &flg); - - /* ---------------------------------------------------------------- - * PETSc path: IPOPT + PBPOL - * ---------------------------------------------------------------- */ - OPFLOW opflow_ref; - ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow_ref); - CHKERRQ(ierr); - ierr = OPFLOWReadMatPowerData(opflow_ref, file.c_str()); - CHKERRQ(ierr); - ierr = OPFLOWSetModel(opflow_ref, OPFLOWMODEL_PBPOL); - CHKERRQ(ierr); - ierr = OPFLOWSetSolver(opflow_ref, OPFLOWSOLVER_IPOPT); - CHKERRQ(ierr); - ierr = OPFLOWSetInitializationType(opflow_ref, OPFLOWINIT_FROMFILE); - CHKERRQ(ierr); - ierr = OPFLOWSetUp(opflow_ref); - CHKERRQ(ierr); - - Vec X_ref; - ierr = OPFLOWGetSolution(opflow_ref, &X_ref); - CHKERRQ(ierr); - - /* Warmup */ - benchmarkPETSc(opflow_ref, X_ref, 5); - double petsc_ms = benchmarkPETSc(opflow_ref, X_ref, niters); - - int petsc_nnz = 0; - { - MatInfo info; - ierr = MatGetInfo(opflow_ref->Jac_Ge, MAT_LOCAL, &info); - petsc_nnz = (int)info.nz_used; - } - - /* ---------------------------------------------------------------- - * GPU path: HIOPSPARSEGPU + PBPOLRAJAHIOPSPARSE - * ---------------------------------------------------------------- */ - OPFLOW opflow_gpu; - ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow_gpu); - CHKERRQ(ierr); - ierr = OPFLOWReadMatPowerData(opflow_gpu, file.c_str()); - CHKERRQ(ierr); - ierr = OPFLOWSetModel(opflow_gpu, OPFLOWMODEL_PBPOLRAJAHIOPSPARSE); - CHKERRQ(ierr); - ierr = OPFLOWSetSolver(opflow_gpu, OPFLOWSOLVER_HIOPSPARSEGPU); - CHKERRQ(ierr); - ierr = OPFLOWSetInitializationType(opflow_gpu, OPFLOWINIT_FROMFILE); - CHKERRQ(ierr); -#ifdef EXAGO_ENABLE_GPU - ierr = OPFLOWSetHIOPComputeMode(opflow_gpu, "GPU"); - CHKERRQ(ierr); -#endif - ierr = OPFLOWSetUp(opflow_gpu); - CHKERRQ(ierr); - - Vec X_gpu; - ierr = OPFLOWGetSolution(opflow_gpu, &X_gpu); - CHKERRQ(ierr); - - int nx = opflow_gpu->nx; - int nnz_eq = opflow_gpu->nnz_eqjacsp; - -#if defined(EXAGO_ENABLE_RAJA) - auto &resmgr = umpire::ResourceManager::getInstance(); - umpire::Allocator h_allocator = resmgr.getAllocator("HOST"); - - double *x_host; - ierr = VecGetArray(X_gpu, &x_host); - CHKERRQ(ierr); - - double *values_dev; - double *x_dev; - -#ifdef EXAGO_ENABLE_GPU - umpire::Allocator d_allocator = resmgr.getAllocator("DEVICE"); - x_dev = static_cast(d_allocator.allocate(nx * sizeof(double))); - values_dev = - static_cast(d_allocator.allocate(nnz_eq * sizeof(double))); - - umpire::util::AllocationRecord record_x{x_host, sizeof(double) * nx, - h_allocator.getAllocationStrategy()}; - resmgr.registerAllocation(x_host, record_x); - resmgr.copy(x_dev, x_host); -#else - x_dev = x_host; - values_dev = - static_cast(h_allocator.allocate(nnz_eq * sizeof(double))); -#endif - - /* Run sparsity phase once (not timed — this is one-time setup) */ - { - int *iRow_dev, *jCol_dev; -#ifdef EXAGO_ENABLE_GPU - iRow_dev = static_cast(d_allocator.allocate(nnz_eq * sizeof(int))); - jCol_dev = static_cast(d_allocator.allocate(nnz_eq * sizeof(int))); -#else - iRow_dev = static_cast(h_allocator.allocate(nnz_eq * sizeof(int))); - jCol_dev = static_cast(h_allocator.allocate(nnz_eq * sizeof(int))); -#endif - ierr = (*opflow_gpu->modelops.computesparseequalityconstraintjacobianhiop)( - opflow_gpu, x_dev, iRow_dev, jCol_dev, NULL); - CHKERRQ(ierr); -#ifdef EXAGO_ENABLE_GPU - d_allocator.deallocate(iRow_dev); - d_allocator.deallocate(jCol_dev); -#else - h_allocator.deallocate(iRow_dev); - h_allocator.deallocate(jCol_dev); -#endif - } - - /* Warmup the values kernel */ - for (int i = 0; i < 5; i++) { - ierr = (*opflow_gpu->modelops.computesparseequalityconstraintjacobianhiop)( - opflow_gpu, x_dev, NULL, NULL, values_dev); - CHKERRQ(ierr); - } - - // HIP kernels do not synchronize by default -#ifdef EXAGO_ENABLE_HIP - int status = hipDeviceSynchronize(); -#endif - - /* Timed runs */ - auto t0 = Clock::now(); - for (int iter = 0; iter < niters; iter++) { - ierr = (*opflow_gpu->modelops.computesparseequalityconstraintjacobianhiop)( - opflow_gpu, x_dev, NULL, NULL, values_dev); - CHKERRQ(ierr); - } - - // HIP kernels do not synchronize by default -#ifdef EXAGO_ENABLE_HIP - status = hipDeviceSynchronize(); -#endif - - auto t1 = Clock::now(); - double gpu_ms = Ms(t1 - t0).count() / niters; - -#ifdef EXAGO_ENABLE_GPU - d_allocator.deallocate(x_dev); - d_allocator.deallocate(values_dev); -#else - h_allocator.deallocate(values_dev); -#endif - - ierr = VecRestoreArray(X_gpu, &x_host); - CHKERRQ(ierr); -#else - double gpu_ms = 0.0; -#endif - - /* ---------------------------------------------------------------- - * Print results - * ---------------------------------------------------------------- */ - printf("\n"); - printf("================================================================\n"); - printf(" Equality Constraint Jacobian — Performance Comparison\n"); - printf("================================================================\n"); - printf(" Network: %s\n", file.c_str()); - printf(" Buses: %d\n", opflow_gpu->ps->nbus); - printf(" Variables (nx): %d\n", nx); - printf(" Eq constraints: %d\n", opflow_gpu->nconeq); - printf(" nnz (PETSc): %d\n", petsc_nnz); - printf(" nnz (GPU): %d\n", nnz_eq); - printf(" Iterations: %d\n", niters); - printf("----------------------------------------------------------------\n"); - printf(" %-20s %12s %12s\n", "", "PETSc (CPU)", "RAJA (GPU)"); - printf(" %-20s %12s %12s\n", "", "-----------", "----------"); - printf(" %-20s %10.4f ms %10.4f ms\n", "Avg time/call", petsc_ms, gpu_ms); - if (gpu_ms > 0.0) { - double speedup = petsc_ms / gpu_ms; - printf(" %-20s %10s %9.2fx\n", "Speedup", "", speedup); - } - printf( - "================================================================\n\n"); - - ierr = OPFLOWDestroy(&opflow_ref); - CHKERRQ(ierr); - ierr = OPFLOWDestroy(&opflow_gpu); - CHKERRQ(ierr); - - ExaGOFinalize(); - return 0; -} diff --git a/tests/unit/opflow/constraint_jacobian/inequality/CMakeLists.txt b/tests/unit/opflow/constraint_jacobian/inequality/CMakeLists.txt index 18177699..57cba747 100644 --- a/tests/unit/opflow/constraint_jacobian/inequality/CMakeLists.txt +++ b/tests/unit/opflow/constraint_jacobian/inequality/CMakeLists.txt @@ -65,7 +65,7 @@ if(EXAGO_RUN_TESTS) foreach(network num IN ZIP_LISTS obj_network_files num_copies) get_filename_component(net ${network} NAME) set(testname - "UNIT_TESTS_INEQUALITY_CONSTRAINT_JACOBIAN_${net}_${solver}_${model}" + "UNIT_TESTS_OPFLOW_INEQUALITY_CONSTRAINT_JACOBIAN_${net}_${solver}_${model}" ) exago_add_test( NAME diff --git a/tests/unit/opflow/Hessian/README.md b/tests/unit/opflow/hessian/README.md similarity index 92% rename from tests/unit/opflow/Hessian/README.md rename to tests/unit/opflow/hessian/README.md index 9cb6f382..102c8e29 100644 --- a/tests/unit/opflow/Hessian/README.md +++ b/tests/unit/opflow/hessian/README.md @@ -14,4 +14,4 @@ A 5-bus system **CICJ-unittestx1.m** will be used as a basis for this test. In a ### Hessian sparsity structure: The Hessian has the following sparsity structure for our test: -![Hessian_sparsity.png](/tests/unit/opflow/Hessian/Hessian_sparsity.png) +![hessian_sparsity.png](/tests/unit/opflow/hessian/hessian_sparsity.png) diff --git a/tests/unit/opflow/Hessian/Hessian_sparsity.png b/tests/unit/opflow/hessian/hessian_sparsity.png similarity index 100% rename from tests/unit/opflow/Hessian/Hessian_sparsity.png rename to tests/unit/opflow/hessian/hessian_sparsity.png diff --git a/tests/unit/opflow/objective/CMakeLists.txt b/tests/unit/opflow/objective/CMakeLists.txt index 07b52079..f03f1ee3 100644 --- a/tests/unit/opflow/objective/CMakeLists.txt +++ b/tests/unit/opflow/objective/CMakeLists.txt @@ -3,11 +3,16 @@ if(EXAGO_ENABLE_RAJA AND EXAGO_ENABLE_CUDA) endif() add_executable( - objective_acopf objective_acopf.cpp ../../utils/test_acopf_utils.cpp + objective_acopf objective_acopf.cpp + ${CMAKE_SOURCE_DIR}/tests/unit/utils/test_acopf_utils.cpp ) target_link_libraries(objective_acopf ExaGO::OPFLOW) -target_include_directories(objective_acopf PRIVATE ../../utils) -target_include_directories(objective_acopf PRIVATE ../) +target_include_directories( + objective_acopf PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/utils +) +target_include_directories( + objective_acopf PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/opflow +) # Network files to run on - doing 3 and 600 bus examples. set(prefix ${CMAKE_SOURCE_DIR}/datafiles/unit/opflow/objective/) @@ -40,7 +45,7 @@ if(EXAGO_RUN_TESTS) # Iterate over networks, matching network file to num_copies foreach(network num IN ZIP_LISTS obj_network_files num_copies) get_filename_component(net ${network} NAME) - set(testname "UNIT_TESTS_OBJECTIVE_${net}_${solver}_${model}") + set(testname "UNIT_TESTS_OPFLOW_OBJECTIVE_${net}_${solver}_${model}") exago_add_test( NAME ${testname} diff --git a/tests/unit/test_acopf.cpp b/tests/unit/opflow/test_acopf.cpp similarity index 99% rename from tests/unit/test_acopf.cpp rename to tests/unit/opflow/test_acopf.cpp index 835a829f..98a96263 100644 --- a/tests/unit/test_acopf.cpp +++ b/tests/unit/opflow/test_acopf.cpp @@ -6,7 +6,7 @@ #include #include -#include "opflow/opflow_tests.h" +#include "opflow_tests.h" #include "test_acopf_utils.h" #if defined(EXAGO_ENABLE_RAJA) diff --git a/tests/unit/opflow/constraint_jacobian/equality/test_eqjac_compare.cpp b/tests/unit/opflow/test_eqjac_gpu.cpp similarity index 75% rename from tests/unit/opflow/constraint_jacobian/equality/test_eqjac_compare.cpp rename to tests/unit/opflow/test_eqjac_gpu.cpp index c2fd7cb9..0665a31b 100644 --- a/tests/unit/opflow/constraint_jacobian/equality/test_eqjac_compare.cpp +++ b/tests/unit/opflow/test_eqjac_gpu.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -14,6 +15,9 @@ #include #endif +using Clock = std::chrono::high_resolution_clock; +using Ms = std::chrono::duration; + struct TripletEntry { int row, col; double val; @@ -40,6 +44,22 @@ static void computeReferenceJacobian(OPFLOW opflow, Vec X, } } +static double benchmarkPETSc(OPFLOW opflow, Vec X, int niters) { + PetscErrorCode ierr; + PetscScalar *x_arr; + + ierr = VecGetArray(X, &x_arr); + auto t0 = Clock::now(); + for (int iter = 0; iter < niters; iter++) { + ierr = (*opflow->modelops.computeequalityconstraintjacobian)( + opflow, X, opflow->Jac_Ge); + } + auto t1 = Clock::now(); + ierr = VecRestoreArray(X, &x_arr); + + return Ms(t1 - t0).count() / niters; +} + int main(int argc, char **argv) { PetscErrorCode ierr; PetscBool flg; @@ -47,8 +67,10 @@ int main(int argc, char **argv) { std::string file; char appname[] = "opflow"; MPI_Comm comm = MPI_COMM_WORLD; + int niters = 1000; - char help[] = "Compare PETSc vs GPU equality constraint Jacobian\n"; + char help[] = + "Compare and benchmark PETSc vs GPU equality constraint Jacobian\n"; ierr = ExaGOInitialize(comm, &argc, &argv, appname, help); if (ierr) { @@ -64,8 +86,8 @@ int main(int argc, char **argv) { file.assign(file_c_str); /* ---------------------------------------------------------------- - * Step 1: Set up OPFLOW with PBPOL model (PETSc path) to get - * the reference Jacobian and the initial guess X. + * Set up OPFLOW with PBPOL model (PETSc path) to get + * the reference Jacobian and the initial guess X. * ---------------------------------------------------------------- */ OPFLOW opflow_ref; ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow_ref); @@ -89,7 +111,7 @@ int main(int argc, char **argv) { computeReferenceJacobian(opflow_ref, X_ref, ref_entries); /* ---------------------------------------------------------------- - * Step 2: Set up OPFLOW with HIOPSPARSE to exercise the GPU path. + * Set up OPFLOW with HIOPSPARSE to exercise the GPU path. * ---------------------------------------------------------------- */ OPFLOW opflow_gpu; ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow_gpu); @@ -119,7 +141,7 @@ int main(int argc, char **argv) { printf("\n"); printf("============================================================\n"); - printf(" Equality Constraint Jacobian: PETSc vs GPU Comparison\n"); + printf(" Equality constraint Jacobian: PETSc vs GPU Comparison\n"); printf(" Network: %s\n", file.c_str()); printf(" nx = %d, nconeq = %d, nnz_eqjac(GPU) = %d, nnz_eqjac(PETSc) = %d\n", nx, opflow_gpu->nconeq, nnz_eq, (int)ref_entries.size()); @@ -190,13 +212,13 @@ int main(int argc, char **argv) { } /* ---------------------------------------------------------------- - * Step 3: Compare and print results + * Compare and print results * ---------------------------------------------------------------- */ int n_match = 0, n_mismatch = 0, n_missing_gpu = 0, n_extra_gpu = 0; double max_abs_err = 0.0, max_rel_err = 0.0; int worst_row = -1, worst_col = -1; double worst_ref = 0, worst_gpu = 0; - const double tol = 1e-6; + const double tol = 1e-8; printf(" %-8s %-8s %16s %16s %12s %s\n", "Row", "Col", "PETSc (ref)", "GPU", "AbsErr", "Status"); @@ -255,7 +277,7 @@ int main(int argc, char **argv) { printf("\n"); printf("============================================================\n"); - printf(" SUMMARY\n"); + printf(" Validation summary\n"); printf("============================================================\n"); printf(" PETSc nnz: %d\n", (int)ref_entries.size()); printf(" GPU nnz: %d\n", nnz_eq); @@ -275,6 +297,41 @@ int main(int argc, char **argv) { int result = (n_mismatch == 0 && n_missing_gpu == 0 && n_extra_gpu == 0) ? 0 : 1; + /* ---------------------------------------------------------------- + * Benchmark performance + * ---------------------------------------------------------------- */ + /* Warmup and benchmark PETSc*/ + benchmarkPETSc(opflow_ref, X_ref, 5); + double petsc_ms = benchmarkPETSc(opflow_ref, X_ref, niters); + + /* Warmup the GPU values kernel */ + for (int i = 0; i < 5; i++) { + ierr = (*opflow_gpu->modelops.computesparseequalityconstraintjacobianhiop)( + opflow_gpu, x_dev, NULL, NULL, values_dev); + CHKERRQ(ierr); + } + + // HIP kernels do not synchronize by default +#ifdef EXAGO_ENABLE_HIP + int status = hipDeviceSynchronize(); +#endif + + /* Timed runs */ + auto t0 = Clock::now(); + for (int iter = 0; iter < niters; iter++) { + ierr = (*opflow_gpu->modelops.computesparseequalityconstraintjacobianhiop)( + opflow_gpu, x_dev, NULL, NULL, values_dev); + CHKERRQ(ierr); + } + + // HIP kernels do not synchronize by default +#ifdef EXAGO_ENABLE_HIP + status = hipDeviceSynchronize(); +#endif + + auto t1 = Clock::now(); + double gpu_ms = Ms(t1 - t0).count() / niters; + h_allocator.deallocate(iRow); h_allocator.deallocate(jCol); h_allocator.deallocate(values); @@ -287,6 +344,25 @@ int main(int argc, char **argv) { ierr = VecRestoreArray(X_gpu, &x_host); CHKERRQ(ierr); + + /* ---------------------------------------------------------------- + * Print benchmark results + * ---------------------------------------------------------------- */ + printf("\n"); + printf("================================================================\n"); + printf(" Equality constraint Jacobian — performance comparison\n"); + printf("================================================================\n"); + printf(" Iterations: %d\n", niters); + printf("----------------------------------------------------------------\n"); + printf(" %-20s %12s %12s\n", "", "PETSc (CPU)", "RAJA (GPU)"); + printf(" %-20s %12s %12s\n", "", "-----------", "----------"); + printf(" %-20s %10.4f ms %10.4f ms\n", "Avg time/call", petsc_ms, gpu_ms); + if (gpu_ms > 0.0) { + double speedup = petsc_ms / gpu_ms; + printf(" %-20s %10s %9.2fx\n", "Speedup", "", speedup); + } + printf( + "================================================================\n\n"); #endif // EXAGO_ENABLE_RAJA ierr = OPFLOWDestroy(&opflow_ref); diff --git a/tests/unit/test_ineqjac_gpu.cpp b/tests/unit/opflow/test_ineqjac_gpu.cpp similarity index 88% rename from tests/unit/test_ineqjac_gpu.cpp rename to tests/unit/opflow/test_ineqjac_gpu.cpp index bc473c70..0d54a98d 100644 --- a/tests/unit/test_ineqjac_gpu.cpp +++ b/tests/unit/opflow/test_ineqjac_gpu.cpp @@ -49,7 +49,7 @@ int main(int argc, char **argv) { std::string file; char appname[] = "opflow"; MPI_Comm comm = MPI_COMM_WORLD; - char help[] = "Test 8: GPU ineq Jacobian validation\n"; + char help[] = "GPU inequality constraints Jacobian validation\n"; int fail = 0; ierr = ExaGOInitialize(comm, &argc, &argv, appname, help); @@ -66,12 +66,10 @@ int main(int argc, char **argv) { else file.assign(file_c_str); - std::cout << "=== Test 8: GPU Inequality Jacobian Validation ===" - << std::endl; std::cout << "Network file: " << file << std::endl; /* ------------------------------------------------------------------ - * Step 1: Solve with IPOPT/PBPOL to get a realistic solution + * Solve with IPOPT/PBPOL to get a realistic solution * ------------------------------------------------------------------ */ OPFLOW opflow_ref; Vec Xsol; @@ -99,8 +97,11 @@ int main(int argc, char **argv) { ierr = VecGetArray(Xsol, &xsol_nat); CHKERRQ(ierr); + std::cout << "\n=== GPU inequality constraints Jacobian validation ===" + << std::endl; + /* ------------------------------------------------------------------ - * Step 2: Create the PBPOLRAJAHIOPSPARSE model and set up + * Create the PBPOLRAJAHIOPSPARSE model and set up * ------------------------------------------------------------------ */ OPFLOW opflow; ierr = OPFLOWCreate(PETSC_COMM_WORLD, &opflow); @@ -118,11 +119,12 @@ int main(int argc, char **argv) { ierr = OPFLOWGetSizes(opflow, &nx, &nconeq, &nconineq); CHKERRQ(ierr); - std::cout << "nx=" << nx << " nconeq=" << nconeq << " nconineq=" << nconineq - << " nnz_ineqjacsp=" << opflow->nnz_ineqjacsp << std::endl; + std::cout << " nx = " << nx << " , nconeq = " << nconeq + << " , nconineq = " << nconineq + << " , nnz_ineqjacsp=" << opflow->nnz_ineqjacsp << std::endl; if (!nconineq) { - std::cout << "No inequality constraints -- nothing to test. PASS." + std::cout << " No inequality constraints -- nothing to test. PASS." << std::endl; ierr = VecRestoreArray(Xsol, &xsol_nat); CHKERRQ(ierr); @@ -135,7 +137,7 @@ int main(int argc, char **argv) { } /* ------------------------------------------------------------------ - * Step 3: Copy IPOPT solution into opflow->X (natural ordering) + * Copy IPOPT solution into opflow->X (natural ordering) * ------------------------------------------------------------------ */ double *x_nat; ierr = VecGetArray(opflow->X, &x_nat); @@ -148,10 +150,11 @@ int main(int argc, char **argv) { ierr = VecRestoreArray(Xsol, &xsol_nat); CHKERRQ(ierr); - std::cout << "Evaluating Jacobian at IPOPT-converged solution." << std::endl; + std::cout << " Evaluating Jacobian at IPOPT-converged solution..." + << std::endl; /* ------------------------------------------------------------------ - * Step 4: Compute reference inequality Jacobian via PETSc. + * Compute reference inequality Jacobian via PETSc. * The first call establishes the sparsity pattern, the second * computes the actual values at the converged solution. * ------------------------------------------------------------------ */ @@ -191,17 +194,17 @@ int main(int argc, char **argv) { } int ref_count = (int)(vptr - ref_vals); - std::cout << "PETSc extracted " << ref_count << " ineq Jacobian values" + std::cout << " PETSc extracted " << ref_count << " ineq Jacobian values" << " (expected " << nnz << ")" << std::endl; if (ref_count != nnz) { - std::cout << "FAIL: NNZ mismatch! PETSc=" << ref_count + std::cout << " FAIL: NNZ mismatch! PETSc=" << ref_count << " analytical=" << nnz << std::endl; fail++; } /* ------------------------------------------------------------------ - * Step 5: Compute GPU inequality Jacobian at the same solution + * Compute GPU inequality Jacobian at the same solution * ------------------------------------------------------------------ */ double *x_host; ierr = VecGetArray(opflow->X, &x_host); @@ -238,7 +241,7 @@ int main(int argc, char **argv) { resmgr.copy(x_dev, x_sd); #endif - std::cout << "Running RAJA GPU inequality Jacobian kernel..." << std::endl; + std::cout << " Running RAJA GPU inequality Jacobian kernel..." << std::endl; ComputeIneqJacValuesGPU_PBPOLRAJAHIOPSPARSE(opflow, x_dev, gpu_vals_dev); gpu_vals = static_cast(h_allocator.allocate(nnz * sizeof(double))); @@ -249,29 +252,27 @@ int main(int argc, char **argv) { #endif /* ------------------------------------------------------------------ - * Step 6: Compare + * Compare for correctness * ------------------------------------------------------------------ */ - std::cout << "Comparing " << nnz << " inequality Jacobian values..." + std::cout << " Comparing " << nnz << " inequality Jacobian values..." << std::endl; int cmp_fail = compare_arrays(ref_vals, gpu_vals, nnz, "ineqjac"); fail += cmp_fail; if (cmp_fail == 0) - std::cout << "PASS: All " << nnz + std::cout << " PASS: All " << nnz << " inequality Jacobian values match within tol=" << TOL << std::endl; else - std::cout << "FAIL: " << cmp_fail << " of " << nnz << " values differ" + std::cout << " FAIL: " << cmp_fail << " of " << nnz << " values differ" << std::endl; + std::cout << "=== End validation ===" << std::endl; + /* ------------------------------------------------------------------ - * Step 7: Performance comparison (enabled with -benchmark flag) + * Performance comparison * ------------------------------------------------------------------ */ - PetscBool run_benchmark = PETSC_FALSE; - ierr = PetscOptionsGetBool(NULL, NULL, "-benchmark", NULL, &run_benchmark); - CHKERRQ(ierr); - - if (run_benchmark) { + { int niters = 1000; PetscInt bench_nrow, bench_ncol; ierr = MatGetSize(opflow->Jac_Gi, &bench_nrow, &bench_ncol); @@ -280,7 +281,7 @@ int main(int argc, char **argv) { double *bench_vals = static_cast(h_allocator.allocate(nnz * sizeof(double))); - std::cout << "\n=== Performance Benchmark (" << niters + std::cout << "\n=== Performance benchmark (" << niters << " iterations) ===" << std::endl; /* --- PETSc path: compute + MatGetRow extraction + copy to device --- */ @@ -380,7 +381,7 @@ int main(int argc, char **argv) { } h_allocator.deallocate(bench_vals); - std::cout << "=== End Benchmark ===" << std::endl; + std::cout << "=== End benchmark ===" << std::endl; } /* ------------------------------------------------------------------ diff --git a/tests/unit/pflow/CMakeLists.txt b/tests/unit/pflow/CMakeLists.txt new file mode 100644 index 00000000..238821f9 --- /dev/null +++ b/tests/unit/pflow/CMakeLists.txt @@ -0,0 +1,22 @@ +add_executable( + test_pflow test_pflow.cpp + ${CMAKE_SOURCE_DIR}/tests/unit/utils/test_acopf_utils.cpp +) +target_link_libraries(test_pflow ExaGO::PFLOW) +target_include_directories( + test_pflow PRIVATE ${CMAKE_SOURCE_DIR}/tests/unit/utils +) + +if(EXAGO_RUN_TESTS) + exago_add_test( + NAME + "UNIT_TESTS_PFLOW" + COMMAND + ${RUNCMD} + $ + -validation_dir + ${CMAKE_SOURCE_DIR}/datafiles/test_validation + NETFILES + ${network_files} + ) +endif() diff --git a/tests/unit/test_pflow.cpp b/tests/unit/pflow/test_pflow.cpp similarity index 99% rename from tests/unit/test_pflow.cpp rename to tests/unit/pflow/test_pflow.cpp index 3493d89c..db8ed0e9 100644 --- a/tests/unit/test_pflow.cpp +++ b/tests/unit/pflow/test_pflow.cpp @@ -7,7 +7,7 @@ #include #include -#include "pflow/pflow_tests.h" +#include "pflow_tests.h" #include "test_acopf_utils.h" //#if defined(EXAGO_ENABLE_RAJA) diff --git a/tests/unit/ps/CMakeLists.txt b/tests/unit/ps/CMakeLists.txt index 0f093d4b..b6071ac0 100644 --- a/tests/unit/ps/CMakeLists.txt +++ b/tests/unit/ps/CMakeLists.txt @@ -5,7 +5,7 @@ execute_process( ${CMAKE_CURRENT_BINARY_DIR}/data ) add_test( - NAME psse_parser + NAME UNIT_TESTS_PSSE_PARSER COMMAND test_psse_parser WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/data )