From d3e0ac09c219678995141d786c569d47a6624dc7 Mon Sep 17 00:00:00 2001 From: "Abhyankar, Shrirang G" Date: Sat, 8 May 2021 19:55:26 +0000 Subject: [PATCH 01/35] OPFLOW: initial implementation of RAJA/HiOp sparse GPU-based solver WIP - HIOP Sparse solver with GPU model OPFLOW: Started work on support for HIOP sparse solver interface for GPUs. Added a copy of hiop sparse solver interface. OPFLOW: Added model skeleton for GPU sparse version (copying from pbpolrajahiop) Fixed build Did some copy paste to add a test for HIOPSPARSE. This test is not actually functional yet. Started updating the hiopsparse model and solver code. More work on updating the solver and model Added scalar and vector unit tests for model to be used with HIOP sparse solver on GPU Apply cmake lint Fix unit tests. Set the size of array when using Umpire memset. Code formatting Some minor changes to get PBPOLRAJAHIOPSPARSE model code to compile Separate BUS/LINE/GEN/.../Param structs into reusable module Minor edit Rename files Fix typo Use BUS/LINE/GEN/.../Param structs in Raja HiOp Sparse model (compiles) Updating HIOP sparse solver GPU API Completed bounds kernels Completed scalar and vector functions WIP - HIOP Sparse solver with GPU model OPFLOW: Started work on support for HIOP sparse solver interface for GPUs. Added a copy of hiop sparse solver interface. OPFLOW: Added model skeleton for GPU sparse version (copying from pbpolrajahiop) Fixed build Did some copy paste to add a test for HIOPSPARSE. This test is not actually functional yet. Started updating the hiopsparse model and solver code. More work on updating the solver and model Added scalar and vector unit tests for model to be used with HIOP sparse solver on GPU Apply cmake lint Fix unit tests. Set the size of array when using Umpire memset. Code formatting Rename files Use BUS/LINE/GEN/.../Param structs in Raja HiOp Sparse model (compiles) Updating HIOP sparse solver GPU API Completed bounds kernels Jacobian and Hessian for sparse model (CPU --> GPU copy) Use correct array lengths in Eq. Jacobian Fix bug in Jacobian. Fix unused variable/parameter errors OPFLOW: rework solution callback for RAJA/HIOP GPU-based solver Formatting changes --- tpl/pybind11 | 2 +- tpl/spack | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tpl/pybind11 b/tpl/pybind11 index 6c772085..59aa9986 160000 --- a/tpl/pybind11 +++ b/tpl/pybind11 @@ -1 +1 @@ -Subproject commit 6c77208561f23c255bf73e3d76674a6a9496179f +Subproject commit 59aa99860c60bd171b9565e9920f125fdb749267 diff --git a/tpl/spack b/tpl/spack index 16bc58ea..b85c31f9 160000 --- a/tpl/spack +++ b/tpl/spack @@ -1 +1 @@ -Subproject commit 16bc58ea49256b061c7308565fe41c446e748881 +Subproject commit b85c31f946e4845d3d6efcc161bc50032f197174 From 80e8599cb3a33c62e060c1f53072273042edcf8b Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Mon, 18 Sep 2023 14:30:14 -0700 Subject: [PATCH 02/35] Spit out sparse Jacobian indexes for debugging --- .../pbpolrajahiopsparsekernels.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index adc10c8e..b3e8a1c6 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1,4 +1,7 @@ +#include +#include + #include #if defined(EXAGO_ENABLE_RAJA) @@ -531,6 +534,14 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( CHKERRQ(ierr); } + // Dump out the matrix indexes as a check + std::cout << "Nonzero indexes for Inequality Constraint Jacobian:" << std::endl; + for (int idx = 0; idx < opflow->nnz_ineqjacsp; ++idx) { + std::cout << std::setw(5) << idx << " " + << std::setw(5) << pbpolrajahiopsparse->i_jacineq[idx] << " " + << std::setw(5) << pbpolrajahiopsparse->j_jacineq[idx] << std::endl; + } + // Copy over i_jacineq and j_jacineq arrays to device resmgr.copy(iJacS_dev + opflow->nnz_eqjacsp, pbpolrajahiopsparse->i_jacineq); @@ -648,6 +659,13 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( CHKERRQ(ierr); } + std::cout << "Zero indexes for Equality Constraint Jacobian:" << std::endl; + for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { + std::cout << std::setw(5) << idx << " " + << std::setw(5) << pbpolrajahiopsparse->i_jaceq[idx] << " " + << std::setw(5) << pbpolrajahiopsparse->j_jaceq[idx] << std::endl; + } + // Copy over i_jaceq and j_jaceq arrays to device resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); From 867072493eb3caca5d67ff6f433a0067e0abd452 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 19 Sep 2023 07:35:54 -0700 Subject: [PATCH 03/35] Count equality Jacobian nonzeros and assign bus/gen/load locations --- .../model/power_bal_hiop/paramsrajahiop.cpp | 20 ++--- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 84 ++++++++++++++++++- 2 files changed, 90 insertions(+), 14 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 27e2cfa0..244076cd 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -21,11 +21,11 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(bl); h_allocator_.deallocate(xidx); h_allocator_.deallocate(gidx); + h_allocator_.deallocate(jacsp_idx); + h_allocator_.deallocate(jacsq_idx); if (opflow->include_powerimbalance_variables) { h_allocator_.deallocate(xidxpimb); h_allocator_.deallocate(powerimbalance_penalty); - h_allocator_.deallocate(jacsp_idx); - h_allocator_.deallocate(jacsq_idx); } #ifdef EXAGO_ENABLE_GPU @@ -40,11 +40,11 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(bl_dev_); d_allocator_.deallocate(xidx_dev_); d_allocator_.deallocate(gidx_dev_); + d_allocator_.deallocate(jacsp_idx_dev_); + d_allocator_.deallocate(jacsq_idx_dev_); if (opflow->include_powerimbalance_variables) { d_allocator_.deallocate(xidxpimb_dev_); d_allocator_.deallocate(powerimbalance_penalty_dev_); - d_allocator_.deallocate(jacsp_idx_dev_); - d_allocator_.deallocate(jacsq_idx_dev_); } #endif @@ -74,10 +74,10 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(xidx_dev_, xidx); resmgr.copy(gidx_dev_, gidx); + resmgr.copy(jacsp_idx_dev_, jacsp_idx); + resmgr.copy(jacsq_idx_dev_, jacsq_idx); if (opflow->include_powerimbalance_variables) { resmgr.copy(xidxpimb_dev_, xidxpimb); - resmgr.copy(jacsp_idx_dev_, jacsp_idx); - resmgr.copy(jacsq_idx_dev_, jacsq_idx); resmgr.copy(powerimbalance_penalty_dev_, powerimbalance_penalty); } #else @@ -126,11 +126,11 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) { xidx = paramAlloc(h_allocator_, nbus); gidx = paramAlloc(h_allocator_, nbus); + jacsp_idx = paramAlloc(h_allocator_, nbus); + jacsq_idx = paramAlloc(h_allocator_, nbus); if (opflow->include_powerimbalance_variables) { xidxpimb = paramAlloc(h_allocator_, nbus); powerimbalance_penalty = paramAlloc(h_allocator_, nbus); - jacsp_idx = paramAlloc(h_allocator_, nbus); - jacsq_idx = paramAlloc(h_allocator_, nbus); } /* Memzero arrays */ @@ -194,11 +194,11 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) { xidx_dev_ = paramAlloc(d_allocator_, nbus); gidx_dev_ = paramAlloc(d_allocator_, nbus); + jacsp_idx_dev_ = paramAlloc(d_allocator_, nbus); + jacsq_idx_dev_ = paramAlloc(d_allocator_, nbus); if (opflow->include_powerimbalance_variables) { xidxpimb_dev_ = paramAlloc(d_allocator_, nbus); powerimbalance_penalty_dev_ = paramAlloc(d_allocator_, nbus); - jacsp_idx_dev_ = paramAlloc(d_allocator_, nbus); - jacsq_idx_dev_ = paramAlloc(d_allocator_, nbus); } #endif return 0; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index fa609378..b79fe6fa 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -223,10 +223,86 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { /* Need to compute the number of nonzeros in equality, inequality constraint * Jacobians and Hessian */ - int nnz_eqjacsp = 0, nnz_ineqjacsp = 0, nnz_hesssp = 0; - opflow->nnz_eqjacsp = nnz_eqjacsp; - opflow->nnz_ineqjacsp = nnz_ineqjacsp; - opflow->nnz_hesssp = nnz_hesssp; + int nnz_eqjac = 0; + + // Find nonzero entries in equality constraint Jacobian by row. Using + // OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide. + + PS ps = (PS)opflow->ps; + + for (int ibus = 0; ibus < ps->nbus; ++ibus) { + + PSBUS bus = &(ps->bus[ibus]); + + // Nonzero entries used by each *bus* starts here + + // no matter what, each bus uses 2 rows and 2 columns + // row 1 = real, row2 = reactive + + busparams->jacsp_idx[ibus] = nnz_eqjac; + nnz_eqjac += 2; + busparams->jacsq_idx[ibus] = nnz_eqjac; + nnz_eqjac += 2; + + if (bus->ide == ISOLATED_BUS) { + continue; + } + + if (opflow->include_powerimbalance_variables) { + // 2 more entries on both real and reactive + nnz_eqjac += 4; + } + + for (int igen = 0; igen < bus->ngen; ++igen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, igen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; + // each active generator uses 1 real and reactive entry on each bus + genparams->eqjacspbus_idx[igen] = nnz_eqjac; + nnz_eqjac += 2; + } + + if (opflow->include_loadloss_variables) { + // each load adds one real and reactive entry on each bus row + for (int iload = 0; iload < bus->nload; ++iload) { + loadparams->jacsp_idx[iload] = nnz_eqjac; + nnz_eqjac += 2; + } + } + + const PSLINE *connlines; + int nconnlines; + ierr = PSBUSGetSupportingLines(bus, &nconnlines, &connlines); + CHKERRQ(ierr); + + for (int iconn = 0; iconn < nconnlines; iconn++) { + // each *active* connected line uses 4 entries total in each bus row + PSLINE line = connlines[iconn]; + if (!line->status) + continue; + nnz_eqjac += 8; + } + + if (opflow->has_gensetpoint) { + for (int igen = 0; igen < bus->ngen; ++igen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, igen, &gen); + CHKERRQ(ierr); + + if (!gen->status || gen->isrenewable) + continue; + + // each generator uses 2 rows, 3 columns real, 1 column reactive + genparams->eqjacspbus_idx[igen] = nnz_eqjac; + nnz_eqjac += 4; + } + } + } + + printf("Equality Jacobian nonzero count: %d vs %d\n", + opflow->nnz_eqjacsp, nnz_eqjac); ierr = busparams->copy(opflow); ierr = genparams->copy(opflow); From 0287bd72eb5fd5dd1bfdf24cfc0b31d553b6790e Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 19 Sep 2023 08:30:57 -0700 Subject: [PATCH 04/35] Equality Jacobian nonzero count matches --- src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index b79fe6fa..49466527 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -282,7 +282,11 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { PSLINE line = connlines[iconn]; if (!line->status) continue; - nnz_eqjac += 8; + + // each line adds 4 entries for the to bus and 4 entries for the + // from bus. The current bus is one of these and those entries + // have already been counted. + nnz_eqjac += 4; } if (opflow->has_gensetpoint) { From 02d6619906696e569518dd7a8867f21eb84a635d Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 19 Sep 2023 09:13:57 -0700 Subject: [PATCH 05/35] Inequality Jacobian nonzero count matches --- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index 49466527..1119d0e6 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -223,7 +223,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { /* Need to compute the number of nonzeros in equality, inequality constraint * Jacobians and Hessian */ - int nnz_eqjac = 0; + int nnz_eqjac = 0, nnz_ineqjac = 0; // Find nonzero entries in equality constraint Jacobian by row. Using // OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide. @@ -305,8 +305,51 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } - printf("Equality Jacobian nonzero count: %d vs %d\n", - opflow->nnz_eqjacsp, nnz_eqjac); + printf("Equality Jacobian nonzero count: %d\n", nnz_eqjac); + + if (opflow->has_gensetpoint) { + for (int ibus = 0; ibus < ps->nbus; ++ibus) { + PSBUS bus = &(ps->bus[ibus]); + for (int igen = 0; igen < bus->ngen; ++igen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, igen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; + genparams->ineqjacspgen_idx[igen] = nnz_ineqjac; + nnz_ineqjac += 6; + } + } + } + + if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) { + for (int ibus = 0; ibus < ps->nbus; ++ibus) { + PSBUS bus = &(ps->bus[ibus]); + if (bus->ide == PV_BUS || bus->ide == REF_BUS) { + for (int igen = 0; igen < bus->ngen; ++igen) { + PSGEN gen; + ierr = PSBUSGetGen(bus, igen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; + nnz_ineqjac += 2; + } + nnz_ineqjac += 2; + } + } + } + + if (!opflow->ignore_lineflow_constraints) { + for (int iline = 0; iline < opflow->nlinesmon; ++iline) { + // PSLINE line = &ps->line[opflow->linesmon[iline]]; + nnz_ineqjac += 8; + } + } + + printf("Inequality Jacobian nonzero count: %d\n", nnz_ineqjac); + + + // opflow->nnz_eqjacsp = nnz_eqjac; ierr = busparams->copy(opflow); ierr = genparams->copy(opflow); From 33902a53d2150defc09a7ede4420ba157445c7df Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 17 Oct 2023 08:12:24 -0700 Subject: [PATCH 06/35] Log event begin/end unbalanced in equality Jacobian for some reason --- src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index b3e8a1c6..4b7253b0 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -494,6 +494,8 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* Set locations only */ if (opflow->Nconineq) { + ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); + // Create arrays on host to store i,j, and val arrays umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); From cd0bd762d3d7fd29f16f7703889d480b3be8eb6a Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 24 Oct 2023 08:27:24 -0700 Subject: [PATCH 07/35] Checkpoint - Equality Jacobian assembly runs --- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 34 ++- .../pbpolrajahiopsparsekernels.cpp | 227 +++++++++++++++++- 2 files changed, 248 insertions(+), 13 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index 1119d0e6..ff97dd5e 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -230,7 +230,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { PS ps = (PS)opflow->ps; - for (int ibus = 0; ibus < ps->nbus; ++ibus) { + for (int ibus = 0, igen = 0, iline = 0, iload = 0; ibus < ps->nbus; ++ibus) { PSBUS bus = &(ps->bus[ibus]); @@ -253,20 +253,22 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { nnz_eqjac += 4; } - for (int igen = 0; igen < bus->ngen; ++igen) { + for (int bgen = 0; bgen < bus->ngen; ++bgen) { PSGEN gen; - ierr = PSBUSGetGen(bus, igen, &gen); + ierr = PSBUSGetGen(bus, bgen, &gen); CHKERRQ(ierr); if (!gen->status) continue; // each active generator uses 1 real and reactive entry on each bus - genparams->eqjacspbus_idx[igen] = nnz_eqjac; - nnz_eqjac += 2; + genparams->eqjacspbus_idx[igen] = nnz_eqjac++; + genparams->eqjacsqbus_idx[igen] = nnz_eqjac++; + igen++; } if (opflow->include_loadloss_variables) { // each load adds one real and reactive entry on each bus row - for (int iload = 0; iload < bus->nload; ++iload) { + // NOTE: iload is a system load counter + for (int bload = 0; bload < bus->nload; bload++, iload++) { loadparams->jacsp_idx[iload] = nnz_eqjac; nnz_eqjac += 2; } @@ -286,7 +288,22 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { // each line adds 4 entries for the to bus and 4 entries for the // from bus. The current bus is one of these and those entries // have already been counted. + + const PSBUS *connbuses; + ierr = PSLINEGetConnectedBuses(line, &connbuses); + CHKERRQ(ierr); + PSBUS busf = connbuses[0]; + PSBUS bust = connbuses[1]; + + if (bus == busf) { + lineparams->geqidxf[iline] = busparams->jacsp_idx[ibus]; + } else if (bus == bust) { + lineparams->geqidxt[iline] = busparams->jacsp_idx[ibus]; + } else { + PetscFunctionReturn(PETSC_ERR_SUP); + } nnz_eqjac += 4; + iline++; } if (opflow->has_gensetpoint) { @@ -305,7 +322,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } - printf("Equality Jacobian nonzero count: %d\n", nnz_eqjac); + std::cout << "Equality Jacobian nonzero count: " << nnz_eqjac << std::endl; if (opflow->has_gensetpoint) { for (int ibus = 0; ibus < ps->nbus; ++ibus) { @@ -332,7 +349,6 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { CHKERRQ(ierr); if (!gen->status) continue; - nnz_ineqjac += 2; } nnz_ineqjac += 2; } @@ -346,7 +362,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } - printf("Inequality Jacobian nonzero count: %d\n", nnz_ineqjac); + std::cout << "Inequality Jacobian nonzero count: " << nnz_ineqjac << std::endl; // opflow->nnz_eqjacsp = nnz_eqjac; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 4b7253b0..6adfaf38 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -537,7 +537,9 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( } // Dump out the matrix indexes as a check - std::cout << "Nonzero indexes for Inequality Constraint Jacobian:" << std::endl; + std::cout << "Nonzero indexes for Inequality Constraint Jacobian: " + << opflow->nnz_ineqjacsp + << std::endl; for (int idx = 0; idx < opflow->nnz_ineqjacsp; ++idx) { std::cout << std::setw(5) << idx << " " << std::setw(5) << pbpolrajahiopsparse->i_jacineq[idx] << " " @@ -607,6 +609,11 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( double *MJacS_dev) { PbpolModelRajaHiop *pbpolrajahiopsparse = reinterpret_cast(opflow->model); + GENParamsRajaHiop *genparams = &pbpolrajahiopsparse->genparams; + LOADParamsRajaHiop *loadparams = &pbpolrajahiopsparse->loadparams; + BUSParamsRajaHiop *busparams = &pbpolrajahiopsparse->busparams; + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + PetscErrorCode ierr; PetscInt *iRowstart, *jColstart; PetscScalar *x, *values; @@ -620,15 +627,227 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionBegin; + /* Using OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide */ + if (MJacS_dev == NULL) { + /* Set locations only */ - roffset = 0; - coffset = 0; + /* Bus power imbalance contribution */ + int *b_xidxpimb = busparams->xidxpimb_dev_; + int *b_gidx = busparams->gidx_dev_; + int *b_jacsp_idx = busparams->jacsp_idx_dev_; + int *b_jacsq_idx = busparams->jacsq_idx_dev_; + + std::cout << "Begin with buses" << std::endl; + + /* Bus shunt injections */ + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; + iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i] + 1; + + iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; + iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 2; + }); + + if (opflow->include_powerimbalance_variables) { + std::cout << "Bus power imbalance variables" << std::endl; + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i]] = b_xidxpimb[i]; + iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i] + 1] = b_xidxpimb[i] + 1; + + iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i]] = b_xidxpimb[i] + 2; + iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i] + 1] = b_xidxpimb[i] + 3; + }); + } + + /* generation contributions */ + + std::cout << "Generators " << std::endl; + + int *g_gidxbus = genparams->gidxbus_dev_; + int *g_xidx = genparams->xidx_dev_; + int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; + int *eqjacsqbus_idx = genparams->eqjacsqbus_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[eqjacspbus_idx[i]] = g_gidxbus[i]; + jJacS_dev[eqjacspbus_idx[i]] = g_xidx[i]; + + iJacS_dev[eqjacsqbus_idx[i]] = g_gidxbus[i] + 1; + jJacS_dev[eqjacsqbus_idx[i]] = g_xidx[i] + 1; + }); + + /* Loadloss contributions */ + + if (opflow->include_loadloss_variables) { + + std::cout << "Load Loss" << std::endl; + int *l_gidx = loadparams->gidx_dev_; + int *l_xidx = loadparams->xidx_dev_; + int *l_jacsp_idx = loadparams->jacsp_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + iJacS_dev[l_jacsp_idx[i]] = l_gidx[i]; + jJacS_dev[l_jacsp_idx[i]] = l_xidx[i]; + iJacS_dev[l_jacsp_idx[i] + 1] = l_gidx[i] + 1; + jJacS_dev[l_jacsp_idx[i] + 1] = l_xidx[i] + 1; + }); + } + + /* Connected lines */ + + // std::cout << "Connected Lines" << std::endl; + + // int *xidxf = lineparams->xidxf_dev_; + // int *xidxt = lineparams->xidxt_dev_; + // int *geqidxf = lineparams->geqidxf_dev_; + // int *geqidxt = lineparams->geqidxt_dev_; + // RAJA::forall( + // RAJA::RangeSegment(0, lineparams->nlineON), + // RAJA_LAMBDA(RAJA::Index_type i) { + + // int offset(0); + + // // from bus indexes + // // indexes already computed + // // iJacS_dev[geqidxf[i + offset]] = xidxf[i]; + // // jJacS_dev[geqidxf[i + offset]] = xidxf[i]; + // // offset++; + + // // iJacS_dev[geqidxf[i] + offset] = xidxf[i]; + // // jJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; + // // offset++; + + // // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; + // // jJacS_dev[geqidxf[i] + offset] = xidxf[i]; + // // offset++; + + // // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; + // // jJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; + // // offset++; + + // iJacS_dev[geqidxf[i] + offset] = xidxf[i]; + // jJacS_dev[geqidxf[i] + offset] = xidxt[i]; + // offset++; + + // iJacS_dev[geqidxf[i] + offset] = xidxf[i]; + // jJacS_dev[geqidxf[i] + offset] = xidxt[i] + 1; + // offset++; + + // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; + // jJacS_dev[geqidxf[i] + offset] = xidxt[i]; + // offset++; + + // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; + // jJacS_dev[geqidxf[i] + offset] = xidxt[i] + 1; + // offset++; + + // offset = 0; + + // // to bus indexes + // // indexes already computed + // // iJacS_dev[geqidxt[i + offset]] = xidxt[i]; + // // jJacS_dev[geqidxt[i + offset]] = xidxt[i]; + // // offset++; + + // // iJacS_dev[geqidxt[i] + offset] = xidxt[i]; + // // jJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; + // // offset++; + + // // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; + // // jJacS_dev[geqidxt[i] + offset] = xidxt[i]; + // // offset++; + + // // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; + // // jJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; + // // offset++; + + // iJacS_dev[geqidxt[i] + offset] = xidxt[i]; + // jJacS_dev[geqidxt[i] + offset] = xidxf[i]; + // offset++; + + // iJacS_dev[geqidxt[i] + offset] = xidxt[i]; + // jJacS_dev[geqidxt[i] + offset] = xidxf[i] + 1; + // offset++; + + // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; + // jJacS_dev[geqidxf[i] + offset] = xidxf[i]; + // offset++; + + // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; + // jJacS_dev[geqidxt[i] + offset] = xidxf[i] + 1; + // offset++; + + // }); + + + + if (opflow->has_gensetpoint) { + + std::cout << "Generator set point" << std::endl; + int *eqjacspgen_idx = genparams->eqjacspgen_idx_dev_; + int *g_geqidxgen = genparams->geqidxgen_dev_; + int *g_xidx = genparams->xidx_dev_; + int *g_isrenewable = genparams->isrenewable_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + if (!g_isrenewable[i]) { + iJacS_dev[eqjacspgen_idx[i]] = g_geqidxgen[i]; + jJacS_dev[eqjacspgen_idx[i]] = g_xidx[i]; + + iJacS_dev[eqjacspgen_idx[i] + 1] = g_geqidxgen[i]; + jJacS_dev[eqjacspgen_idx[i] + 1] = g_xidx[i] + 2; + + iJacS_dev[eqjacspgen_idx[i] + 2] = g_geqidxgen[i]; + jJacS_dev[eqjacspgen_idx[i] + 2] = g_xidx[i] + 3; + + iJacS_dev[eqjacspgen_idx[i] + 3] = g_geqidxgen[i] + 1; + jJacS_dev[eqjacspgen_idx[i] + 3] = g_xidx[i] + 3; + } + }); + } + + // Create arrays on host to store i,j, and val arrays umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + // int *itemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + // int *jtemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + + // resmgr.copy(pbpolrajahiopsparse->i_jaceq, iJacS_dev, + // opflow->nnz_eqjacsp); + // resmgr.copy(pbpolrajahiopsparse->j_jaceq, jJacS_dev, + // opflow->nnz_eqjacsp); + + // std::cout << "Non-zero indexes for Equality Constraint Jacobian (GPU):" << std::endl; + // for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { + // std::cout << std::setw(5) << idx << " " + // << std::setw(5) << itemp[idx] << " " + // << std::setw(5) << jtemp[idx] << std::endl; + // } + + roffset = 0; + coffset = 0; + pbpolrajahiopsparse->i_jaceq = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); pbpolrajahiopsparse->j_jaceq = @@ -661,7 +880,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( CHKERRQ(ierr); } - std::cout << "Zero indexes for Equality Constraint Jacobian:" << std::endl; + std::cout << "Non-zero indexes for Equality Constraint Jacobian:" << std::endl; for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { std::cout << std::setw(5) << idx << " " << std::setw(5) << pbpolrajahiopsparse->i_jaceq[idx] << " " From 8acb1d9571fa1a419b062a7cca5d2a6599d9fde0 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Thu, 26 Oct 2023 09:06:25 -0700 Subject: [PATCH 08/35] Checkpoint: equality Jacobian indexes correct (w/o lines) --- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 32 +++---- .../pbpolrajahiopsparsekernels.cpp | 84 ++++++++++--------- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index ff97dd5e..a8ce8503 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -230,7 +230,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { PS ps = (PS)opflow->ps; - for (int ibus = 0, igen = 0, iline = 0, iload = 0; ibus < ps->nbus; ++ibus) { + for (int ibus = 0, igen1 = 0, igen2 = 0, iline = 0, iload = 0; ibus < ps->nbus; ++ibus) { PSBUS bus = &(ps->bus[ibus]); @@ -260,9 +260,9 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { if (!gen->status) continue; // each active generator uses 1 real and reactive entry on each bus - genparams->eqjacspbus_idx[igen] = nnz_eqjac++; - genparams->eqjacsqbus_idx[igen] = nnz_eqjac++; - igen++; + genparams->eqjacspbus_idx[igen1] = nnz_eqjac++; + genparams->eqjacsqbus_idx[igen1] = nnz_eqjac++; + igen1++; } if (opflow->include_loadloss_variables) { @@ -271,9 +271,11 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { for (int bload = 0; bload < bus->nload; bload++, iload++) { loadparams->jacsp_idx[iload] = nnz_eqjac; nnz_eqjac += 2; + iload++; } } + if (0) { const PSLINE *connlines; int nconnlines; ierr = PSBUSGetSupportingLines(bus, &nconnlines, &connlines); @@ -292,32 +294,30 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { const PSBUS *connbuses; ierr = PSLINEGetConnectedBuses(line, &connbuses); CHKERRQ(ierr); - PSBUS busf = connbuses[0]; PSBUS bust = connbuses[1]; - if (bus == busf) { - lineparams->geqidxf[iline] = busparams->jacsp_idx[ibus]; - } else if (bus == bust) { - lineparams->geqidxt[iline] = busparams->jacsp_idx[ibus]; - } else { - PetscFunctionReturn(PETSC_ERR_SUP); - } - nnz_eqjac += 4; + // only count nonzeros when this is the to bus; add nonzeros for both to and from + if (bus == bust) { + lineparams->jac_idx[iline] = nnz_eqjac; + nnz_eqjac += 8; + } iline++; } + } if (opflow->has_gensetpoint) { - for (int igen = 0; igen < bus->ngen; ++igen) { + for (int bgen = 0; bgen < bus->ngen; ++bgen) { PSGEN gen; - ierr = PSBUSGetGen(bus, igen, &gen); + ierr = PSBUSGetGen(bus, bgen, &gen); CHKERRQ(ierr); if (!gen->status || gen->isrenewable) continue; // each generator uses 2 rows, 3 columns real, 1 column reactive - genparams->eqjacspbus_idx[igen] = nnz_eqjac; + genparams->eqjacspbus_idx[igen2] = nnz_eqjac; nnz_eqjac += 4; + igen2++; } } } diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 6adfaf38..181c17c7 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -633,27 +633,30 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* Set locations only */ + resmgr.memset(iJacS_dev, 0, opflow->nnz_eqjacsp*sizeof(int)); + resmgr.memset(jJacS_dev, 0, opflow->nnz_eqjacsp*sizeof(int)); + /* Bus power imbalance contribution */ int *b_xidxpimb = busparams->xidxpimb_dev_; int *b_gidx = busparams->gidx_dev_; + int *b_xidx = busparams->xidx_dev_; int *b_jacsp_idx = busparams->jacsp_idx_dev_; int *b_jacsq_idx = busparams->jacsq_idx_dev_; + /* Bus */ std::cout << "Begin with buses" << std::endl; - - /* Bus shunt injections */ RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; - jJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; + jJacS_dev[b_jacsp_idx[i]] = b_xidx[i]; iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; - jJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i] + 1; + jJacS_dev[b_jacsp_idx[i] + 1] = b_xidx[i] + 1; iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; - jJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; + jJacS_dev[b_jacsq_idx[i]] = b_xidx[i]; iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; - jJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 2; + jJacS_dev[b_jacsq_idx[i] + 1] = b_xidx[i] + 1; }); if (opflow->include_powerimbalance_variables) { @@ -716,16 +719,18 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // int *xidxf = lineparams->xidxf_dev_; // int *xidxt = lineparams->xidxt_dev_; - // int *geqidxf = lineparams->geqidxf_dev_; - // int *geqidxt = lineparams->geqidxt_dev_; + // int *jac_idx = lineparams->jac_idx_dev_; // RAJA::forall( // RAJA::RangeSegment(0, lineparams->nlineON), // RAJA_LAMBDA(RAJA::Index_type i) { - // int offset(0); + // int offset; // // from bus indexes + + // offset = 0; + // // indexes already computed // // iJacS_dev[geqidxf[i + offset]] = xidxf[i]; // // jJacS_dev[geqidxf[i + offset]] = xidxf[i]; @@ -743,26 +748,27 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // // jJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; // // offset++; - // iJacS_dev[geqidxf[i] + offset] = xidxf[i]; - // jJacS_dev[geqidxf[i] + offset] = xidxt[i]; + // iJacS_dev[jac_idx[i] + offset] = xidxf[i]; + // jJacS_dev[jac_idx[i] + offset] = xidxt[i]; // offset++; - // iJacS_dev[geqidxf[i] + offset] = xidxf[i]; - // jJacS_dev[geqidxf[i] + offset] = xidxt[i] + 1; + // iJacS_dev[jac_idx[i] + offset] = xidxf[i]; + // jJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; // offset++; - // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; - // jJacS_dev[geqidxf[i] + offset] = xidxt[i]; + // iJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; + // jJacS_dev[jac_idx[i] + offset] = xidxt[i]; // offset++; - // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; - // jJacS_dev[geqidxf[i] + offset] = xidxt[i] + 1; + // iJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; + // jJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; // offset++; - // offset = 0; - // // to bus indexes - // // indexes already computed + + // offset = 0; + + // // indexes already computed for bus // // iJacS_dev[geqidxt[i + offset]] = xidxt[i]; // // jJacS_dev[geqidxt[i + offset]] = xidxt[i]; // // offset++; @@ -779,20 +785,20 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // // jJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; // // offset++; - // iJacS_dev[geqidxt[i] + offset] = xidxt[i]; - // jJacS_dev[geqidxt[i] + offset] = xidxf[i]; + // iJacS_dev[jac_idx[i] + offset] = xidxt[i]; + // jJacS_dev[jac_idx[i] + offset] = xidxf[i]; // offset++; - // iJacS_dev[geqidxt[i] + offset] = xidxt[i]; - // jJacS_dev[geqidxt[i] + offset] = xidxf[i] + 1; + // iJacS_dev[jac_idx[i] + offset] = xidxt[i]; + // jJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; // offset++; - // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; - // jJacS_dev[geqidxf[i] + offset] = xidxf[i]; + // iJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; + // jJacS_dev[jac_idx[i] + offset] = xidxf[i]; // offset++; - // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; - // jJacS_dev[geqidxt[i] + offset] = xidxf[i] + 1; + // iJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; + // jJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; // offset++; // }); @@ -830,20 +836,18 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // Create arrays on host to store i,j, and val arrays umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - // int *itemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - // int *jtemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + int *itemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + int *jtemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - // resmgr.copy(pbpolrajahiopsparse->i_jaceq, iJacS_dev, - // opflow->nnz_eqjacsp); - // resmgr.copy(pbpolrajahiopsparse->j_jaceq, jJacS_dev, - // opflow->nnz_eqjacsp); + resmgr.copy(itemp, iJacS_dev, opflow->nnz_eqjacsp*sizeof(int)); + resmgr.copy(jtemp, jJacS_dev, opflow->nnz_eqjacsp*sizeof(int)); - // std::cout << "Non-zero indexes for Equality Constraint Jacobian (GPU):" << std::endl; - // for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { - // std::cout << std::setw(5) << idx << " " - // << std::setw(5) << itemp[idx] << " " - // << std::setw(5) << jtemp[idx] << std::endl; - // } + std::cout << "Non-zero indexes for Equality Constraint Jacobian (GPU):" << std::endl; + for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { + std::cout << std::setw(5) << idx << " " + << std::setw(5) << itemp[idx] << " " + << std::setw(5) << jtemp[idx] << std::endl; + } roffset = 0; coffset = 0; From 3bf98afe2f55483557c56c0d0071c3cd88c9140b Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Thu, 26 Oct 2023 12:31:19 -0700 Subject: [PATCH 09/35] Add line to/from locations for sparse equality Jacobian --- .../model/power_bal_hiop/paramsrajahiop.cpp | 16 ++++++++++++++++ src/opflow/model/power_bal_hiop/paramsrajahiop.h | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 244076cd..87280dc3 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -226,6 +226,8 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(geqidxf_dev_, geqidxf); resmgr.copy(geqidxt_dev_, geqidxt); + resmgr.copy(jacf_idx_dev_, jacf_idx); + resmgr.copy(jact_idx_dev_, jact_idx); if (opflow->nlinesmon) { resmgr.copy(gineqidx_dev_, gineqidx); @@ -246,6 +248,8 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { xidxt_dev_ = xidxt; geqidxf_dev_ = geqidxf; geqidxt_dev_ = geqidxt; + jacf_idx_dev_ = jacf_idx; + jact_idx_dev_ = jact_idx; if (opflow->nlinesmon) { gineqidx_dev_ = gineqidx; gbineqidx_dev_ = gbineqidx; @@ -272,6 +276,8 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(geqidxf); h_allocator_.deallocate(geqidxt); + h_allocator_.deallocate(jacf_idx); + h_allocator_.deallocate(jact_idx); if (opflow->nlinesmon) { h_allocator_.deallocate(gineqidx); @@ -296,6 +302,8 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(geqidxf_dev_); d_allocator_.deallocate(geqidxt_dev_); + d_allocator_.deallocate(jacf_idx_dev_); + d_allocator_.deallocate(jact_idx_dev_); if (opflow->nlinesmon) { d_allocator_.deallocate(gineqidx_dev_); @@ -344,6 +352,9 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { geqidxf = paramAlloc(h_allocator_, nlineON); geqidxt = paramAlloc(h_allocator_, nlineON); + jacf_idx = paramAlloc(h_allocator_, nlineON); + jact_idx = paramAlloc(h_allocator_, nlineON); + if (opflow->nlinesmon) { linelimidx = paramAlloc(h_allocator_, nlinelim); gineqidx = paramAlloc(h_allocator_, nlinelim); @@ -386,6 +397,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { */ geqidxf[linei] = busf->starteqloc; geqidxt[linei] = bust->starteqloc; + jacf_idx[linei] = 0; + jact_idx[linei] = 0; if (j < opflow->nlinesmon && opflow->linesmon[j] == i) { gbineqidx[j] = opflow->nconeq + line->startineqloc; @@ -416,6 +429,9 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { geqidxf_dev_ = paramAlloc(d_allocator_, nlineON); geqidxt_dev_ = paramAlloc(d_allocator_, nlineON); + jacf_idx_dev_ = paramAlloc(d_allocator_, nlineON); + jact_idx_dev_ = paramAlloc(d_allocator_, nlineON); + if (opflow->nconineq) { gineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); gbineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 91f1fb9f..5f5a92a4 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -196,6 +196,9 @@ struct LINEParamsRajaHiop { constraint bound */ int *linelimidx; /* Indices for subset of lines that have finite limits */ + int *jacf_idx; /* Location number in the sparse Jacobian (from) */ + int *jact_idx; /* Location number in the sparse Jacobian (to) */ + // Device data double *Gff_dev_; /* From side self conductance */ double *Bff_dev_; /* From side self susceptance */ @@ -219,6 +222,9 @@ struct LINEParamsRajaHiop { int * linelimidx_dev_; /* Indices for subset of lines that have finite limits */ + int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ + int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ + int allocate(OPFLOW); int destroy(OPFLOW); int copy(OPFLOW); From 335ddd1bcc024e511220414a97c5f580a027c73e Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Thu, 26 Oct 2023 12:32:23 -0700 Subject: [PATCH 10/35] GPU computed sparse equality Jacobian indexes correct --- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 47 +++----- .../pbpolrajahiopsparsekernels.cpp | 111 +++++++----------- 2 files changed, 57 insertions(+), 101 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index a8ce8503..79f98606 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -230,7 +230,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { PS ps = (PS)opflow->ps; - for (int ibus = 0, igen1 = 0, igen2 = 0, iline = 0, iload = 0; ibus < ps->nbus; ++ibus) { + for (int ibus = 0, igen1 = 0, igen2 = 0, iload = 0; ibus < ps->nbus; ++ibus) { PSBUS bus = &(ps->bus[ibus]); @@ -275,35 +275,6 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } - if (0) { - const PSLINE *connlines; - int nconnlines; - ierr = PSBUSGetSupportingLines(bus, &nconnlines, &connlines); - CHKERRQ(ierr); - - for (int iconn = 0; iconn < nconnlines; iconn++) { - // each *active* connected line uses 4 entries total in each bus row - PSLINE line = connlines[iconn]; - if (!line->status) - continue; - - // each line adds 4 entries for the to bus and 4 entries for the - // from bus. The current bus is one of these and those entries - // have already been counted. - - const PSBUS *connbuses; - ierr = PSLINEGetConnectedBuses(line, &connbuses); - CHKERRQ(ierr); - PSBUS bust = connbuses[1]; - - // only count nonzeros when this is the to bus; add nonzeros for both to and from - if (bus == bust) { - lineparams->jac_idx[iline] = nnz_eqjac; - nnz_eqjac += 8; - } - iline++; - } - } if (opflow->has_gensetpoint) { for (int bgen = 0; bgen < bus->ngen; ++bgen) { @@ -322,6 +293,22 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } + // Go through the lines + + for (int iline = 0; iline <= ps->nline; ++iline) { + PSLINE line = &(ps->line[iline]); + + if (!line->status) + continue; + + // each line adds 4 (off-diagonal) entries for the to bus and 4 + // entries for the from bus. + lineparams->jacf_idx[iline] = nnz_eqjac; + nnz_eqjac += 4; + lineparams->jact_idx[iline] = nnz_eqjac; + nnz_eqjac += 4; + } + std::cout << "Equality Jacobian nonzero count: " << nnz_eqjac << std::endl; if (opflow->has_gensetpoint) { diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 181c17c7..f7a5e30e 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -715,93 +715,62 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* Connected lines */ - // std::cout << "Connected Lines" << std::endl; + std::cout << "Connected Lines" << std::endl; - // int *xidxf = lineparams->xidxf_dev_; - // int *xidxt = lineparams->xidxt_dev_; - // int *jac_idx = lineparams->jac_idx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *geqidxf = lineparams->geqidxf_dev_; + int *geqidxt = lineparams->geqidxt_dev_; + int *jacf_idx = lineparams->jacf_idx_dev_; + int *jact_idx = lineparams->jact_idx_dev_; - // RAJA::forall( - // RAJA::RangeSegment(0, lineparams->nlineON), - // RAJA_LAMBDA(RAJA::Index_type i) { + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { - // int offset; + int offset; - // // from bus indexes + // from bus indexes - // offset = 0; + offset = 0; - // // indexes already computed - // // iJacS_dev[geqidxf[i + offset]] = xidxf[i]; - // // jJacS_dev[geqidxf[i + offset]] = xidxf[i]; - // // offset++; - - // // iJacS_dev[geqidxf[i] + offset] = xidxf[i]; - // // jJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; - // // offset++; - - // // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; - // // jJacS_dev[geqidxf[i] + offset] = xidxf[i]; - // // offset++; - - // // iJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; - // // jJacS_dev[geqidxf[i] + offset] = xidxf[i] + 1; - // // offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxf[i]; - // jJacS_dev[jac_idx[i] + offset] = xidxt[i]; - // offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxf[i]; - // jJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; - // offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; - // jJacS_dev[jac_idx[i] + offset] = xidxt[i]; - // offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; - // jJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; - // offset++; + // to bus indexes - // // to bus indexes + offset = 0; - // offset = 0; - - // // indexes already computed for bus - // // iJacS_dev[geqidxt[i + offset]] = xidxt[i]; - // // jJacS_dev[geqidxt[i + offset]] = xidxt[i]; - // // offset++; - - // // iJacS_dev[geqidxt[i] + offset] = xidxt[i]; - // // jJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; - // // offset++; - - // // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; - // // jJacS_dev[geqidxt[i] + offset] = xidxt[i]; - // // offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; + jJacS_dev[jact_idx[i] + offset] = xidxf[i]; + offset++; - // // iJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; - // // jJacS_dev[geqidxt[i] + offset] = xidxt[i] + 1; - // // offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; + jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxt[i]; - // jJacS_dev[jac_idx[i] + offset] = xidxf[i]; - // offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; + jJacS_dev[jact_idx[i] + offset] = xidxf[i]; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxt[i]; - // jJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; - // offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; + jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; + offset++; - // iJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; - // jJacS_dev[jac_idx[i] + offset] = xidxf[i]; - // offset++; - - // iJacS_dev[jac_idx[i] + offset] = xidxt[i] + 1; - // jJacS_dev[jac_idx[i] + offset] = xidxf[i] + 1; - // offset++; - - // }); + }); From 94ddd33370433ad380311e1f270e04da7489dc86 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Fri, 27 Oct 2023 07:29:22 -0700 Subject: [PATCH 11/35] Add some additional line information for sparse Jacobian assembly --- src/opflow/model/power_bal_hiop/paramsrajahiop.cpp | 14 ++++++++++++++ src/opflow/model/power_bal_hiop/paramsrajahiop.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 87280dc3..597eb3b1 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -226,6 +226,8 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(geqidxf_dev_, geqidxf); resmgr.copy(geqidxt_dev_, geqidxt); + resmgr.copy(busf_idx_dev_, busf_idx); + resmgr.copy(bust_idx_dev_, bust_idx); resmgr.copy(jacf_idx_dev_, jacf_idx); resmgr.copy(jact_idx_dev_, jact_idx); @@ -248,6 +250,8 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { xidxt_dev_ = xidxt; geqidxf_dev_ = geqidxf; geqidxt_dev_ = geqidxt; + busf_idx_dev_ = busf_idx; + bust_idx_dev_ = bust_idx; jacf_idx_dev_ = jacf_idx; jact_idx_dev_ = jact_idx; if (opflow->nlinesmon) { @@ -276,6 +280,8 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(geqidxf); h_allocator_.deallocate(geqidxt); + h_allocator_.deallocate(busf_idx); + h_allocator_.deallocate(bust_idx); h_allocator_.deallocate(jacf_idx); h_allocator_.deallocate(jact_idx); @@ -302,6 +308,8 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(geqidxf_dev_); d_allocator_.deallocate(geqidxt_dev_); + d_allocator_.deallocate(busf_idx_dev_); + d_allocator_.deallocate(bust_idx_dev_); d_allocator_.deallocate(jacf_idx_dev_); d_allocator_.deallocate(jact_idx_dev_); @@ -352,6 +360,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { geqidxf = paramAlloc(h_allocator_, nlineON); geqidxt = paramAlloc(h_allocator_, nlineON); + busf_idx = paramAlloc(h_allocator_, nlineON); + bust_idx = paramAlloc(h_allocator_, nlineON); jacf_idx = paramAlloc(h_allocator_, nlineON); jact_idx = paramAlloc(h_allocator_, nlineON); @@ -397,6 +407,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { */ geqidxf[linei] = busf->starteqloc; geqidxt[linei] = bust->starteqloc; + busf_idx[linei] = line->fbus; + bust_idx[linei] = line->tbus; jacf_idx[linei] = 0; jact_idx[linei] = 0; @@ -429,6 +441,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { geqidxf_dev_ = paramAlloc(d_allocator_, nlineON); geqidxt_dev_ = paramAlloc(d_allocator_, nlineON); + busf_idx_dev_ = paramAlloc(d_allocator_, nlineON); + bust_idx_dev_ = paramAlloc(d_allocator_, nlineON); jacf_idx_dev_ = paramAlloc(d_allocator_, nlineON); jact_idx_dev_ = paramAlloc(d_allocator_, nlineON); diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 5f5a92a4..73948f7b 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -196,6 +196,8 @@ struct LINEParamsRajaHiop { constraint bound */ int *linelimidx; /* Indices for subset of lines that have finite limits */ + int *busf_idx; /* From bus index */ + int *bust_idx; /* To bus index */ int *jacf_idx; /* Location number in the sparse Jacobian (from) */ int *jact_idx; /* Location number in the sparse Jacobian (to) */ @@ -222,6 +224,8 @@ struct LINEParamsRajaHiop { int * linelimidx_dev_; /* Indices for subset of lines that have finite limits */ + int *busf_idx_dev_; /* From bus index */ + int *bust_idx_dev_; /* To bus index */ int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ From 46efbde1c1b3cb6043f2c3158364a3bc254d763a Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Fri, 27 Oct 2023 07:29:58 -0700 Subject: [PATCH 12/35] Sparse GPU equality Jacobian assembly is coded and running (results untested) --- .../pbpolrajahiopsparsekernels.cpp | 195 ++++++++++++++++-- 1 file changed, 180 insertions(+), 15 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index f7a5e30e..a415d56a 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -627,6 +627,9 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionBegin; + ierr = PetscLogEventBegin(opflow->eqconsjaclogger, 0, 0, 0, 0); + CHKERRQ(ierr); + /* Using OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide */ if (MJacS_dev == NULL) { @@ -730,8 +733,6 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int offset; - // from bus indexes - offset = 0; iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; @@ -810,14 +811,8 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( resmgr.copy(itemp, iJacS_dev, opflow->nnz_eqjacsp*sizeof(int)); resmgr.copy(jtemp, jJacS_dev, opflow->nnz_eqjacsp*sizeof(int)); - - std::cout << "Non-zero indexes for Equality Constraint Jacobian (GPU):" << std::endl; - for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { - std::cout << std::setw(5) << idx << " " - << std::setw(5) << itemp[idx] << " " - << std::setw(5) << jtemp[idx] << std::endl; - } + if (1) { roffset = 0; coffset = 0; @@ -852,21 +847,190 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); CHKERRQ(ierr); } - + if (0) std::cout << "Non-zero indexes for Equality Constraint Jacobian:" << std::endl; for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { std::cout << std::setw(5) << idx << " " << std::setw(5) << pbpolrajahiopsparse->i_jaceq[idx] << " " << std::setw(5) << pbpolrajahiopsparse->j_jaceq[idx] << std::endl; } - + } // Copy over i_jaceq and j_jaceq arrays to device resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); + } + } else { - ierr = PetscLogEventBegin(opflow->eqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); + // Bus Contribution + int *b_jacsp_idx = busparams->jacsp_idx_dev_; + int *b_jacsq_idx = busparams->jacsq_idx_dev_; + int *isisolated = busparams->isisolated_dev_; + int *ispvpq = busparams->ispvpq_dev_; + double *gl = busparams->gl_dev_; + double *bl = busparams->bl_dev_; + int *b_xidx = busparams->xidx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + double Vm = x_dev[b_xidx[i] + 1]; + MJacS_dev[b_jacsp_idx[i]] = isisolated[i] * 1.0 + ispvpq[i] * 0.0; + MJacS_dev[b_jacsp_idx[i]+1] = isisolated[i] * 0.0 + ispvpq[i] * 2 * Vm * gl[i]; + MJacS_dev[b_jacsq_idx[i]] = 0.0; + MJacS_dev[b_jacsq_idx[i]+1] = isisolated[i] * 1.0 + ispvpq[i] * -2 * Vm * bl[i]; + }); + + + // Power imbalance + if (opflow->include_powerimbalance_variables) { + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { + MJacS_dev[b_jacsp_idx[i]] = 1.0; + MJacS_dev[b_jacsp_idx[i] + 1] = -1.0; + MJacS_dev[b_jacsq_idx[i]] = 1.0; + MJacS_dev[b_jacsq_idx[i] + 1] = -1.0; + }); + } + + /* Generator contributions */ + int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; + int *eqjacsqbus_idx = genparams->eqjacsqbus_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + MJacS_dev[eqjacspbus_idx[i]] = -1.0; + MJacS_dev[eqjacsqbus_idx[i]] = -1.0; + }); + + if (opflow->has_gensetpoint) { + int *eqjacspgen_idx = genparams->eqjacspgen_idx_dev_; + int *g_isrenewable = genparams->isrenewable_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + if (!g_isrenewable[i]) { + MJacS_dev[eqjacspgen_idx[i]] = -1.0; + MJacS_dev[eqjacspgen_idx[i] + 1] = 1.0; + MJacS_dev[eqjacspgen_idx[i] + 2] = 1.0; + MJacS_dev[eqjacspgen_idx[i] + 3] = 1.0; + } + }); + } + + /* Loadloss contributions - 2 contributions expected */ + if (opflow->include_loadloss_variables) { + int *l_jacsp_idx = loadparams->jacsp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + MJacS_dev[l_jacsp_idx[i]] = -1; + MJacS_dev[l_jacsp_idx[i] + 1] = -1; + }); + } + + // Line contributions + + double *Gff = lineparams->Gff_dev_; + double *Gtt = lineparams->Gtt_dev_; + double *Gft = lineparams->Gft_dev_; + double *Gtf = lineparams->Gtf_dev_; + + double *Bff = lineparams->Bff_dev_; + double *Btt = lineparams->Btt_dev_; + double *Bft = lineparams->Bft_dev_; + double *Btf = lineparams->Btf_dev_; + + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *busf_idx = lineparams->busf_idx_dev_; + int *bust_idx = lineparams->bust_idx_dev_; + int *jacf_idx = lineparams->jacf_idx_dev_; + int *jact_idx = lineparams->jact_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { + double thetaf = x_dev[xidxf[i]], Vmf = x_dev[xidxf[i] + 1]; + double thetat = x_dev[xidxt[i]], Vmt = x_dev[xidxt[i] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + int ifrom(busf_idx[i]), ito(bust_idx[i]); + + // This confusing and could probably be done in a clearer + // way. Two indexing schemes are needed. Some line + // contributions (from/from, to/to) need to be added to the + // original bus contribution entries -- get those + // from the bus index list. A separate indexing system is for + // the extra (to/from, from/to) entries. + + // for reference, these are computed in the same order as + // OPFLOWComputeDenseEqualityConstraintJacobian_PBPOLRAJAHIOP() + + // from bus real entries + + /* dPf_dthetaf */ + MJacS_dev[b_jacsp_idx[ifrom]] += + Vmf * Vmt * (-Gft[i] * sin(thetaft) + Bft[i] * cos(thetaft)); + /*dPf_dVmf */ + MJacS_dev[b_jacsp_idx[ifrom] + 1] += 2 * Gff[i] * Vmf + + Vmt * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft)); + /*dPf_dthetat */ + MJacS_dev[jacf_idx[i] + 0] = + Vmf * Vmt * (Gft[i] * sin(thetaft) - Bft[i] * cos(thetaft)); + /* dPf_dVmt */ + MJacS_dev[jacf_idx[i] + 1] = + Vmf * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft)); + + // from bus reactive entries + + /* dQf_dthetaf */ + MJacS_dev[b_jacsq_idx[ifrom]] += + Vmf * Vmt * (Bft[i] * sin(thetaft) + Gft[i] * cos(thetaft)); + /* dQf_dVmf */ + MJacS_dev[b_jacsq_idx[ifrom] + 1] += -2 * Bff[i] * Vmf + + Vmt * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); + /* dQf_dthetat */ + MJacS_dev[jacf_idx[i] + 2] = + Vmf * Vmt * (-Bft[i] * sin(thetaft) - Gft[i] * cos(thetaft)); + /* dQf_dVmt */ + MJacS_dev[jacf_idx[i] + 3] = + Vmf * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); + + // to bus real entries + + /* dPt_dthetat */ + MJacS_dev[b_jacsp_idx[ito]] += + Vmt * Vmf * (-Gtf[i] * sin(thetatf) + Btf[i] * cos(thetatf)); + /* dPt_dVmt */ + MJacS_dev[b_jacsp_idx[ito] + 1] += 2 * Gtt[i] * Vmt + + Vmf * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf)); + /* dPt_dthetaf */ + MJacS_dev[jact_idx[i] + 0] = + Vmt * Vmf * (Gtf[i] * sin(thetatf) - Btf[i] * cos(thetatf)); + /* dPt_dVmf */ + MJacS_dev[jact_idx[i] + 1] = + Vmt * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf)); + + // to bus reactive entries + + /* dQt_dthetat */ + MJacS_dev[b_jacsq_idx[ito]] += + Vmt * Vmf * (Btf[i] * sin(thetatf) + Gtf[i] * cos(thetatf)); + /* dQt_dVmt */ + MJacS_dev[b_jacsq_idx[ito] + 1] += -2 * Btt[i] * Vmt + + Vmf * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); + /* dQt_dthetaf */ + MJacS_dev[jact_idx[i] + 2] = + Vmt * Vmf * (-Btf[i] * sin(thetatf) - Gtf[i] * cos(thetatf)); + /* dQt_dVmf */ + MJacS_dev[jact_idx[i] + 3] = + Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); + }); + + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -903,10 +1067,11 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // Copy over val_ineq to device resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); - ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); } + ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); + CHKERRQ(ierr); + PetscFunctionReturn(0); } From 84a45be1f42afcb5a4f7ed7b7ee84abb5aae44ef Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Fri, 27 Oct 2023 09:35:39 -0700 Subject: [PATCH 13/35] Streamline debug printing of equality Jacobian --- .../pbpolrajahiopsparsekernels.cpp | 90 ++++++++++++++----- 1 file changed, 70 insertions(+), 20 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index a415d56a..955580b4 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -603,6 +603,55 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionReturn(0); } + +// A routine to get the triplet arrays from the device and print them out +static void +PrintTriplets(const std::string& title, const int& n, int *i, int *j, double *v) +{ + auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + + int *itemp(NULL); + + if (i != NULL) { + itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(itemp, i, n*sizeof(int)); + } + + int *jtemp(NULL); + if (j != 0) { + jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(jtemp, j, n*sizeof(int)); + } + + double *vtemp(NULL); + if (v != NULL) { + vtemp = (double *)(h_allocator_.allocate(n * sizeof(double))); + resmgr.copy(vtemp, v, n*sizeof(double)); + } + + std::cout << title << std::endl; + for (int idx = 0; idx < n; ++idx) { + std::cout << std::setw(5) << idx << " "; + if (itemp != NULL) { + std::cout << std::setw(5) << std::right << itemp[idx] << " "; + } + if (jtemp != NULL) { + std::cout << std::setw(5) << std::right << jtemp[idx]; + } + if (vtemp != NULL) { + std::cout << std::setw(12) << std::right + << std::scientific << std::setprecision(3) + << vtemp[idx]; + } + std::cout << std::endl; + } + h_allocator_.deallocate(itemp); + h_allocator_.deallocate(jtemp); + if (vtemp != NULL) h_allocator_.deallocate(vtemp); +} + + PetscErrorCode OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( OPFLOW opflow, const double *x_dev, int *iJacS_dev, int *jJacS_dev, @@ -630,6 +679,8 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( ierr = PetscLogEventBegin(opflow->eqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + /* Using OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide */ if (MJacS_dev == NULL) { @@ -802,15 +853,9 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } - - // Create arrays on host to store i,j, and val arrays - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - - int *itemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - int *jtemp = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - - resmgr.copy(itemp, iJacS_dev, opflow->nnz_eqjacsp*sizeof(int)); - resmgr.copy(jtemp, jJacS_dev, opflow->nnz_eqjacsp*sizeof(int)); + if (1) + PrintTriplets("Non-zero indexes for Equality Constraint Jacobian (GPU):", + opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, NULL); if (1) { roffset = 0; @@ -847,17 +892,14 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); CHKERRQ(ierr); } - if (0) - std::cout << "Non-zero indexes for Equality Constraint Jacobian:" << std::endl; - for (int idx = 0; idx < opflow->nnz_eqjacsp; ++idx) { - std::cout << std::setw(5) << idx << " " - << std::setw(5) << pbpolrajahiopsparse->i_jaceq[idx] << " " - << std::setw(5) << pbpolrajahiopsparse->j_jaceq[idx] << std::endl; - } - } + // Copy over i_jaceq and j_jaceq arrays to device resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); + + if (1) + PrintTriplets("Non-zero indexes for Equality Constraint Jacobian:", + opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, NULL); } } else { @@ -871,6 +913,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( double *bl = busparams->bl_dev_; int *b_xidx = busparams->xidx_dev_; + // Basic bus contribution RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { @@ -894,7 +937,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } - /* Generator contributions */ + // Generator contributions int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; int *eqjacsqbus_idx = genparams->eqjacsqbus_idx_dev_; RAJA::forall( @@ -1029,13 +1072,17 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( MJacS_dev[jact_idx[i] + 3] = Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); }); + + if (1) + PrintTriplets("Equality Constraint Jacobian (GPU):", + opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); + - + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); // Copy from device to host - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); registerWith(x, opflow->nx, resmgr, h_allocator_); resmgr.copy((double *)x, (double *)x_dev); @@ -1067,6 +1114,9 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // Copy over val_ineq to device resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); + if (1) + PrintTriplets("Equality Constraint Jacobian:", + opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); } ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); From fae9cd9462e9b680627bedc73a3ef6f2fa7f4db3 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Mon, 30 Oct 2023 10:56:50 -0700 Subject: [PATCH 14/35] Equality Jacobian is correct, but apparently needs to be reordered --- .../model/power_bal_hiop/paramsrajahiop.cpp | 4 +- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 3 +- .../pbpolrajahiopsparsekernels.cpp | 46 +++++++++++-------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 597eb3b1..64a18b79 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -407,8 +407,8 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { */ geqidxf[linei] = busf->starteqloc; geqidxt[linei] = bust->starteqloc; - busf_idx[linei] = line->fbus; - bust_idx[linei] = line->tbus; + busf_idx[linei] = ps->busext2intmap[line->fbus]; + bust_idx[linei] = ps->busext2intmap[line->tbus]; jacf_idx[linei] = 0; jact_idx[linei] = 0; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index 79f98606..cdaf6874 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -302,7 +302,8 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { continue; // each line adds 4 (off-diagonal) entries for the to bus and 4 - // entries for the from bus. + // entries for the from bus. Each line also modifies 4 existing + // to and from bus entries. lineparams->jacf_idx[iline] = nnz_eqjac; nnz_eqjac += 4; lineparams->jact_idx[iline] = nnz_eqjac; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 955580b4..ef5cf779 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1015,11 +1015,13 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // from bus real entries /* dPf_dthetaf */ - MJacS_dev[b_jacsp_idx[ifrom]] += - Vmf * Vmt * (-Gft[i] * sin(thetaft) + Bft[i] * cos(thetaft)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsp_idx[ifrom]]), + Vmf * Vmt * (-Gft[i] * sin(thetaft) + Bft[i] * cos(thetaft))); /*dPf_dVmf */ - MJacS_dev[b_jacsp_idx[ifrom] + 1] += 2 * Gff[i] * Vmf + - Vmt * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsp_idx[ifrom] + 1]), + 2 * Gff[i] * Vmf + Vmt * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft))); /*dPf_dthetat */ MJacS_dev[jacf_idx[i] + 0] = Vmf * Vmt * (Gft[i] * sin(thetaft) - Bft[i] * cos(thetaft)); @@ -1030,26 +1032,31 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // from bus reactive entries /* dQf_dthetaf */ - MJacS_dev[b_jacsq_idx[ifrom]] += - Vmf * Vmt * (Bft[i] * sin(thetaft) + Gft[i] * cos(thetaft)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsq_idx[ifrom]]), + Vmf * Vmt * (Bft[i] * sin(thetaft) + Gft[i] * cos(thetaft))); /* dQf_dVmf */ - MJacS_dev[b_jacsq_idx[ifrom] + 1] += -2 * Bff[i] * Vmf + - Vmt * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsq_idx[ifrom] + 1]), + -2 * Bff[i] * Vmf + + Vmt * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft))); /* dQf_dthetat */ MJacS_dev[jacf_idx[i] + 2] = Vmf * Vmt * (-Bft[i] * sin(thetaft) - Gft[i] * cos(thetaft)); /* dQf_dVmt */ MJacS_dev[jacf_idx[i] + 3] = - Vmf * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); + Vmf * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); // to bus real entries /* dPt_dthetat */ - MJacS_dev[b_jacsp_idx[ito]] += - Vmt * Vmf * (-Gtf[i] * sin(thetatf) + Btf[i] * cos(thetatf)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsp_idx[ito]]), + Vmt * Vmf * (-Gtf[i] * sin(thetatf) + Btf[i] * cos(thetatf))); /* dPt_dVmt */ - MJacS_dev[b_jacsp_idx[ito] + 1] += 2 * Gtt[i] * Vmt + - Vmf * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsp_idx[ito] + 1]), 2 * Gtt[i] * Vmt + + Vmf * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf))); /* dPt_dthetaf */ MJacS_dev[jact_idx[i] + 0] = Vmt * Vmf * (Gtf[i] * sin(thetatf) - Btf[i] * cos(thetatf)); @@ -1060,11 +1067,13 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // to bus reactive entries /* dQt_dthetat */ - MJacS_dev[b_jacsq_idx[ito]] += - Vmt * Vmf * (Btf[i] * sin(thetatf) + Gtf[i] * cos(thetatf)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsq_idx[ito]]), + Vmt * Vmf * (Btf[i] * sin(thetatf) + Gtf[i] * cos(thetatf))); /* dQt_dVmt */ - MJacS_dev[b_jacsq_idx[ito] + 1] += -2 * Btt[i] * Vmt + - Vmf * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); + RAJA::atomicAdd + (&(MJacS_dev[b_jacsq_idx[ito] + 1]), -2 * Btt[i] * Vmt + + Vmf * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf))); /* dQt_dthetaf */ MJacS_dev[jact_idx[i] + 2] = Vmt * Vmf * (-Btf[i] * sin(thetatf) - Gtf[i] * cos(thetatf)); @@ -1078,7 +1087,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); - + if (1) { ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -1117,6 +1126,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (1) PrintTriplets("Equality Constraint Jacobian:", opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); + } } ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); From 6890b32f097cd05cd665e6e3dbbb326ad9c442e6 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 31 Oct 2023 07:51:35 -0700 Subject: [PATCH 15/35] Inequality Jacobian indexes are correct (for lines) --- .../model/power_bal_hiop/paramsrajahiop.cpp | 7 + .../model/power_bal_hiop/paramsrajahiop.h | 2 + .../power_bal_hiop/pbpolrajahiopsparse.cpp | 19 +- .../pbpolrajahiopsparsekernels.cpp | 243 ++++++++++++------ 4 files changed, 188 insertions(+), 83 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 64a18b79..9259a49e 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -235,6 +235,7 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(gineqidx_dev_, gineqidx); resmgr.copy(gbineqidx_dev_, gbineqidx); resmgr.copy(linelimidx_dev_, linelimidx); + resmgr.copy(jac_ieq_idx_dev_, jac_ieq_idx); } #else Gff_dev_ = Gff; @@ -258,6 +259,7 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { gineqidx_dev_ = gineqidx; gbineqidx_dev_ = gbineqidx; linelimidx_dev_ = linelimidx; + jac_ieq_idx_dev_ = jac_ieq_idx; } #endif return 0; @@ -289,6 +291,7 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(gineqidx); h_allocator_.deallocate(gbineqidx); h_allocator_.deallocate(linelimidx); + h_allocator_.deallocate(jac_ieq_idx); } #ifdef EXAGO_ENABLE_GPU @@ -317,6 +320,7 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(gineqidx_dev_); d_allocator_.deallocate(gbineqidx_dev_); d_allocator_.deallocate(linelimidx_dev_); + d_allocator_.deallocate(jac_ieq_idx_dev_); } #endif @@ -369,6 +373,7 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { linelimidx = paramAlloc(h_allocator_, nlinelim); gineqidx = paramAlloc(h_allocator_, nlinelim); gbineqidx = paramAlloc(h_allocator_, nlinelim); + jac_ieq_idx = paramAlloc(h_allocator_, nlinelim); } PetscInt j = 0; @@ -416,6 +421,7 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { gbineqidx[j] = opflow->nconeq + line->startineqloc; gineqidx[j] = line->startineqloc; linelimidx[j] = linei; + jac_ieq_idx[j] = 0; j++; } @@ -450,6 +456,7 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { gineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); gbineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); linelimidx_dev_ = paramAlloc(d_allocator_, nlinelim); + jac_ieq_idx_dev_ = paramAlloc(d_allocator_, nlinelim); } #endif return 0; diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 73948f7b..da9f922c 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -200,6 +200,7 @@ struct LINEParamsRajaHiop { int *bust_idx; /* To bus index */ int *jacf_idx; /* Location number in the sparse Jacobian (from) */ int *jact_idx; /* Location number in the sparse Jacobian (to) */ + int *jac_ieq_idx;/* Location number in sparse inequality Jacobian */ // Device data double *Gff_dev_; /* From side self conductance */ @@ -228,6 +229,7 @@ struct LINEParamsRajaHiop { int *bust_idx_dev_; /* To bus index */ int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ + int *jac_ieq_idx_dev_;/* Location number in sparse inequality Jacobian */ int allocate(OPFLOW); int destroy(OPFLOW); diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index cdaf6874..8e25aad9 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -310,19 +310,25 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { nnz_eqjac += 4; } + // if there are lines, non-zeros were over counted + if (ps->nline > 0) { + nnz_eqjac -= 8; + } + std::cout << "Equality Jacobian nonzero count: " << nnz_eqjac << std::endl; if (opflow->has_gensetpoint) { - for (int ibus = 0; ibus < ps->nbus; ++ibus) { + for (int ibus = 0, igen = 0; ibus < ps->nbus; ++ibus) { PSBUS bus = &(ps->bus[ibus]); - for (int igen = 0; igen < bus->ngen; ++igen) { + for (int bgen = 0; bgen < bus->ngen; ++bgen) { PSGEN gen; - ierr = PSBUSGetGen(bus, igen, &gen); + ierr = PSBUSGetGen(bus, bgen, &gen); CHKERRQ(ierr); if (!gen->status) continue; - genparams->ineqjacspgen_idx[igen] = nnz_ineqjac; + genparams->ineqjacspgen_idx[igen] = nnz_eqjac + nnz_ineqjac; nnz_ineqjac += 6; + igen++; } } } @@ -331,9 +337,9 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { for (int ibus = 0; ibus < ps->nbus; ++ibus) { PSBUS bus = &(ps->bus[ibus]); if (bus->ide == PV_BUS || bus->ide == REF_BUS) { - for (int igen = 0; igen < bus->ngen; ++igen) { + for (int bgen = 0; bgen < bus->ngen; ++bgen) { PSGEN gen; - ierr = PSBUSGetGen(bus, igen, &gen); + ierr = PSBUSGetGen(bus, bgen, &gen); CHKERRQ(ierr); if (!gen->status) continue; @@ -346,6 +352,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { if (!opflow->ignore_lineflow_constraints) { for (int iline = 0; iline < opflow->nlinesmon; ++iline) { // PSLINE line = &ps->line[opflow->linesmon[iline]]; + lineparams->jac_ieq_idx[iline] = nnz_eqjac + nnz_ineqjac; nnz_ineqjac += 8; } } diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index ef5cf779..487c4dd7 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -18,6 +18,9 @@ #include "pbpolrajahiopsparsekernels.hpp" #include "pbpolrajahiopsparse.hpp" +static const bool debugmsg(true); +static const bool oldhostway(true); + PetscErrorCode OPFLOWSetInitialGuessArray_PBPOLRAJAHIOPSPARSE(OPFLOW opflow, double *x0_dev) { PetscErrorCode ierr; @@ -471,6 +474,54 @@ PetscErrorCode OPFLOWComputeGradientArray_PBPOLRAJAHIOPSPARSE( PetscFunctionReturn(0); } +// A routine to get the triplet arrays from the device and print them out +static void +PrintTriplets(const std::string& title, const int& n, int *i, int *j, double *v) +{ + auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + + int *itemp(NULL); + + if (i != NULL) { + itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(itemp, i, n*sizeof(int)); + } + + int *jtemp(NULL); + if (j != 0) { + jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(jtemp, j, n*sizeof(int)); + } + + double *vtemp(NULL); + if (v != NULL) { + vtemp = (double *)(h_allocator_.allocate(n * sizeof(double))); + resmgr.copy(vtemp, v, n*sizeof(double)); + } + + std::cout << title << std::endl; + for (int idx = 0; idx < n; ++idx) { + std::cout << std::setw(5) << idx << " "; + if (itemp != NULL) { + std::cout << std::setw(5) << std::right << itemp[idx] << " "; + } + if (jtemp != NULL) { + std::cout << std::setw(5) << std::right << jtemp[idx]; + } + if (vtemp != NULL) { + std::cout << std::setw(12) << std::right + << std::scientific << std::setprecision(3) + << vtemp[idx]; + } + std::cout << std::endl; + } + h_allocator_.deallocate(itemp); + h_allocator_.deallocate(jtemp); + if (vtemp != NULL) h_allocator_.deallocate(vtemp); +} + + PetscErrorCode OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( OPFLOW opflow, const double *x_dev, int *iJacS_dev, int *jJacS_dev, @@ -480,7 +531,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscErrorCode ierr; double *x, *values; PetscInt *iRowstart, *jColstart; - PetscInt roffset, coffset; + PetscInt roffset, coffset, idxoffset; PetscInt nrow, ncol; PetscInt nvals; const PetscInt *cols; @@ -491,11 +542,91 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionBegin; if (MJacS_dev == NULL) { + + if (debugmsg) + std::cout << "Official Inequality Jacobian nonzero count: " + << opflow->nnz_ineqjacsp << std::endl; + /* Set locations only */ if (opflow->Nconineq) { ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); + /* Inequality constraints start after equality constraints + Hence the offset + */ + roffset = opflow->nconeq; + coffset = 0; + idxoffset = opflow->nnz_eqjacsp; + + resmgr.memset(iJacS_dev + idxoffset, 0, + opflow->nnz_ineqjacsp*sizeof(int)); + resmgr.memset(jJacS_dev + idxoffset, 0, + opflow->nnz_ineqjacsp*sizeof(int)); + + if (!opflow->ignore_lineflow_constraints) { + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + int *jac_ieq_idx = lineparams->jac_ieq_idx_dev_; + int *linelimidx = lineparams->linelimidx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *gbineqidx = lineparams->gbineqidx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int iline(linelimidx[i]); + int offset(0); + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline] + 1; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline] + 1; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline] + 1; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline]; + offset++; + + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; + jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline] + 1; + }); + + } + + if (debugmsg) + PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian (GPU):", + opflow->nnz_ineqjacsp, + iJacS_dev + opflow->nnz_eqjacsp, + jJacS_dev + opflow->nnz_eqjacsp, + NULL); + + if (oldhostway) { + + /* Inequality constraints start after equality constraints + Hence the offset + */ + roffset = opflow->nconeq; + coffset = 0; + // Create arrays on host to store i,j, and val arrays umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); @@ -509,12 +640,6 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( iRowstart = pbpolrajahiopsparse->i_jacineq; jColstart = pbpolrajahiopsparse->j_jacineq; - /* Inequality constraints start after equality constraints - Hence the offset - */ - roffset = opflow->nconeq; - coffset = 0; - ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( opflow, opflow->X, opflow->Jac_Gi); CHKERRQ(ierr); @@ -536,30 +661,30 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( CHKERRQ(ierr); } - // Dump out the matrix indexes as a check - std::cout << "Nonzero indexes for Inequality Constraint Jacobian: " - << opflow->nnz_ineqjacsp - << std::endl; - for (int idx = 0; idx < opflow->nnz_ineqjacsp; ++idx) { - std::cout << std::setw(5) << idx << " " - << std::setw(5) << pbpolrajahiopsparse->i_jacineq[idx] << " " - << std::setw(5) << pbpolrajahiopsparse->j_jacineq[idx] << std::endl; - } - // Copy over i_jacineq and j_jacineq arrays to device resmgr.copy(iJacS_dev + opflow->nnz_eqjacsp, pbpolrajahiopsparse->i_jacineq); resmgr.copy(jJacS_dev + opflow->nnz_eqjacsp, pbpolrajahiopsparse->j_jacineq); + if (debugmsg) + PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian:", + opflow->nnz_ineqjacsp, + iJacS_dev + opflow->nnz_eqjacsp, + jJacS_dev + opflow->nnz_eqjacsp, + NULL); + ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); + } } } else { if (opflow->Nconineq) { ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); + if (oldhostway) { + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -595,8 +720,16 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, pbpolrajahiopsparse->val_jacineq); + if (debugmsg) + PrintTriplets("Inequality Constraint Jacobian:", + opflow->nnz_ineqjacsp, + (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), + (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), + MJacS_dev + opflow->nnz_eqjacsp); + ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); + } } } @@ -604,54 +737,6 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( } -// A routine to get the triplet arrays from the device and print them out -static void -PrintTriplets(const std::string& title, const int& n, int *i, int *j, double *v) -{ - auto &resmgr = umpire::ResourceManager::getInstance(); - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - - int *itemp(NULL); - - if (i != NULL) { - itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(itemp, i, n*sizeof(int)); - } - - int *jtemp(NULL); - if (j != 0) { - jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(jtemp, j, n*sizeof(int)); - } - - double *vtemp(NULL); - if (v != NULL) { - vtemp = (double *)(h_allocator_.allocate(n * sizeof(double))); - resmgr.copy(vtemp, v, n*sizeof(double)); - } - - std::cout << title << std::endl; - for (int idx = 0; idx < n; ++idx) { - std::cout << std::setw(5) << idx << " "; - if (itemp != NULL) { - std::cout << std::setw(5) << std::right << itemp[idx] << " "; - } - if (jtemp != NULL) { - std::cout << std::setw(5) << std::right << jtemp[idx]; - } - if (vtemp != NULL) { - std::cout << std::setw(12) << std::right - << std::scientific << std::setprecision(3) - << vtemp[idx]; - } - std::cout << std::endl; - } - h_allocator_.deallocate(itemp); - h_allocator_.deallocate(jtemp); - if (vtemp != NULL) h_allocator_.deallocate(vtemp); -} - - PetscErrorCode OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( OPFLOW opflow, const double *x_dev, int *iJacS_dev, int *jJacS_dev, @@ -685,6 +770,10 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (MJacS_dev == NULL) { + if (debugmsg) + std::cout << "Official Equality Jacobian nonzero count: " + << opflow->nnz_eqjacsp << std::endl; + /* Set locations only */ resmgr.memset(iJacS_dev, 0, opflow->nnz_eqjacsp*sizeof(int)); @@ -698,7 +787,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int *b_jacsq_idx = busparams->jacsq_idx_dev_; /* Bus */ - std::cout << "Begin with buses" << std::endl; + if (debugmsg) std::cout << "Begin with buses" << std::endl; RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { @@ -714,7 +803,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); if (opflow->include_powerimbalance_variables) { - std::cout << "Bus power imbalance variables" << std::endl; + if (debugmsg) std::cout << "Bus power imbalance variables" << std::endl; RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { @@ -732,7 +821,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* generation contributions */ - std::cout << "Generators " << std::endl; + if (debugmsg) std::cout << "Generators " << std::endl; int *g_gidxbus = genparams->gidxbus_dev_; int *g_xidx = genparams->xidx_dev_; @@ -752,7 +841,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (opflow->include_loadloss_variables) { - std::cout << "Load Loss" << std::endl; + if (debugmsg) std::cout << "Load Loss" << std::endl; int *l_gidx = loadparams->gidx_dev_; int *l_xidx = loadparams->xidx_dev_; int *l_jacsp_idx = loadparams->jacsp_idx_dev_; @@ -769,7 +858,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* Connected lines */ - std::cout << "Connected Lines" << std::endl; + if (debugmsg) std::cout << "Connected Lines" << std::endl; int *xidxf = lineparams->xidxf_dev_; int *xidxt = lineparams->xidxt_dev_; @@ -828,7 +917,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (opflow->has_gensetpoint) { - std::cout << "Generator set point" << std::endl; + if (debugmsg) std::cout << "Generator set point" << std::endl; int *eqjacspgen_idx = genparams->eqjacspgen_idx_dev_; int *g_geqidxgen = genparams->geqidxgen_dev_; int *g_xidx = genparams->xidx_dev_; @@ -853,11 +942,11 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } - if (1) + if (debugmsg) PrintTriplets("Non-zero indexes for Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, NULL); - if (1) { + if (oldhostway) { roffset = 0; coffset = 0; @@ -897,7 +986,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); - if (1) + if (debugmsg) PrintTriplets("Non-zero indexes for Equality Constraint Jacobian:", opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, NULL); } @@ -1082,12 +1171,12 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); }); - if (1) + if (debugmsg) PrintTriplets("Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); - if (1) { + if (oldhostway) { ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -1123,7 +1212,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // Copy over val_ineq to device resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); - if (1) + if (debugmsg) PrintTriplets("Equality Constraint Jacobian:", opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); } From d60a5ffbb0087f6080bec563f19faaafea9bebad Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 31 Oct 2023 08:47:19 -0700 Subject: [PATCH 16/35] Inequality Jacobian values for line limits added --- .../pbpolrajahiopsparsekernels.cpp | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 487c4dd7..5e3c53c3 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -683,6 +683,130 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); + if (!opflow->ignore_lineflow_constraints) { + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + double *Gff_arr = lineparams->Gff_dev_; + double *Gtt_arr = lineparams->Gtt_dev_; + double *Gft_arr = lineparams->Gft_dev_; + double *Gtf_arr = lineparams->Gtf_dev_; + + double *Bff_arr = lineparams->Bff_dev_; + double *Btt_arr = lineparams->Btt_dev_; + double *Bft_arr = lineparams->Bft_dev_; + double *Btf_arr = lineparams->Btf_dev_; + + int *linelimidx = lineparams->linelimidx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *jac_ieq_idx = lineparams->jac_ieq_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int j = linelimidx[i]; + double val[4]; + double Pf, Qf, Pt, Qt; + double thetaf = x_dev[xidxf[j]], Vmf = x_dev[xidxf[j] + 1]; + double thetat = x_dev[xidxt[j]], Vmt = x_dev[xidxt[j] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + double dSf2_dPf, dSf2_dQf, dSt2_dPt, dSt2_dQt; + double dPf_dthetaf, dPf_dVmf, dPf_dthetat, dPf_dVmt; + double dQf_dthetaf, dQf_dVmf, dQf_dthetat, dQf_dVmt; + double dPt_dthetaf, dPt_dVmf, dPt_dthetat, dPt_dVmt; + double dQt_dthetaf, dQt_dVmf, dQt_dthetat, dQt_dVmt; + double dSf2_dthetaf, dSf2_dVmf, dSf2_dthetat, dSf2_dVmt; + double dSt2_dthetaf, dSt2_dVmf, dSt2_dthetat, dSt2_dVmt; + double Gff = Gff_arr[j], Bff = Bff_arr[j]; + double Gft = Gft_arr[j], Bft = Bft_arr[j]; + double Gtf = Gtf_arr[j], Btf = Btf_arr[j]; + double Gtt = Gtt_arr[j], Btt = Btt_arr[j]; + + Pf = Gff * Vmf * Vmf + + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Qf = -Bff * Vmf * Vmf + + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Pt = Gtt * Vmt * Vmt + + Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Qt = -Btt * Vmt * Vmt + + Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + dSf2_dPf = 2 * Pf; + dSf2_dQf = 2 * Qf; + dSt2_dPt = 2 * Pt; + dSt2_dQt = 2 * Qt; + + dPf_dthetaf = Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf = + 2 * Gff * Vmf + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat = Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmt = Vmf * (Gft * cos(thetaft) + Bft * sin(thetaft)); + + dQf_dthetaf = Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf = + -2 * Bff * Vmf + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat = Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmt = Vmf * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + dPt_dthetat = Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmt = + 2 * Gtt * Vmt + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf = Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmf = Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + dQt_dthetat = Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt = + -2 * Btt * Vmt + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf = Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmf = Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + dSf2_dthetaf = dSf2_dPf * dPf_dthetaf + dSf2_dQf * dQf_dthetaf; + dSf2_dthetat = dSf2_dPf * dPf_dthetat + dSf2_dQf * dQf_dthetat; + dSf2_dVmf = dSf2_dPf * dPf_dVmf + dSf2_dQf * dQf_dVmf; + dSf2_dVmt = dSf2_dPf * dPf_dVmt + dSf2_dQf * dQf_dVmt; + + val[0] = dSf2_dthetaf; + val[1] = dSf2_dVmf; + val[2] = dSf2_dthetat; + val[3] = dSf2_dVmt; + + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 0]), val[0]); + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 1]), val[1]); + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 2]), val[2]); + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 3]), val[3]); + + dSt2_dthetaf = dSt2_dPt * dPt_dthetaf + dSt2_dQt * dQt_dthetaf; + dSt2_dthetat = dSt2_dPt * dPt_dthetat + dSt2_dQt * dQt_dthetat; + dSt2_dVmf = dSt2_dPt * dPt_dVmf + dSt2_dQt * dQt_dVmf; + dSt2_dVmt = dSt2_dPt * dPt_dVmt + dSt2_dQt * dQt_dVmt; + + val[2] = dSt2_dthetat; + val[3] = dSt2_dVmt; + val[0] = dSt2_dthetaf; + val[1] = dSt2_dVmf; + + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 4]), val[0]); + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 5]), val[1]); + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 6]), val[2]); + RAJA::atomicAdd + (&(MJacS_dev[jac_ieq_idx[i] + 7]), val[3]); + }); + + } + + if (debugmsg) + PrintTriplets("Inequality Constraint Jacobian (GPU):", + opflow->nnz_ineqjacsp, + (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), + (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), + MJacS_dev + opflow->nnz_eqjacsp); + if (oldhostway) { ierr = VecGetArray(opflow->X, &x); From 523932db4592211d04c20aa59e422e25e9c51fea Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 31 Oct 2023 10:02:41 -0700 Subject: [PATCH 17/35] Minor changes to inequality Jacobian line limit values --- .../pbpolrajahiopsparsekernels.cpp | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 5e3c53c3..6c2760a8 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -769,14 +769,19 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( val[2] = dSf2_dthetat; val[3] = dSf2_dVmt; - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 0]), val[0]); - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 1]), val[1]); - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 2]), val[2]); - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 3]), val[3]); + MJacS_dev[jac_ieq_idx[i] + 0] = val[0]; + MJacS_dev[jac_ieq_idx[i] + 1] = val[1]; + MJacS_dev[jac_ieq_idx[i] + 2] = val[2]; + MJacS_dev[jac_ieq_idx[i] + 3] = val[3]; + + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 0]), val[0]); + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 1]), val[1]); + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 2]), val[2]); + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 3]), val[3]); dSt2_dthetaf = dSt2_dPt * dPt_dthetaf + dSt2_dQt * dQt_dthetaf; dSt2_dthetat = dSt2_dPt * dPt_dthetat + dSt2_dQt * dQt_dthetat; @@ -788,14 +793,19 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( val[0] = dSt2_dthetaf; val[1] = dSt2_dVmf; - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 4]), val[0]); - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 5]), val[1]); - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 6]), val[2]); - RAJA::atomicAdd - (&(MJacS_dev[jac_ieq_idx[i] + 7]), val[3]); + MJacS_dev[jac_ieq_idx[i] + 4] = val[0]; + MJacS_dev[jac_ieq_idx[i] + 5] = val[1]; + MJacS_dev[jac_ieq_idx[i] + 6] = val[2]; + MJacS_dev[jac_ieq_idx[i] + 7] = val[3]; + + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 4]), val[0]); + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 5]), val[1]); + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 6]), val[2]); + // RAJA::atomicAdd + // (&(MJacS_dev[jac_ieq_idx[i] + 7]), val[3]); }); } From fd65166c6925bbf1b6ddee04ec58fdeb80a5fe97 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Wed, 8 Nov 2023 12:05:38 -0800 Subject: [PATCH 18/35] Sort Jacobian indexes, but not values. --- .../model/power_bal_hiop/paramsrajahiop.cpp | 3 + .../model/power_bal_hiop/paramsrajahiop.h | 2 + .../pbpolrajahiopsparsekernels.cpp | 133 ++++++++++++++---- 3 files changed, 113 insertions(+), 25 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index 9259a49e..bc377b8b 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -818,10 +818,12 @@ void PbpolModelRajaHiop::destroy(OPFLOW opflow) { auto &resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); h_allocator_.deallocate(i_jaceq); h_allocator_.deallocate(j_jaceq); h_allocator_.deallocate(val_jaceq); + d_allocator_.deallocate(idx_jaceq_dev_); h_allocator_.deallocate(i_hess); h_allocator_.deallocate(j_hess); @@ -831,6 +833,7 @@ void PbpolModelRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(i_jacineq); h_allocator_.deallocate(j_jacineq); h_allocator_.deallocate(val_jacineq); + d_allocator_.deallocate(idx_jacineq_dev_); } } #endif diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index da9f922c..1c9771b3 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -264,8 +264,10 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP { // GPU sparse model) int *i_jaceq, *j_jaceq; // Row and column indices for equality constrained Jacobian + int *idx_jaceq_dev_; // Permuted triplet indexes for equality constrained Jacobian (on-device) int *i_jacineq, *j_jacineq; // Row and column indices for inequality constrained Jacobain + int *idx_jacineq_dev_; // Permuted triplet indexes for inequality constrained Jacobian (on-device) int *i_hess, *j_hess; // Row and column indices for hessian double *val_jaceq, *val_jacineq, *val_hess; // values for equality, inequality jacobians and hessian diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 6c2760a8..6a95501a 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1,6 +1,9 @@ #include #include +#include +#include +#include #include @@ -474,13 +477,80 @@ PetscErrorCode OPFLOWComputeGradientArray_PBPOLRAJAHIOPSPARSE( PetscFunctionReturn(0); } +// A routine to sort triplet matrix indexes. The index arrays are on +// the device. This sorts them on the host. +static void +SortIndexes(const int& n, int *i_dev, int *j_dev, int *idx_perm_dev) +{ + auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + + std::vector< std::tuple > idxvect; + idxvect.reserve(n); + + int *itemp(NULL); + itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(itemp, i_dev, n*sizeof(int)); + + int *jtemp(NULL); + jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(jtemp, j_dev, n*sizeof(int)); + + for (int idx = 0; idx < n; idx++) { + idxvect.push_back(std::make_tuple(itemp[idx], jtemp[idx], idx)); + } + + std::sort(idxvect.begin(), idxvect.end(), + [] (std::tuple const &t1, + std::tuple const &t2) { + if (std::get<0>(t1) == std::get<0>(t2)) { + return (std::get<1>(t1) < std::get<1>(t2)); + } + return (std::get<0>(t1) < std::get<0>(t2)); + }); + + int *idx_perm; + idx_perm = (int *)(h_allocator_.allocate(n * sizeof(int))); + + for (int idx = 0; idx < n; idx++) { + itemp[idx] = std::get<0>(idxvect[idx]); + jtemp[idx] = std::get<1>(idxvect[idx]); + int i(std::get<2>(idxvect[idx])); + idx_perm[i] = idx; + } + + // std::cout << "Permuted Indexes: " << std::endl; + // for (int idx = 0; idx < n; idx++) { + // std::cout << std::setw(5) << std::right << itemp[idx] << " " + // << std::setw(5) << std::right << jtemp[idx] << " " + // << std::setw(5) << std::right << idx_perm[idx] << " " + // << std::endl; + // } + + resmgr.copy(i_dev, itemp, n*sizeof(int)); + resmgr.copy(j_dev, jtemp, n*sizeof(int)); + resmgr.copy(idx_perm_dev, idx_perm, n*sizeof(int)); + + h_allocator_.deallocate(itemp); + h_allocator_.deallocate(jtemp); + h_allocator_.deallocate(idx_perm); +} + // A routine to get the triplet arrays from the device and print them out static void -PrintTriplets(const std::string& title, const int& n, int *i, int *j, double *v) +PrintTriplets(const std::string& title, const int& n, int *iperm, + int *i, int *j, double *v) { auto &resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + int *ipermtemp(NULL); + + if (iperm != NULL) { + ipermtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); + resmgr.copy(ipermtemp, iperm, n*sizeof(int)); + } + int *itemp(NULL); if (i != NULL) { @@ -503,6 +573,9 @@ PrintTriplets(const std::string& title, const int& n, int *i, int *j, double *v) std::cout << title << std::endl; for (int idx = 0; idx < n; ++idx) { std::cout << std::setw(5) << idx << " "; + if (ipermtemp != NULL) { + std::cout << std::setw(5) << std::right << ipermtemp[idx] << " "; + } if (itemp != NULL) { std::cout << std::setw(5) << std::right << itemp[idx] << " "; } @@ -541,6 +614,11 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( PetscFunctionBegin; + // Create arrays on host to store i,j, and val arrays + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); + + if (MJacS_dev == NULL) { if (debugmsg) @@ -611,10 +689,19 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } + + pbpolrajahiopsparse->idx_jacineq_dev_ = + (int *) d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); + + SortIndexes(opflow->nnz_ineqjacsp, + iJacS_dev + opflow->nnz_eqjacsp, + jJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->idx_jacineq_dev_); if (debugmsg) PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian (GPU):", opflow->nnz_ineqjacsp, + pbpolrajahiopsparse->idx_jacineq_dev_, iJacS_dev + opflow->nnz_eqjacsp, jJacS_dev + opflow->nnz_eqjacsp, NULL); @@ -627,9 +714,6 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( roffset = opflow->nconeq; coffset = 0; - // Create arrays on host to store i,j, and val arrays - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - pbpolrajahiopsparse->i_jacineq = (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); pbpolrajahiopsparse->j_jacineq = @@ -670,6 +754,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian:", opflow->nnz_ineqjacsp, + NULL, iJacS_dev + opflow->nnz_eqjacsp, jJacS_dev + opflow->nnz_eqjacsp, NULL); @@ -683,6 +768,8 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); + int *iperm = pbpolrajahiopsparse->idx_jaceq_dev_; + if (!opflow->ignore_lineflow_constraints) { LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; double *Gff_arr = lineparams->Gff_dev_; @@ -774,15 +861,6 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( MJacS_dev[jac_ieq_idx[i] + 2] = val[2]; MJacS_dev[jac_ieq_idx[i] + 3] = val[3]; - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 0]), val[0]); - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 1]), val[1]); - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 2]), val[2]); - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 3]), val[3]); - dSt2_dthetaf = dSt2_dPt * dPt_dthetaf + dSt2_dQt * dQt_dthetaf; dSt2_dthetat = dSt2_dPt * dPt_dthetat + dSt2_dQt * dQt_dthetat; dSt2_dVmf = dSt2_dPt * dPt_dVmf + dSt2_dQt * dQt_dVmf; @@ -798,14 +876,6 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( MJacS_dev[jac_ieq_idx[i] + 6] = val[2]; MJacS_dev[jac_ieq_idx[i] + 7] = val[3]; - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 4]), val[0]); - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 5]), val[1]); - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 6]), val[2]); - // RAJA::atomicAdd - // (&(MJacS_dev[jac_ieq_idx[i] + 7]), val[3]); }); } @@ -813,6 +883,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) PrintTriplets("Inequality Constraint Jacobian (GPU):", opflow->nnz_ineqjacsp, + iperm, (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), MJacS_dev + opflow->nnz_eqjacsp); @@ -857,6 +928,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) PrintTriplets("Inequality Constraint Jacobian:", opflow->nnz_ineqjacsp, + NULL, (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), MJacS_dev + opflow->nnz_eqjacsp); @@ -899,6 +971,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( CHKERRQ(ierr); umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); /* Using OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide */ @@ -1076,9 +1149,17 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } + pbpolrajahiopsparse->idx_jaceq_dev_ = + (int *) d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); + + SortIndexes(opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, + pbpolrajahiopsparse->idx_jaceq_dev_); + if (debugmsg) PrintTriplets("Non-zero indexes for Equality Constraint Jacobian (GPU):", - opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, NULL); + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->idx_jaceq_dev_, + iJacS_dev, jJacS_dev, NULL); if (oldhostway) { roffset = 0; @@ -1122,7 +1203,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) PrintTriplets("Non-zero indexes for Equality Constraint Jacobian:", - opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, NULL); + opflow->nnz_eqjacsp, NULL, iJacS_dev, jJacS_dev, NULL); } } else { @@ -1136,6 +1217,8 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( double *bl = busparams->bl_dev_; int *b_xidx = busparams->xidx_dev_; + int *iperm = pbpolrajahiopsparse->idx_jaceq_dev_; + // Basic bus contribution RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), @@ -1307,7 +1390,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) PrintTriplets("Equality Constraint Jacobian (GPU):", - opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); + opflow->nnz_eqjacsp, iperm, iJacS_dev, jJacS_dev, MJacS_dev); if (oldhostway) { @@ -1348,7 +1431,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) PrintTriplets("Equality Constraint Jacobian:", - opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, MJacS_dev); + opflow->nnz_eqjacsp, NULL, iJacS_dev, jJacS_dev, MJacS_dev); } } From 067469fcc70e2aad50fb872a032f3e23a3811ed7 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Wed, 8 Nov 2023 14:06:15 -0800 Subject: [PATCH 19/35] Correctly reorder Jacobian values, but using the wrong method --- .../pbpolrajahiopsparsekernels.cpp | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 6a95501a..02a5ef82 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -768,7 +768,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( ierr = PetscLogEventBegin(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); - int *iperm = pbpolrajahiopsparse->idx_jaceq_dev_; + int *iperm = pbpolrajahiopsparse->idx_jacineq_dev_; if (!opflow->ignore_lineflow_constraints) { LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; @@ -880,13 +880,31 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( } + int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp*sizeof(int)); + resmgr.copy(ipermout, iperm); + + double *MJacS_out = (double *)d_allocator_.allocate(opflow->nnz_ineqjacsp*sizeof(double)); + resmgr.copy(MJacS_out, MJacS_dev + opflow->nnz_eqjacsp, + opflow->nnz_ineqjacsp*sizeof(double)); + + RAJA::stable_sort_pairs + (RAJA::make_span(ipermout, opflow->nnz_ineqjacsp), + RAJA::make_span(MJacS_out, opflow->nnz_ineqjacsp), + RAJA::operators::less{}); + + resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, MJacS_out, + opflow->nnz_ineqjacsp*sizeof(double)); + if (debugmsg) PrintTriplets("Inequality Constraint Jacobian (GPU):", opflow->nnz_ineqjacsp, iperm, (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), - MJacS_dev + opflow->nnz_eqjacsp); + MJacS_dev); + + d_allocator_.deallocate(ipermout); + d_allocator_.deallocate(MJacS_out); if (oldhostway) { @@ -1386,12 +1404,28 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* dQt_dVmf */ MJacS_dev[jact_idx[i] + 3] = Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); - }); + }); + + int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_eqjacsp*sizeof(int)); + resmgr.copy(ipermout, iperm); + + double *MJacS_out = + (double *)d_allocator_.allocate(opflow->nnz_eqjacsp*sizeof(double)); + resmgr.copy(MJacS_out, MJacS_dev, opflow->nnz_eqjacsp*sizeof(double)); + + RAJA::stable_sort_pairs + (RAJA::make_span(ipermout, opflow->nnz_eqjacsp), + RAJA::make_span(MJacS_out, opflow->nnz_eqjacsp), + RAJA::operators::less{}); + + resmgr.copy(MJacS_dev, MJacS_out, opflow->nnz_eqjacsp*sizeof(double)); if (debugmsg) PrintTriplets("Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, iperm, iJacS_dev, jJacS_dev, MJacS_dev); - + + d_allocator_.deallocate(ipermout); + d_allocator_.deallocate(MJacS_out); if (oldhostway) { ierr = VecGetArray(opflow->X, &x); From 8fb2e2c08069aca143c1656d2ee5afe8eacfd889 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Wed, 8 Nov 2023 14:43:16 -0800 Subject: [PATCH 20/35] Actually use GPU-computed Jacobian; reduce steps to reorder Jacobian values --- .../pbpolrajahiopsparsekernels.cpp | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 02a5ef82..2ab8f567 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -22,7 +22,7 @@ #include "pbpolrajahiopsparse.hpp" static const bool debugmsg(true); -static const bool oldhostway(true); +static const bool oldhostway(false); PetscErrorCode OPFLOWSetInitialGuessArray_PBPOLRAJAHIOPSPARSE(OPFLOW opflow, double *x0_dev) { @@ -883,18 +883,11 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp*sizeof(int)); resmgr.copy(ipermout, iperm); - double *MJacS_out = (double *)d_allocator_.allocate(opflow->nnz_ineqjacsp*sizeof(double)); - resmgr.copy(MJacS_out, MJacS_dev + opflow->nnz_eqjacsp, - opflow->nnz_ineqjacsp*sizeof(double)); - RAJA::stable_sort_pairs (RAJA::make_span(ipermout, opflow->nnz_ineqjacsp), - RAJA::make_span(MJacS_out, opflow->nnz_ineqjacsp), + RAJA::make_span(MJacS_dev + opflow->nnz_eqjacsp, opflow->nnz_ineqjacsp), RAJA::operators::less{}); - resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, MJacS_out, - opflow->nnz_ineqjacsp*sizeof(double)); - if (debugmsg) PrintTriplets("Inequality Constraint Jacobian (GPU):", opflow->nnz_ineqjacsp, @@ -904,7 +897,6 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( MJacS_dev); d_allocator_.deallocate(ipermout); - d_allocator_.deallocate(MJacS_out); if (oldhostway) { @@ -1409,23 +1401,16 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_eqjacsp*sizeof(int)); resmgr.copy(ipermout, iperm); - double *MJacS_out = - (double *)d_allocator_.allocate(opflow->nnz_eqjacsp*sizeof(double)); - resmgr.copy(MJacS_out, MJacS_dev, opflow->nnz_eqjacsp*sizeof(double)); - RAJA::stable_sort_pairs (RAJA::make_span(ipermout, opflow->nnz_eqjacsp), - RAJA::make_span(MJacS_out, opflow->nnz_eqjacsp), + RAJA::make_span(MJacS_dev, opflow->nnz_eqjacsp), RAJA::operators::less{}); - resmgr.copy(MJacS_dev, MJacS_out, opflow->nnz_eqjacsp*sizeof(double)); - if (debugmsg) PrintTriplets("Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, iperm, iJacS_dev, jJacS_dev, MJacS_dev); d_allocator_.deallocate(ipermout); - d_allocator_.deallocate(MJacS_out); if (oldhostway) { ierr = VecGetArray(opflow->X, &x); From fe0b6eaa2dc761cf9246d225c154e620eadf4b80 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Thu, 9 Nov 2023 12:42:41 -0800 Subject: [PATCH 21/35] Print (CPU) Hessian non-zero count, indexes, and values --- .../pbpolrajahiopsparsekernels.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 2ab8f567..5966b235 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1480,6 +1480,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (iHSS_dev != NULL && jHSS_dev != NULL) { + if (debugmsg) + std::cout << "Official Hessian nonzero count: " + << opflow->nnz_hesssp << std::endl; + // Create arrays on host to store i,j, and val arrays umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); @@ -1524,8 +1528,14 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // Copy over i_hess and j_hess arrays to device resmgr.copy(iHSS_dev, pbpolrajahiopsparse->i_hess); resmgr.copy(jHSS_dev, pbpolrajahiopsparse->j_hess); - } else { + if (debugmsg) { + PrintTriplets("Hessian Indexes:", + opflow->nnz_hesssp, iHSS_dev, jHSS_dev, NULL); + } + + } else { + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -1603,7 +1613,13 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // Copy over val_ineq to device resmgr.copy(MHSS_dev, pbpolrajahiopsparse->val_hess); - } + + if (debugmsg) { + PrintTriplets("Hessian Values:", + opflow->nnz_hesssp, iHSS_dev, jHSS_dev, MHSS_dev); + } + + } PetscFunctionReturn(0); } From fcf90743af6e2b56607d0aa383b2d1b4461589e4 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Thu, 9 Nov 2023 12:44:34 -0800 Subject: [PATCH 22/35] Add Hessian indexes to line params; make sure Hessian index is allocated --- src/opflow/model/power_bal_hiop/paramsrajahiop.cpp | 13 +++++++++++++ src/opflow/model/power_bal_hiop/paramsrajahiop.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp index bc377b8b..8911f7b1 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.cpp @@ -23,6 +23,7 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(gidx); h_allocator_.deallocate(jacsp_idx); h_allocator_.deallocate(jacsq_idx); + h_allocator_.deallocate(hesssp_idx); if (opflow->include_powerimbalance_variables) { h_allocator_.deallocate(xidxpimb); h_allocator_.deallocate(powerimbalance_penalty); @@ -42,6 +43,7 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(gidx_dev_); d_allocator_.deallocate(jacsp_idx_dev_); d_allocator_.deallocate(jacsq_idx_dev_); + d_allocator_.deallocate(hesssp_idx_dev_); if (opflow->include_powerimbalance_variables) { d_allocator_.deallocate(xidxpimb_dev_); d_allocator_.deallocate(powerimbalance_penalty_dev_); @@ -76,6 +78,7 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(gidx_dev_, gidx); resmgr.copy(jacsp_idx_dev_, jacsp_idx); resmgr.copy(jacsq_idx_dev_, jacsq_idx); + resmgr.copy(hesssp_idx_dev_, hesssp_idx); if (opflow->include_powerimbalance_variables) { resmgr.copy(xidxpimb_dev_, xidxpimb); resmgr.copy(powerimbalance_penalty_dev_, powerimbalance_penalty); @@ -95,6 +98,7 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) { gidx_dev_ = gidx; jacsp_idx_dev_ = jacsp_idx; jacsq_idx_dev_ = jacsq_idx; + hesssp_idx_dev_ = hesssp_idx; powerimbalance_penalty_dev_ = powerimbalance_penalty; #endif return 0; @@ -128,6 +132,7 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) { jacsp_idx = paramAlloc(h_allocator_, nbus); jacsq_idx = paramAlloc(h_allocator_, nbus); + hesssp_idx = paramAlloc(h_allocator_, nbus); if (opflow->include_powerimbalance_variables) { xidxpimb = paramAlloc(h_allocator_, nbus); powerimbalance_penalty = paramAlloc(h_allocator_, nbus); @@ -196,6 +201,7 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) { jacsp_idx_dev_ = paramAlloc(d_allocator_, nbus); jacsq_idx_dev_ = paramAlloc(d_allocator_, nbus); + hesssp_idx_dev_ = paramAlloc(d_allocator_, nbus); if (opflow->include_powerimbalance_variables) { xidxpimb_dev_ = paramAlloc(d_allocator_, nbus); powerimbalance_penalty_dev_ = paramAlloc(d_allocator_, nbus); @@ -230,6 +236,7 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { resmgr.copy(bust_idx_dev_, bust_idx); resmgr.copy(jacf_idx_dev_, jacf_idx); resmgr.copy(jact_idx_dev_, jact_idx); + resmgr.copy(hesssp_idx_dev_, hesssp_idx); if (opflow->nlinesmon) { resmgr.copy(gineqidx_dev_, gineqidx); @@ -255,6 +262,7 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) { bust_idx_dev_ = bust_idx; jacf_idx_dev_ = jacf_idx; jact_idx_dev_ = jact_idx; + hesssp_idx_dev_ = hesssp_idx; if (opflow->nlinesmon) { gineqidx_dev_ = gineqidx; gbineqidx_dev_ = gbineqidx; @@ -286,6 +294,7 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { h_allocator_.deallocate(bust_idx); h_allocator_.deallocate(jacf_idx); h_allocator_.deallocate(jact_idx); + h_allocator_.deallocate(hesssp_idx); if (opflow->nlinesmon) { h_allocator_.deallocate(gineqidx); @@ -315,6 +324,7 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) { d_allocator_.deallocate(bust_idx_dev_); d_allocator_.deallocate(jacf_idx_dev_); d_allocator_.deallocate(jact_idx_dev_); + d_allocator_.deallocate(hesssp_idx_dev_); if (opflow->nlinesmon) { d_allocator_.deallocate(gineqidx_dev_); @@ -368,6 +378,7 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { bust_idx = paramAlloc(h_allocator_, nlineON); jacf_idx = paramAlloc(h_allocator_, nlineON); jact_idx = paramAlloc(h_allocator_, nlineON); + hesssp_idx = paramAlloc(h_allocator_, nlineON); if (opflow->nlinesmon) { linelimidx = paramAlloc(h_allocator_, nlinelim); @@ -416,6 +427,7 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { bust_idx[linei] = ps->busext2intmap[line->tbus]; jacf_idx[linei] = 0; jact_idx[linei] = 0; + hesssp_idx[linei] = 0; if (j < opflow->nlinesmon && opflow->linesmon[j] == i) { gbineqidx[j] = opflow->nconeq + line->startineqloc; @@ -451,6 +463,7 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) { bust_idx_dev_ = paramAlloc(d_allocator_, nlineON); jacf_idx_dev_ = paramAlloc(d_allocator_, nlineON); jact_idx_dev_ = paramAlloc(d_allocator_, nlineON); + hesssp_idx_dev_ = paramAlloc(d_allocator_, nlineON); if (opflow->nconineq) { gineqidx_dev_ = paramAlloc(d_allocator_, nlinelim); diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 1c9771b3..38037552 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -201,6 +201,7 @@ struct LINEParamsRajaHiop { int *jacf_idx; /* Location number in the sparse Jacobian (from) */ int *jact_idx; /* Location number in the sparse Jacobian (to) */ int *jac_ieq_idx;/* Location number in sparse inequality Jacobian */ + int *hesssp_idx; /* Location number in sparse Hessian */ // Device data double *Gff_dev_; /* From side self conductance */ @@ -230,6 +231,7 @@ struct LINEParamsRajaHiop { int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ int *jac_ieq_idx_dev_;/* Location number in sparse inequality Jacobian */ + int *hesssp_idx_dev_; /* Location number in sparse Hessian */ int allocate(OPFLOW); int destroy(OPFLOW); From 8a2c4d3bed94ff0b8c2280244cbc09ac47d9ce05 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Thu, 9 Nov 2023 12:45:29 -0800 Subject: [PATCH 23/35] Attempt to count Hessian non-zeros (wrong, of course) --- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index 8e25aad9..6599d57d 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -222,7 +222,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; /* Need to compute the number of nonzeros in equality, inequality constraint - * Jacobians and Hessian */ + * Jacobians */ int nnz_eqjac = 0, nnz_ineqjac = 0; // Find nonzero entries in equality constraint Jacobian by row. Using @@ -360,7 +360,59 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { std::cout << "Inequality Jacobian nonzero count: " << nnz_ineqjac << std::endl; - // opflow->nnz_eqjacsp = nnz_eqjac; + int nnz_hesssp = 0; + + for (int ibus = 0; ibus < ps->nbus; ++ibus) { + + // reserve 2 real and 2 reactive entries for each bus + + busparams->hesssp_idx[ibus] = nnz_hesssp; + nnz_hesssp += 4; + + if (opflow->include_powerimbalance_variables) { + nnz_hesssp += 2; + } + + } + + for (int i = 0, igen = 0; i < ps->ngen; ++i) { + PSGEN gen = &(ps->gen[i]); + + if (!gen->status) + continue; + + genparams->hesssp_idx[igen] = nnz_hesssp; + nnz_hesssp += 1; + + if (opflow->has_gensetpoint) { + if (gen->isrenewable) + continue; + + if (opflow->use_agc) { + nnz_hesssp += 5; + } + } + if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) { + nnz_hesssp += 2; + } + igen++; + } + + for (int iline=0; iline < ps->nline; ++iline) { + // reserve 8 entries for each line (used twice) + lineparams->hesssp_idx[iline] = nnz_hesssp; + nnz_hesssp += 8; + } + + if (opflow->include_loadloss_variables) { + for (int iload = 0; iload < ps->nload; ++iload) { + loadparams->hesssp_idx[iload] = nnz_hesssp; + nnz_hesssp += 2; + } + } + + std::cout << "Hessian nonzero count: " << nnz_hesssp << std::endl; + ierr = busparams->copy(opflow); ierr = genparams->copy(opflow); From eaa1cf089c8941f6b9cda229b074c8eb27650158 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 14 Nov 2023 07:01:41 -0800 Subject: [PATCH 24/35] Temporarily capture and report Hessian row/col indexes used --- src/opflow/model/power_bal_polar/pbpol.cpp | 80 ++++++++++++++++------ 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/src/opflow/model/power_bal_polar/pbpol.cpp b/src/opflow/model/power_bal_polar/pbpol.cpp index fbf41c3d..57c035bb 100644 --- a/src/opflow/model/power_bal_polar/pbpol.cpp +++ b/src/opflow/model/power_bal_polar/pbpol.cpp @@ -1,3 +1,6 @@ +#include +#include + #include "pbpol.h" #include "exago_config.h" #include @@ -1611,6 +1614,23 @@ PetscErrorCode OPFLOWModelSetNumConstraints_PBPOL(OPFLOW opflow, PetscFunctionReturn(0); } + +static PetscErrorCode +MatSetValues_and_Print(Mat M, int nrow, int row[], int ncol, int col[], + PetscScalar val[], InsertMode mode) +{ + for (int r = 0; r < nrow; ++r) { + for (int c = 0; c < ncol; ++c) { + std::cout << "M: " + << std::setw(5) << std::right << row[r] << " " + << std::setw(5) << std::right << col[c] + << std::endl; + } + } + return MatSetValues(M, nrow, row, ncol, col, val, mode); +} + + /* OPFLOWComputeEqualityConstraintsHessian - Computes the Hessian for the equality constraints function part @@ -1662,7 +1682,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, col[0] = xloc + 1; val[0] = lambda[gloc] * 2 * bus->gl + lambda[gloc + 1] * (-2 * bus->bl); - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); ierr = PSBUSGetSupportingLines(bus, &nconnlines, &connlines); @@ -1806,7 +1827,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPf_dVmf_dVmt + lambda[gloc + 1] * dQf_dVmf_dVmt; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloct; @@ -1837,7 +1859,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPf_dVmt_dVmt + lambda[gloc + 1] * dQf_dVmt_dVmt; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); } else { @@ -1945,7 +1968,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPt_dVmt_dVmf + lambda[gloc + 1] * dQt_dVmt_dVmf; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocf; @@ -1976,7 +2000,8 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[7] = lambda[gloc] * dPt_dVmf_dVmf + lambda[gloc + 1] * dQt_dVmf_dVmf; - ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2064,20 +2089,23 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[1] = -lambda[gloc] - lambda[gloc + 1]; val[2] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); - ierr = MatSetValues(H, 1, row, 3, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 3, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 3, col, val, ADD_VALUES); // df1_ddelPg = -(Pg - gen->pt); // df2_ddelPg = gen->pb - Pg; row[0] = gen->startxpdevloc; val[0] = -lambda[gloc] - lambda[gloc + 1]; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); // df1_ddelP = gen->apf*(Pg - gen->pt); // df2_ddelP = -gen->apf*(gen->pb - Pg); row[0] = ps->startxloc; val[0] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2113,14 +2141,16 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[0] = -( lambda[gloc] + lambda[gloc + 1]); // lam_eq1*d2eq1_dQg_dV + lam_eq2*d2eq2_dQg_dV - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloc + 1; col[0] = loc + 1; val[0] = -( lambda[gloc] + lambda[gloc + 1]); // lam_eq1* d2eq1_dQg_dV + lam_eq2*d2eq2_dV_dQg - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2406,7 +2436,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dthetaf_dVmt + lambda[gloc + 1] * d2St2_dthetaf_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, @@ -2450,7 +2481,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc + 1] * d2St2_dVmf_dthetat; val[3] = lambda[gloc] * d2Sf2_dVmf_dVmt + lambda[gloc + 1] * d2St2_dVmf_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, @@ -2500,7 +2532,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dthetat_dVmt + lambda[gloc + 1] * d2St2_dthetat_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, @@ -2546,7 +2579,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dVmt_dVmt + lambda[gloc + 1] * d2St2_dVmt_dVmt; - ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); // Must be inside for loop since there's a continue condition flps += (185 + (16 * EXAGO_FLOPS_SINOP) + (16 * EXAGO_FLOPS_COSOP)); @@ -2608,14 +2642,16 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, col[0] = xlocglob; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocglob + 1; col[0] = xlocglob + 1; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } @@ -2633,7 +2669,8 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, val[0] = weight * obj_factor * 2.0 * gen->cost_alpha * ps->MVAbase * ps->MVAbase; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); flps += 4; } else if (opflow->objectivetype == MIN_GENSETPOINT_DEVIATION) { @@ -2641,7 +2678,8 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, row[0] = xlocglob; col[0] = xlocglob; val[0] = weight * obj_factor * 2.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); flps += 1; @@ -2659,13 +2697,15 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, row[0] = xlocglob; col[0] = xlocglob; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocglob + 1; col[0] = xlocglob + 1; val[0] = 0.0; - ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } From d24178100084fc1eb6e81b95dadfffcc6e210785 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 14 Nov 2023 07:02:51 -0800 Subject: [PATCH 25/35] GPU Hessian indexes match model but not resulting matrix? --- .../pbpolrajahiopsparsekernels.cpp | 156 +++++++++++++++++- 1 file changed, 154 insertions(+), 2 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 5966b235..37001c13 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1465,6 +1465,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int *jHSS_dev, double *MHSS_dev) { PbpolModelRajaHiop *pbpolrajahiopsparse = reinterpret_cast(opflow->model); + GENParamsRajaHiop *genparams = &pbpolrajahiopsparse->genparams; + LOADParamsRajaHiop *loadparams = &pbpolrajahiopsparse->loadparams; + BUSParamsRajaHiop *busparams = &pbpolrajahiopsparse->busparams; + LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; + PetscErrorCode ierr; PetscInt *iRow, *jCol; PetscScalar *x, *values, *lambda; @@ -1480,6 +1485,153 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (iHSS_dev != NULL && jHSS_dev != NULL) { + resmgr.memset(iHSS_dev, 0, opflow->nnz_hesssp*sizeof(int)); + resmgr.memset(jHSS_dev, 0, opflow->nnz_hesssp*sizeof(int)); + + // Bus contributions + + int *b_xidx = busparams->xidx_dev_; + int *b_xidxpimb = busparams->xidxpimb_dev_; + int *b_hesssp_idx = busparams->hesssp_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { + int off(0); + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + off++; + }); + + if (opflow->include_powerimbalance_variables) { + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { + int off(1); + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; + off++; + + iHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i] + 1; + jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i] + 1; + }); + } + + /* Generator contributions for row,col numbers */ + int *g_xidx = genparams->xidx_dev_; + int *g_hesssp_idx = genparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + iHSS_dev[g_hesssp_idx[i]] = g_xidx[i]; + jHSS_dev[g_hesssp_idx[i]] = g_xidx[i]; + }); + + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *ln_hessp_idx = lineparams->hesssp_idx_dev_; + int *linelimidx = lineparams->linelimidx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int off(0); + int j = linelimidx[i]; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + // off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + // off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + // off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + // off++; + + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + // off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + off++; + }); + + /* Loadloss contributions - two contributions*/ + if (opflow->include_loadloss_variables) { + int *l_xidx = loadparams->xidx_dev_; + int *l_hesssp_idx = loadparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + iHSS_dev[l_hesssp_idx[i]] = l_xidx[i]; + jHSS_dev[l_hesssp_idx[i]] = l_xidx[i]; + iHSS_dev[l_hesssp_idx[i] + 1] = l_xidx[i] + 1; + jHSS_dev[l_hesssp_idx[i] + 1] = l_xidx[i] + 1; + }); + } + + if (debugmsg) + PrintTriplets("Hessian Indexes (GPU):", + opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, NULL); + if (debugmsg) std::cout << "Official Hessian nonzero count: " << opflow->nnz_hesssp << std::endl; @@ -1531,7 +1683,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (debugmsg) { PrintTriplets("Hessian Indexes:", - opflow->nnz_hesssp, iHSS_dev, jHSS_dev, NULL); + opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, NULL); } } else { @@ -1616,7 +1768,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (debugmsg) { PrintTriplets("Hessian Values:", - opflow->nnz_hesssp, iHSS_dev, jHSS_dev, MHSS_dev); + opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, MHSS_dev); } } From d15445bc44e81d6ac6a20a0b158d47922ad548c9 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 14 Nov 2023 11:07:34 -0800 Subject: [PATCH 26/35] First shot at GPU computed Hessian values (untested) --- .../pbpolrajahiopsparsekernels.cpp | 909 +++++++++++++++++- 1 file changed, 908 insertions(+), 1 deletion(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 37001c13..f7dc7e9d 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1687,7 +1687,914 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( } } else { - + + resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); + + + // Bus contributions + + int *b_hesssp_idx = busparams->hesssp_idx_dev_; + int *b_gidx = busparams->gidx_dev_; + int *ispvpq = busparams->ispvpq_dev_; + double *gl = busparams->gl_dev_; + double *bl = busparams->bl_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { + // int row, col; + double val; + // row = b_xidx[i] + 1 - nxsparse; + // col = row; + val = ispvpq[i] * (lambda_dev[b_gidx[i]] * 2 * gl[i] + + lambda_dev[b_gidx[i] + 1] * (-2 * bl[i])); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[i] + 3], val); + }); + + if (opflow->objectivetype == MIN_GEN_COST) { + int *hesssp_idx = genparams->hesssp_idx_dev_; + double *cost_alpha = genparams->cost_alpha_dev_; + double obj_factor = opflow->obj_factor; + int isobj_gencost = opflow->obj_gencost; + double MVAbase = opflow->ps->MVAbase; + double weight = opflow->weight; + + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS_dev[hesssp_idx[i]] = weight * isobj_gencost * obj_factor * + 2.0 * cost_alpha[i] * MVAbase * MVAbase; + }); + } else if (opflow->objectivetype == NO_OBJ) { + int *hesssp_idx = genparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, genparams->ngenON), + RAJA_LAMBDA(RAJA::Index_type i) { MHSS_dev[hesssp_idx[i]] = 0.0; }); + } + + // Line contributions + + double *Gff_arr = lineparams->Gff_dev_; + double *Gtt_arr = lineparams->Gtt_dev_; + double *Gft_arr = lineparams->Gft_dev_; + double *Gtf_arr = lineparams->Gtf_dev_; + + double *Bff_arr = lineparams->Bff_dev_; + double *Btt_arr = lineparams->Btt_dev_; + double *Bft_arr = lineparams->Bft_dev_; + double *Btf_arr = lineparams->Btf_dev_; + + int *busf_idx = lineparams->busf_idx_dev_; + int *bust_idx = lineparams->bust_idx_dev_; + int *xidxf = lineparams->xidxf_dev_; + int *xidxt = lineparams->xidxt_dev_; + int *geqidxf = lineparams->geqidxf_dev_; + int *geqidxt = lineparams->geqidxt_dev_; + int *ln_hessp_idx = lineparams->hesssp_idx_dev_; + + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { + int gloc; + // int row[2], col[4]; + double val[8]; + double Gff, Bff, Gft, Bft, Gtf, Btf, Gtt, Btt; + int fbusidx(busf_idx[i]), tbusidx(bust_idx[i]); + Gff = Gff_arr[i]; + Bff = Bff_arr[i]; + Gft = Gft_arr[i]; + Bft = Bft_arr[i]; + Gtf = Gtf_arr[i]; + Btf = Btf_arr[i]; + Gtt = Gtt_arr[i]; + Btt = Btt_arr[i]; + + double thetaf = x_dev[xidxf[i]], Vmf = x_dev[xidxf[i] + 1]; + double thetat = x_dev[xidxt[i]], Vmt = x_dev[xidxt[i] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + + double dPf_dthetaf_dthetaf, dPf_dthetaf_dVmf, dPf_dthetaf_dthetat, + dPf_dthetaf_dVmt; + double dPf_dVmf_dthetaf, dPf_dVmf_dVmf, dPf_dVmf_dthetat, dPf_dVmf_dVmt; + double dPf_dthetat_dthetaf, dPf_dthetat_dVmf, dPf_dthetat_dthetat, + dPf_dthetat_dVmt; + double dPf_dVmt_dthetaf, dPf_dVmt_dVmf, dPf_dVmt_dthetat, dPf_dVmt_dVmt; + + /* dPf_dthetaf = Vmf*Vmt*(-Gft*sin(thetaft) + Bft*cos(thetaft)); */ + dPf_dthetaf_dthetaf = + -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetaf_dVmf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dthetaf_dthetat = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetaf_dVmt = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + + /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + dPf_dVmf_dthetaf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf_dVmf = 2 * Gff; + dPf_dVmf_dthetat = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmf_dVmt = (Gft * cos(thetaft) + Bft * sin(thetaft)); + + /* dPf_dthetat = Vmf*Vmt*(Gft*sin(thetaft) - Bft*cos(thetaft)); */ + dPf_dthetat_dthetaf = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat_dVmf = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dthetat_dthetat = + Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); + dPf_dthetat_dVmt = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + + /* dPf_dVmt = Vmf*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + dPf_dVmt_dthetaf = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmt_dVmf = (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dVmt_dthetat = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmt_dVmt = 0.0; + + double dQf_dthetaf_dthetaf, dQf_dthetaf_dVmf, dQf_dthetaf_dthetat, + dQf_dthetaf_dVmt; + double dQf_dVmf_dthetaf, dQf_dVmf_dVmf, dQf_dVmf_dthetat, dQf_dVmf_dVmt; + double dQf_dthetat_dthetaf, dQf_dthetat_dVmf, dQf_dthetat_dthetat, + dQf_dthetat_dVmt; + double dQf_dVmt_dthetaf, dQf_dVmt_dVmf, dQf_dVmt_dthetat, dQf_dVmt_dVmt; + + /* dQf_dthetaf = Vmf*Vmt*(Bft*sin(thetaft) + Gft*cos(thetaft)); */ + dQf_dthetaf_dthetaf = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + dQf_dthetaf_dVmf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dthetaf_dthetat = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetaf_dVmt = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + + /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + Gft*sin(thetaft)); + */ + dQf_dVmf_dthetaf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf_dVmf = -2 * Bff; + dQf_dVmf_dthetat = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmf_dVmt = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + /* dQf_dthetat = Vmf*Vmt*(-Bft*sin(thetaft) - Gft*cos(thetaft)); */ + dQf_dthetat_dthetaf = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat_dVmf = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dthetat_dthetat = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + dQf_dthetat_dVmt = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + + /* dQf_dVmt = Vmf*(-Bft*cos(thetaft) + Gft*sin(thetaft)); */ + dQf_dVmt_dthetaf = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmt_dVmf = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dVmt_dthetat = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmt_dVmt = 0.0; + + // row[0] = xidxf[i] - nxsparse; + // row[1] = xidxf[i] + 1 - nxsparse; + // col[0] = xidxf[i] - nxsparse; + // col[1] = xidxf[i] + 1 - nxsparse; + // col[2] = xidxt[i] - nxsparse; + // col[3] = xidxt[i] + 1 - nxsparse; + + gloc = geqidxf[i]; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = + 0.0; + + val[0] = lambda_dev[gloc] * dPf_dthetaf_dthetaf + + lambda_dev[gloc + 1] * dQf_dthetaf_dthetaf; + val[1] = lambda_dev[gloc] * dPf_dthetaf_dVmf + + lambda_dev[gloc + 1] * dQf_dthetaf_dVmf; + val[2] = lambda_dev[gloc] * dPf_dthetaf_dthetat + + lambda_dev[gloc + 1] * dQf_dthetaf_dthetat; + val[3] = lambda_dev[gloc] * dPf_dthetaf_dVmt + + lambda_dev[gloc + 1] * dQf_dthetaf_dVmt; + + val[4] = lambda_dev[gloc] * dPf_dVmf_dthetaf + + lambda_dev[gloc + 1] * dQf_dVmf_dthetaf; + val[5] = lambda_dev[gloc] * dPf_dVmf_dVmf + + lambda_dev[gloc + 1] * dQf_dVmf_dVmf; + val[6] = lambda_dev[gloc] * dPf_dVmf_dthetat + + lambda_dev[gloc + 1] * dQf_dVmf_dthetat; + val[7] = lambda_dev[gloc] * dPf_dVmf_dVmt + + lambda_dev[gloc + 1] * dQf_dVmf_dVmt; + + // Remember central bus locations were reserved and indexed + // by bus (from-from) + + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 3], val[5]); + + // Off-center entries (from-to bus) were reserved and + // indexed by line + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + + // row[0] = xidxt[i] - nxsparse; + // row[1] = xidxt[i] + 1 - nxsparse; + + // col[0] = xidxf[i] - nxsparse; + // col[1] = xidxf[i] + 1 - nxsparse; + // col[2] = xidxt[i] - nxsparse; + // col[3] = xidxt[i] + 1 - nxsparse; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = + 0.0; + + val[0] = lambda_dev[gloc] * dPf_dthetat_dthetaf + + lambda_dev[gloc + 1] * dQf_dthetat_dthetaf; + val[1] = lambda_dev[gloc] * dPf_dthetat_dVmf + + lambda_dev[gloc + 1] * dQf_dthetat_dVmf; + val[2] = lambda_dev[gloc] * dPf_dthetat_dthetat + + lambda_dev[gloc + 1] * dQf_dthetat_dthetat; + val[3] = lambda_dev[gloc] * dPf_dthetat_dVmt + + lambda_dev[gloc + 1] * dQf_dthetat_dVmt; + + val[4] = lambda_dev[gloc] * dPf_dVmt_dthetaf + + lambda_dev[gloc + 1] * dQf_dVmt_dthetaf; + val[5] = lambda_dev[gloc] * dPf_dVmt_dVmf + + lambda_dev[gloc + 1] * dQf_dVmt_dVmf; + val[6] = lambda_dev[gloc] * dPf_dVmt_dthetat + + lambda_dev[gloc + 1] * dQf_dVmt_dthetat; + val[7] = lambda_dev[gloc] * dPf_dVmt_dVmt + + lambda_dev[gloc + 1] * dQf_dVmt_dVmt; + + + // Remember central bus locations were reserved and indexed + // by bus (to-to) + + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[3]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[6]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 3], val[7]); + + // Off-center entries (to-from bus) were reserved and + // indexed by line + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 4], val[0]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 5], val[1]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 6], val[4]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 7], val[5]); + + // ierr = MatSetValues(H,2,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + + double dPt_dthetat_dthetat, dPt_dthetat_dVmt, dPt_dthetat_dthetaf, + dPt_dthetat_dVmf; + double dPt_dVmt_dthetat, dPt_dVmt_dVmt, dPt_dVmt_dthetaf, dPt_dVmt_dVmf; + double dPt_dthetaf_dthetat, dPt_dthetaf_dVmt, dPt_dthetaf_dthetaf, + dPt_dthetaf_dVmf; + double dPt_dVmf_dthetat, dPt_dVmf_dVmt, dPt_dVmf_dthetaf, dPt_dVmf_dVmf; + + /* dPt_dthetat = Vmf*Vmt*(-Gtf*sin(thetatf) + Btf*cos(thetatf)); */ + dPt_dthetat_dthetat = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + dPt_dthetat_dVmt = Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dthetat_dthetaf = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetat_dVmf = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + + /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + dPt_dVmt_dthetat = Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); + dPt_dVmt_dVmt = 2 * Gtt; + dPt_dVmt_dthetaf = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmt_dVmf = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + /* dPt_dthetaf = Vmf*Vmt*(Gtf*sin(thetatf) - Btf*cos(thetatf)); */ + dPt_dthetaf_dthetat = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf_dVmt = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dthetaf_dthetaf = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + dPt_dthetaf_dVmf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + + /* dPt_dVmf = Vmt*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + dPt_dVmf_dthetat = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmf_dVmt = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dVmf_dthetaf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmf_dVmf = 0.0; + + double dQt_dthetaf_dthetaf, dQt_dthetaf_dVmf, dQt_dthetaf_dthetat, + dQt_dthetaf_dVmt; + double dQt_dVmf_dthetaf, dQt_dVmf_dVmf, dQt_dVmf_dthetat, dQt_dVmf_dVmt; + double dQt_dthetat_dthetaf, dQt_dthetat_dVmf, dQt_dthetat_dthetat, + dQt_dthetat_dVmt; + double dQt_dVmt_dthetaf, dQt_dVmt_dVmf, dQt_dVmt_dthetat, dQt_dVmt_dVmt; + + /* dQt_dthetat = Vmf*Vmt*(Btf*sin(thetatf) + Gtf*cos(thetatf)); */ + dQt_dthetat_dthetat = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + dQt_dthetat_dVmt = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dthetat_dthetaf = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetat_dVmf = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + + /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); + */ + dQt_dVmt_dthetat = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt_dVmt = -2 * Btt; + dQt_dVmt_dthetaf = Vmf * (-Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt_dVmf = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + /* dQt_dthetaf = Vmf*Vmt*(-Btf*sin(thetatf) - Gtf*cos(thetatf)); */ + dQt_dthetaf_dthetat = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf_dVmt = Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dthetaf_dthetaf = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + dQt_dthetaf_dVmf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + + /* dQt_dVmf = Vmt*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); */ + dQt_dVmf_dthetat = Vmt * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmf_dVmt = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dVmf_dthetaf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmf_dVmf = 0.0; + + // row[0] = xidxt[i] - nxsparse; + // row[1] = xidxt[i] + 1 - nxsparse; + // col[0] = xidxt[i] - nxsparse; + // col[1] = xidxt[i] + 1 - nxsparse; + // col[2] = xidxf[i] - nxsparse; + // col[3] = xidxf[i] + 1 - nxsparse; + + gloc = geqidxt[i]; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = + 0.0; + + val[0] = lambda_dev[gloc] * dPt_dthetat_dthetat + + lambda_dev[gloc + 1] * dQt_dthetat_dthetat; + val[1] = lambda_dev[gloc] * dPt_dthetat_dVmt + + lambda_dev[gloc + 1] * dQt_dthetat_dVmt; + val[2] = lambda_dev[gloc] * dPt_dthetat_dthetaf + + lambda_dev[gloc + 1] * dQt_dthetat_dthetaf; + val[3] = lambda_dev[gloc] * dPt_dthetat_dVmf + + lambda_dev[gloc + 1] * dQt_dthetat_dVmf; + + val[4] = lambda_dev[gloc] * dPt_dVmt_dthetat + + lambda_dev[gloc + 1] * dQt_dVmt_dthetat; + val[5] = lambda_dev[gloc] * dPt_dVmt_dVmt + + lambda_dev[gloc + 1] * dQt_dVmt_dVmt; + val[6] = lambda_dev[gloc] * dPt_dVmt_dthetaf + + lambda_dev[gloc + 1] * dQt_dVmt_dthetaf; + val[7] = lambda_dev[gloc] * dPt_dVmt_dVmf + + lambda_dev[gloc + 1] * dQt_dVmt_dVmf; + + // to-to bus entries + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[1]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[4]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[tbusidx] + 3], val[5]); + + // off-center to-from entries + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 4], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 5], val[3]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 6], val[6]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 7], val[7]); + + // row[0] = xidxf[i] - nxsparse; + // row[1] = xidxf[i] + 1 - nxsparse; + // col[0] = xidxt[i] - nxsparse; + // col[1] = xidxt[i] + 1 - nxsparse; + // col[2] = xidxf[i] - nxsparse; + // col[3] = xidxf[i] + 1 - nxsparse; + + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = + 0.0; + + val[0] = lambda_dev[gloc] * dPt_dthetaf_dthetat + + lambda_dev[gloc + 1] * dQt_dthetaf_dthetat; + val[1] = lambda_dev[gloc] * dPt_dthetaf_dVmt + + lambda_dev[gloc + 1] * dQt_dthetaf_dVmt; + val[2] = lambda_dev[gloc] * dPt_dthetaf_dthetaf + + lambda_dev[gloc + 1] * dQt_dthetaf_dthetaf; + val[3] = lambda_dev[gloc] * dPt_dthetaf_dVmf + + lambda_dev[gloc + 1] * dQt_dthetaf_dVmf; + + val[4] = lambda_dev[gloc] * dPt_dVmf_dthetat + + lambda_dev[gloc + 1] * dQt_dVmf_dthetat; + val[5] = lambda_dev[gloc] * dPt_dVmf_dVmt + + lambda_dev[gloc + 1] * dQt_dVmf_dVmt; + val[6] = lambda_dev[gloc] * dPt_dVmf_dthetaf + + lambda_dev[gloc + 1] * dQt_dVmf_dthetaf; + val[7] = lambda_dev[gloc] * dPt_dVmf_dVmf + + lambda_dev[gloc + 1] * dQt_dVmf_dVmf; + + // from-from bus entries + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); + RAJA::atomicAdd + (&MHSS_dev[b_hesssp_idx[fbusidx] + 3], val[5]); + + // off-center from-to entries + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + }); + + + + + /* Loadloss contributions - 2 contributions expected */ + if (opflow->include_loadloss_variables) { + int *l_hesssp_idx = loadparams->hesssp_idx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, loadparams->nload), + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS_dev[l_hesssp_idx[i]] = 0.0; + MHSS_dev[l_hesssp_idx[i] + 1] = 0.0; + }); + } + + if (!opflow->ignore_lineflow_constraints) { + int *linelimidx = lineparams->linelimidx_dev_; + int *gineqidx = lineparams->gineqidx_dev_; + RAJA::forall( + RAJA::RangeSegment(0, lineparams->nlinelim), + RAJA_LAMBDA(RAJA::Index_type i) { + int j = linelimidx[i]; + int gloc; + // int row[2], col[4]; + double val[8]; + + double Pf, Qf, Pt, Qt; + double thetaf = x_dev[xidxf[j]], Vmf = x_dev[xidxf[j] + 1]; + double thetat = x_dev[xidxt[j]], Vmt = x_dev[xidxt[j] + 1]; + double thetaft = thetaf - thetat; + double thetatf = thetat - thetaf; + double Gff = Gff_arr[j], Bff = Bff_arr[j]; + double Gft = Gft_arr[j], Bft = Bft_arr[j]; + double Gtf = Gtf_arr[j], Btf = Btf_arr[j]; + double Gtt = Gtt_arr[j], Btt = Btt_arr[j]; + int fbusidx(busf_idx[i]), tbusidx(bust_idx[i]); + + Pf = Gff * Vmf * Vmf + + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Qf = -Bff * Vmf * Vmf + + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + Pt = Gtt * Vmt * Vmt + + Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Qt = -Btt * Vmt * Vmt + + Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + double dSf2_dPf, dSf2_dQf, dSt2_dPt, dSt2_dQt; + + dSf2_dPf = 2. * Pf; + dSf2_dQf = 2. * Qf; + dSt2_dPt = 2. * Pt; + dSt2_dQt = 2. * Qt; + + double dPf_dthetaf, dPf_dVmf, dPf_dthetat, dPf_dVmt; + double dQf_dthetaf, dQf_dVmf, dQf_dthetat, dQf_dVmt; + double dPt_dthetaf, dPt_dVmf, dPt_dthetat, dPt_dVmt; + double dQt_dthetaf, dQt_dVmf, dQt_dthetat, dQt_dVmt; + + dPf_dthetaf = Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf = + 2. * Gff * Vmf + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat = Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + dPf_dVmt = Vmf * (Gft * cos(thetaft) + Bft * sin(thetaft)); + + dQf_dthetaf = Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf = + -2. * Bff * Vmf + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat = Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmt = Vmf * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + dPt_dthetat = Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmt = + 2. * Gtt * Vmt + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf = Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + dPt_dVmf = Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + dQt_dthetat = Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt = + -2. * Btt * Vmt + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf = Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmf = Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + double d2Pf_dthetaf_dthetaf, d2Pf_dthetaf_dVmf, d2Pf_dthetaf_dthetat, + d2Pf_dthetaf_dVmt; + double d2Pf_dVmf_dthetaf, d2Pf_dVmf_dVmf, d2Pf_dVmf_dthetat, + d2Pf_dVmf_dVmt; + double d2Pf_dthetat_dthetaf, d2Pf_dthetat_dVmf, d2Pf_dthetat_dthetat, + d2Pf_dthetat_dVmt; + double d2Pf_dVmt_dthetaf, d2Pf_dVmt_dVmf, d2Pf_dVmt_dthetat, + d2Pf_dVmt_dVmt; + + /* dPf_dthetaf = Vmf*Vmt*(-Gft*sin(thetaft) + Bft*cos(thetaft)); */ + d2Pf_dthetaf_dthetaf = + -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetaf_dVmf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dthetaf_dthetat = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetaf_dVmt = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + + /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + d2Pf_dVmf_dthetaf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dVmf_dVmf = 2 * Gff; + d2Pf_dVmf_dthetat = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + d2Pf_dVmf_dVmt = (Gft * cos(thetaft) + Bft * sin(thetaft)); + + /* dPf_dthetat = Vmf*Vmt*(Gft*sin(thetaft) - Bft*cos(thetaft)); */ + d2Pf_dthetat_dthetaf = + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetat_dVmf = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + d2Pf_dthetat_dthetat = + Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); + d2Pf_dthetat_dVmt = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + + /* dPf_dVmt = Vmf*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + d2Pf_dVmt_dthetaf = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dVmt_dVmf = (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dVmt_dthetat = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); + d2Pf_dVmt_dVmt = 0.0; + + double d2Qf_dthetaf_dthetaf, d2Qf_dthetaf_dVmf, d2Qf_dthetaf_dthetat, + d2Qf_dthetaf_dVmt; + double d2Qf_dVmf_dthetaf, d2Qf_dVmf_dVmf, d2Qf_dVmf_dthetat, + d2Qf_dVmf_dVmt; + double d2Qf_dthetat_dthetaf, d2Qf_dthetat_dVmf, d2Qf_dthetat_dthetat, + d2Qf_dthetat_dVmt; + double d2Qf_dVmt_dthetaf, d2Qf_dVmt_dVmf, d2Qf_dVmt_dthetat, + d2Qf_dVmt_dVmt; + + /* dQf_dthetaf = Vmf*Vmt*(Bft*sin(thetaft) + Gft*cos(thetaft)); */ + d2Qf_dthetaf_dthetaf = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + d2Qf_dthetaf_dVmf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + d2Qf_dthetaf_dthetat = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dthetaf_dVmt = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + + /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + Gft*sin(thetaft)); + */ + d2Qf_dVmf_dthetaf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + d2Qf_dVmf_dVmf = -2 * Bff; + d2Qf_dVmf_dthetat = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dVmf_dVmt = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + + /* dQf_dthetat = Vmf*Vmt*(-Bft*sin(thetaft) - Gft*cos(thetaft)); */ + d2Qf_dthetat_dthetaf = + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dthetat_dVmf = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dthetat_dthetat = + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + d2Qf_dthetat_dVmt = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + + /* dQf_dVmt = Vmf*(-Bft*cos(thetaft) + Gft*sin(thetaft)); */ + d2Qf_dVmt_dthetaf = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); + d2Qf_dVmt_dVmf = (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dVmt_dthetat = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dVmt_dVmt = 0.0; + + double d2Pt_dthetat_dthetat, d2Pt_dthetat_dVmt, d2Pt_dthetat_dthetaf, + d2Pt_dthetat_dVmf; + double d2Pt_dVmt_dthetat, d2Pt_dVmt_dVmt, d2Pt_dVmt_dthetaf, + d2Pt_dVmt_dVmf; + double d2Pt_dthetaf_dthetat, d2Pt_dthetaf_dVmt, d2Pt_dthetaf_dthetaf, + d2Pt_dthetaf_dVmf; + double d2Pt_dVmf_dthetat, d2Pt_dVmf_dVmt, d2Pt_dVmf_dthetaf, + d2Pt_dVmf_dVmf; + + /* dPt_dthetat = Vmf*Vmt*(-Gtf*sin(thetatf) + Btf*cos(thetatf)); */ + d2Pt_dthetat_dthetat = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + d2Pt_dthetat_dVmt = Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + d2Pt_dthetat_dthetaf = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dthetat_dVmf = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + + /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + d2Pt_dVmt_dthetat = Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); + d2Pt_dVmt_dVmt = 2 * Gtt; + d2Pt_dVmt_dthetaf = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + d2Pt_dVmt_dVmf = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + + /* dPt_dthetaf = Vmf*Vmt*(Gtf*sin(thetatf) - Btf*cos(thetatf)); */ + d2Pt_dthetaf_dthetat = + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dthetaf_dVmt = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + d2Pt_dthetaf_dthetaf = + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + d2Pt_dthetaf_dVmf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + + /* dPt_dVmf = Vmt*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + d2Pt_dVmf_dthetat = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + d2Pt_dVmf_dVmt = (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dVmf_dthetaf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + d2Pt_dVmf_dVmf = 0.0; + + double d2Qt_dthetaf_dthetaf, d2Qt_dthetaf_dVmf, d2Qt_dthetaf_dthetat, + d2Qt_dthetaf_dVmt; + double d2Qt_dVmf_dthetaf, d2Qt_dVmf_dVmf, d2Qt_dVmf_dthetat, + d2Qt_dVmf_dVmt; + double d2Qt_dthetat_dthetaf, d2Qt_dthetat_dVmf, d2Qt_dthetat_dthetat, + d2Qt_dthetat_dVmt; + double d2Qt_dVmt_dthetaf, d2Qt_dVmt_dVmf, d2Qt_dVmt_dthetat, + d2Qt_dVmt_dVmt; + + /* dQt_dthetat = Vmf*Vmt*(Btf*sin(thetatf) + Gtf*cos(thetatf)); */ + d2Qt_dthetat_dthetat = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + d2Qt_dthetat_dVmt = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dthetat_dthetaf = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dthetat_dVmf = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + + /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); + */ + d2Qt_dVmt_dthetat = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmt_dVmt = -2 * Btt; + d2Qt_dVmt_dthetaf = Vmf * (-Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmt_dVmf = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + + /* dQt_dthetaf = Vmf*Vmt*(-Btf*sin(thetatf) - Gtf*cos(thetatf)); */ + d2Qt_dthetaf_dthetat = + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dthetaf_dVmt = Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + d2Qt_dthetaf_dthetaf = + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + d2Qt_dthetaf_dVmf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + + /* dQt_dVmf = Vmt*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); */ + d2Qt_dVmf_dthetat = Vmt * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmf_dVmt = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dVmf_dthetaf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + d2Qt_dVmf_dVmf = 0.0; + + double d2Sf2_dthetaf_dthetaf = 0.0, d2Sf2_dthetaf_dVmf = 0.0, + d2Sf2_dthetaf_dthetat = 0.0, d2Sf2_dthetaf_dVmt = 0.0; + double d2St2_dthetaf_dthetaf = 0.0, d2St2_dthetaf_dVmf = 0.0, + d2St2_dthetaf_dthetat = 0.0, d2St2_dthetaf_dVmt = 0.0; + + d2Sf2_dthetaf_dthetaf = + 2 * dPf_dthetaf * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dthetaf + + 2 * dQf_dthetaf * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dthetaf; + d2Sf2_dthetaf_dVmf = + 2 * dPf_dVmf * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmf + + 2 * dQf_dVmf * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmf; + d2Sf2_dthetaf_dthetat = + 2 * dPf_dthetat * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dthetat + + 2 * dQf_dthetat * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dthetat; + d2Sf2_dthetaf_dVmt = + 2 * dPf_dVmt * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmt + + 2 * dQf_dVmt * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmt; + + d2St2_dthetaf_dthetaf = + 2 * dPt_dthetaf * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dthetaf + + 2 * dQt_dthetaf * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dthetaf; + d2St2_dthetaf_dVmf = + 2 * dPt_dVmf * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmf + + 2 * dQt_dVmf * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmf; + d2St2_dthetaf_dthetat = + 2 * dPt_dthetat * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dthetat + + 2 * dQt_dthetat * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dthetat; + d2St2_dthetaf_dVmt = + 2 * dPt_dVmt * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmt + + 2 * dQt_dVmt * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxf[j] - nxsparse; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + gloc = gineqidx[i]; + + val[0] = lambda_dev[gloc] * d2Sf2_dthetaf_dthetaf + + lambda_dev[gloc + 1] * d2St2_dthetaf_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dthetaf_dVmf + + lambda_dev[gloc + 1] * d2St2_dthetaf_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dthetaf_dthetat + + lambda_dev[gloc + 1] * d2St2_dthetaf_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dthetaf_dVmt + + lambda_dev[gloc + 1] * d2St2_dthetaf_dVmt; + + RAJA::atomicAdd + (&MHSS_dev[fbusidx + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[fbusidx + 1], val[1]); + + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + + double d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, + d2Sf2_dVmf_dVmt; + double d2St2_dVmf_dthetaf, d2St2_dVmf_dVmf, d2St2_dVmf_dthetat, + d2St2_dVmf_dVmt; + + d2Sf2_dVmf_dthetaf = + 2 * dPf_dthetaf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetaf + + 2 * dQf_dthetaf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetaf; + d2Sf2_dVmf_dVmf = 2 * dPf_dVmf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmf + + 2 * dQf_dVmf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmf; + d2Sf2_dVmf_dthetat = + 2 * dPf_dthetat * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetat + + 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetat; + d2Sf2_dVmf_dVmt = 2 * dPf_dVmt * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmt + + 2 * dQf_dVmt * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmt; + + d2St2_dVmf_dthetaf = + 2 * dPt_dthetaf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetaf + + 2 * dQt_dthetaf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetaf; + d2St2_dVmf_dVmf = 2 * dPt_dVmf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmf + + 2 * dQt_dVmf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmf; + d2St2_dVmf_dthetat = + 2 * dPt_dthetat * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetat + + 2 * dQt_dthetat * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetat; + d2St2_dVmf_dVmt = 2 * dPt_dVmt * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmt + + 2 * dQt_dVmt * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + // row[0] = xidxf[j] + 1 - nxsparse; + + val[0] = lambda_dev[gloc] * d2Sf2_dVmf_dthetaf + + lambda_dev[gloc + 1] * d2St2_dVmf_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dVmf_dVmf + + lambda_dev[gloc + 1] * d2St2_dVmf_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dVmf_dthetat + + lambda_dev[gloc + 1] * d2St2_dVmf_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dVmf_dVmt + + lambda_dev[gloc + 1] * d2St2_dVmf_dVmt; + + RAJA::atomicAdd + (&MHSS_dev[fbusidx + 2], val[0]); + RAJA::atomicAdd + (&MHSS_dev[fbusidx + 3], val[1]); + + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[3]); + + // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + + double d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, d2Sf2_dthetat_dthetat, + d2Sf2_dthetat_dVmt; + double d2St2_dthetat_dthetaf, d2St2_dthetat_dVmf, d2St2_dthetat_dthetat, + d2St2_dthetat_dVmt; + + d2Sf2_dthetat_dthetaf = + 2 * dPf_dthetaf * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dthetaf + + 2 * dQf_dthetat * dQf_dthetaf + dSf2_dQf * d2Qf_dthetat_dthetaf; + d2Sf2_dthetat_dVmf = + 2 * dPf_dVmf * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmf + + 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dthetat_dVmf; + d2Sf2_dthetat_dthetat = + 2 * dPf_dthetat * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dthetat + + 2 * dQf_dthetat * dQf_dthetat + dSf2_dQf * d2Qf_dthetat_dthetat; + d2Sf2_dthetat_dVmt = + 2 * dPf_dVmt * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmt + + 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dthetat_dVmt; + + d2St2_dthetat_dthetaf = + 2 * dPt_dthetaf * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dthetaf + + 2 * dQt_dthetaf * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dthetaf; + d2St2_dthetat_dVmf = + 2 * dPt_dVmf * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmf + + 2 * dQt_dVmf * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmf; + d2St2_dthetat_dthetat = + 2 * dPt_dthetat * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dthetat + + 2 * dQt_dthetat * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dthetat; + d2St2_dthetat_dVmt = + 2 * dPt_dVmt * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmt + + 2 * dQt_dVmt * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + // row[0] = xidxt[j] - nxsparse; + + val[0] = lambda_dev[gloc] * d2Sf2_dthetat_dthetaf + + lambda_dev[gloc + 1] * d2St2_dthetat_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dthetat_dVmf + + lambda_dev[gloc + 1] * d2St2_dthetat_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dthetat_dthetat + + lambda_dev[gloc + 1] * d2St2_dthetat_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dthetat_dVmt + + lambda_dev[gloc + 1] * d2St2_dthetat_dVmt; + + RAJA::atomicAdd + (&MHSS_dev[tbusidx + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[tbusidx + 1], val[1]); + + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 4], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 5], val[3]); + + // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + + double d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, + d2Sf2_dVmt_dVmt; + double d2St2_dVmt_dthetaf, d2St2_dVmt_dVmf, d2St2_dVmt_dthetat, + d2St2_dVmt_dVmt; + + d2Sf2_dVmt_dthetaf = + 2 * dPf_dthetaf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetaf + + 2 * dQf_dthetaf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetaf; + d2Sf2_dVmt_dVmf = 2 * dPf_dVmf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmf + + 2 * dQf_dVmf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmf; + d2Sf2_dVmt_dthetat = + 2 * dPf_dthetat * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetat + + 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetat; + d2Sf2_dVmt_dVmt = 2 * dPf_dVmt * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmt + + 2 * dQf_dVmt * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmt; + + d2St2_dVmt_dthetaf = + 2 * dPt_dthetaf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetaf + + 2 * dQt_dthetaf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetaf; + d2St2_dVmt_dVmf = 2 * dPt_dVmf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmf + + 2 * dQt_dVmf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmf; + d2St2_dVmt_dthetat = + 2 * dPt_dthetat * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetat + + 2 * dQt_dthetat * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetat; + d2St2_dVmt_dVmt = 2 * dPt_dVmt * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmt + + 2 * dQt_dVmt * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmt; + + val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxt[j] + 1 - nxsparse; + // col[0] = xidxf[j] - nxsparse; + // col[1] = xidxf[j] + 1 - nxsparse; + // col[2] = xidxt[j] - nxsparse; + // col[3] = xidxt[j] + 1 - nxsparse; + + val[0] = lambda_dev[gloc] * d2Sf2_dVmt_dthetaf + + lambda_dev[gloc + 1] * d2St2_dVmt_dthetaf; + val[1] = lambda_dev[gloc] * d2Sf2_dVmt_dVmf + + lambda_dev[gloc + 1] * d2St2_dVmt_dVmf; + val[2] = lambda_dev[gloc] * d2Sf2_dVmt_dthetat + + lambda_dev[gloc + 1] * d2St2_dVmt_dthetat; + val[3] = lambda_dev[gloc] * d2Sf2_dVmt_dVmt + + lambda_dev[gloc + 1] * d2St2_dVmt_dVmt; + + RAJA::atomicAdd + (&MHSS_dev[tbusidx + 2], val[0]); + RAJA::atomicAdd + (&MHSS_dev[tbusidx + 3], val[1]); + + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 6], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 7], val[3]); + }); + + } + + if (debugmsg) { + PrintTriplets("Hessian Values (GPU):", + opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, MHSS_dev); + } + + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); From ca8556152cb36c6be64aec96178f6ab52e0424ab Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 14 Nov 2023 14:32:59 -0800 Subject: [PATCH 27/35] Sort Hessian indexes and permute values; clean up allocations --- .../model/power_bal_hiop/paramsrajahiop.h | 2 + .../pbpolrajahiopsparsekernels.cpp | 75 +++++++++++++------ 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 38037552..1ef7cdeb 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -251,6 +251,7 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP { i_jaceq = j_jaceq = i_jacineq = j_jacineq = NULL; i_hess = j_hess = NULL; val_jaceq = val_jacineq = val_hess = NULL; + idx_jaceq_dev_ = idx_jacineq_dev_ = idx_hess_dev_ = NULL; } void destroy(OPFLOW opflow); @@ -273,4 +274,5 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP { int *i_hess, *j_hess; // Row and column indices for hessian double *val_jaceq, *val_jacineq, *val_hess; // values for equality, inequality jacobians and hessian + int *idx_hess_dev_; // Permuted triplet indexes for Hessian (on-device) }; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index f7dc7e9d..7b5aeaaf 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -690,8 +690,10 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( } - pbpolrajahiopsparse->idx_jacineq_dev_ = - (int *) d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); + if (pbpolrajahiopsparse->idx_jacineq_dev_ == NULL) { + pbpolrajahiopsparse->idx_jacineq_dev_ = + (int *) d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); + } SortIndexes(opflow->nnz_ineqjacsp, iJacS_dev + opflow->nnz_eqjacsp, @@ -713,13 +715,15 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( */ roffset = opflow->nconeq; coffset = 0; - - pbpolrajahiopsparse->i_jacineq = + + if (pbpolrajahiopsparse->i_jacineq == NULL) { + pbpolrajahiopsparse->i_jacineq = (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); - pbpolrajahiopsparse->j_jacineq = + pbpolrajahiopsparse->j_jacineq = (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); - pbpolrajahiopsparse->val_jacineq = (double *)(h_allocator_.allocate( - opflow->nnz_ineqjacsp * sizeof(double))); + pbpolrajahiopsparse->val_jacineq = + (double *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(double))); + } iRowstart = pbpolrajahiopsparse->i_jacineq; jColstart = pbpolrajahiopsparse->j_jacineq; @@ -1159,8 +1163,10 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } - pbpolrajahiopsparse->idx_jaceq_dev_ = - (int *) d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); + if (pbpolrajahiopsparse->idx_jaceq_dev_ == NULL) { + pbpolrajahiopsparse->idx_jaceq_dev_ = + (int *) d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); + } SortIndexes(opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, pbpolrajahiopsparse->idx_jaceq_dev_); @@ -1175,13 +1181,15 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( roffset = 0; coffset = 0; - pbpolrajahiopsparse->i_jaceq = + if (pbpolrajahiopsparse->i_jaceq == NULL) { + pbpolrajahiopsparse->i_jaceq = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - pbpolrajahiopsparse->j_jaceq = + pbpolrajahiopsparse->j_jaceq = (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - pbpolrajahiopsparse->val_jaceq = + pbpolrajahiopsparse->val_jaceq = (double *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(double))); - + } + iRowstart = pbpolrajahiopsparse->i_jaceq; jColstart = pbpolrajahiopsparse->j_jaceq; @@ -1405,13 +1413,13 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( (RAJA::make_span(ipermout, opflow->nnz_eqjacsp), RAJA::make_span(MJacS_dev, opflow->nnz_eqjacsp), RAJA::operators::less{}); + + d_allocator_.deallocate(ipermout); if (debugmsg) PrintTriplets("Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, iperm, iJacS_dev, jJacS_dev, MJacS_dev); - d_allocator_.deallocate(ipermout); - if (oldhostway) { ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -1481,6 +1489,9 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( PetscInt ctr = 0; auto &resmgr = umpire::ResourceManager::getInstance(); + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); + PetscFunctionBegin; if (iHSS_dev != NULL && jHSS_dev != NULL) { @@ -1628,24 +1639,34 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( }); } + if (pbpolrajahiopsparse->idx_hess_dev_ == NULL) { + pbpolrajahiopsparse->idx_hess_dev_ = + (int *) d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); + } + + SortIndexes(opflow->nnz_hesssp, iHSS_dev, jHSS_dev, + pbpolrajahiopsparse->idx_hess_dev_); + if (debugmsg) PrintTriplets("Hessian Indexes (GPU):", - opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, NULL); + opflow->nnz_hesssp, pbpolrajahiopsparse->idx_hess_dev_, + iHSS_dev, jHSS_dev, NULL); if (debugmsg) std::cout << "Official Hessian nonzero count: " << opflow->nnz_hesssp << std::endl; // Create arrays on host to store i,j, and val arrays - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - pbpolrajahiopsparse->i_hess = + if (pbpolrajahiopsparse->i_hess == NULL) { + pbpolrajahiopsparse->i_hess = (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); - pbpolrajahiopsparse->j_hess = + pbpolrajahiopsparse->j_hess = (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); - pbpolrajahiopsparse->val_hess = + pbpolrajahiopsparse->val_hess = (double *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(double))); - + } + iRow = pbpolrajahiopsparse->i_hess; jCol = pbpolrajahiopsparse->j_hess; @@ -1690,7 +1711,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); - // Bus contributions int *b_hesssp_idx = busparams->hesssp_idx_dev_; @@ -2589,11 +2609,22 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( } + int *iperm = pbpolrajahiopsparse->idx_hess_dev_; + + int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_hesssp*sizeof(int)); + resmgr.copy(ipermout, iperm); + + RAJA::stable_sort_pairs + (RAJA::make_span(ipermout, opflow->nnz_hesssp), + RAJA::make_span(MHSS_dev, opflow->nnz_hesssp), + RAJA::operators::less{}); + if (debugmsg) { PrintTriplets("Hessian Values (GPU):", opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, MHSS_dev); } + d_allocator_.deallocate(ipermout); ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); From 8e11b60dbf0d370ed3e05fcfed423d01d29d02df Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Fri, 17 Nov 2023 07:38:26 -0800 Subject: [PATCH 28/35] Add some minor debug messages about Hessian size --- .../pbpolrajahiopsparsekernels.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 7b5aeaaf..b4093b12 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1481,7 +1481,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( PetscErrorCode ierr; PetscInt *iRow, *jCol; PetscScalar *x, *values, *lambda; - PetscInt nrow; + PetscInt nrow, ncol; PetscInt nvals; const PetscInt *cols; const PetscScalar *vals; @@ -1496,6 +1496,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (iHSS_dev != NULL && jHSS_dev != NULL) { + if (debugmsg) + std::cout << "Official Hessian nonzero count: " + << opflow->nnz_hesssp << std::endl; + resmgr.memset(iHSS_dev, 0, opflow->nnz_hesssp*sizeof(int)); resmgr.memset(jHSS_dev, 0, opflow->nnz_hesssp*sizeof(int)); @@ -1652,10 +1656,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( opflow->nnz_hesssp, pbpolrajahiopsparse->idx_hess_dev_, iHSS_dev, jHSS_dev, NULL); - if (debugmsg) - std::cout << "Official Hessian nonzero count: " - << opflow->nnz_hesssp << std::endl; - // Create arrays on host to store i,j, and val arrays if (pbpolrajahiopsparse->i_hess == NULL) { @@ -1673,9 +1673,15 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( ierr = (*opflow->modelops.computehessian)( opflow, opflow->X, opflow->Lambdae, opflow->Lambdai, opflow->Hes); CHKERRQ(ierr); - ierr = MatGetSize(opflow->Hes, &nrow, &nrow); + ierr = MatGetSize(opflow->Hes, &nrow, &ncol); CHKERRQ(ierr); + if (debugmsg) + std::cout << "Official Hessian Size: " + << nrow << " rows x " << ncol << " cols" + << "(should be" << opflow->Nx << " x " << opflow->Nx << ")" + << std::endl; + /* Copy over locations to triplet format */ /* Note that HIOP requires a upper triangular Hessian as oppposed to IPOPT which requires a lower triangular Hessian From 86f787d115e42dceef5b58bb6edcaea7071f34d9 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Mon, 20 Nov 2023 07:58:42 -0800 Subject: [PATCH 29/35] Allocate Hessian differently to avoid unnecessary structure --- src/opflow/interface/opflow.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/opflow/interface/opflow.cpp b/src/opflow/interface/opflow.cpp index 933d7bb4..68f930c0 100644 --- a/src/opflow/interface/opflow.cpp +++ b/src/opflow/interface/opflow.cpp @@ -1792,7 +1792,20 @@ PetscErrorCode OPFLOWSetUp(OPFLOW opflow) { } /* Create Hessian */ - ierr = PSCreateMatrix(opflow->ps, &opflow->Hes); + // ierr = PSCreateMatrix(opflow->ps, &opflow->Hes); + // CHKERRQ(ierr); + + ierr = MatCreate(opflow->comm->type, &opflow->Hes); + CHKERRQ(ierr); + ierr = MatSetSizes(opflow->Hes, opflow->nx, opflow->nx, + opflow->Nx, opflow->Nx); + CHKERRQ(ierr); + ierr = MatSetFromOptions(opflow->Hes); + CHKERRQ(ierr); + ierr = MatSeqAIJSetPreallocation(opflow->Hes, 6, NULL); + CHKERRQ(ierr); + ierr = MatSetOption(opflow->Hes, MAT_NEW_NONZERO_ALLOCATION_ERR, + PETSC_FALSE); CHKERRQ(ierr); /* Create natural to sparse dense ordering mapping (needed for some models) From e3905c762e1ce7c79b46431d76a4f806aa55a43d Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Mon, 20 Nov 2023 08:00:01 -0800 Subject: [PATCH 30/35] Add an entry for generator reactive power even if it's unused --- src/opflow/model/power_bal_polar/pbpol.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/opflow/model/power_bal_polar/pbpol.cpp b/src/opflow/model/power_bal_polar/pbpol.cpp index 57c035bb..348a9c2b 100644 --- a/src/opflow/model/power_bal_polar/pbpol.cpp +++ b/src/opflow/model/power_bal_polar/pbpol.cpp @@ -2672,6 +2672,15 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); + + // Reactive power is usually not included in the objective, + // but let's make sure there's an entry for it + row[0] = xlocglob+1; + col[0] = xlocglob+1; + val[0] = 0.0; + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + CHKERRQ(ierr); flps += 4; } else if (opflow->objectivetype == MIN_GENSETPOINT_DEVIATION) { xlocglob = gen->startxpdevlocglob; @@ -2682,6 +2691,14 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); + // Reactive power is usually not included in the objective, + // but let's make sure there's an entry for it + row[0] = xlocglob+1; + col[0] = xlocglob+1; + val[0] = 0.0; + // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + CHKERRQ(ierr); flps += 1; } } From 1e787872ef04b1e9198304fdbcc1862173445ae0 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Mon, 20 Nov 2023 08:21:23 -0800 Subject: [PATCH 31/35] GPU assembled Hessian non-zero count and indexes are correct --- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 29 ++++-- .../pbpolrajahiopsparsekernels.cpp | 94 ++++++++++++------- 2 files changed, 79 insertions(+), 44 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index 6599d57d..c3acf1cc 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -360,14 +360,17 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { std::cout << "Inequality Jacobian nonzero count: " << nnz_ineqjac << std::endl; + // Count non-zeros in *upper triangular* Hessian + int nnz_hesssp = 0; for (int ibus = 0; ibus < ps->nbus; ++ibus) { // reserve 2 real and 2 reactive entries for each bus + // 3 upper triangular busparams->hesssp_idx[ibus] = nnz_hesssp; - nnz_hesssp += 4; + nnz_hesssp += 3; if (opflow->include_powerimbalance_variables) { nnz_hesssp += 2; @@ -382,15 +385,16 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { continue; genparams->hesssp_idx[igen] = nnz_hesssp; - nnz_hesssp += 1; + nnz_hesssp += 2; if (opflow->has_gensetpoint) { if (gen->isrenewable) continue; - - if (opflow->use_agc) { - nnz_hesssp += 5; - } + + // later ... + // if (opflow->use_agc) { + // nnz_hesssp += 5; + // } } if (opflow->genbusvoltagetype == FIXED_WITHIN_QBOUNDS) { nnz_hesssp += 2; @@ -399,9 +403,18 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } for (int iline=0; iline < ps->nline; ++iline) { - // reserve 8 entries for each line (used twice) + PSLINE line = &(ps->line[iline]); + + if (!line->status) + continue; + lineparams->hesssp_idx[iline] = nnz_hesssp; - nnz_hesssp += 8; + + // 3 diagonal entries for on the from-bus rows (already defined) + // 3 diagonal entries for on the to-bus rows (already defined) + // 4 off-diagonal entries in upper part + nnz_hesssp += 4; + } if (opflow->include_loadloss_variables) { diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index b4093b12..e15a71ac 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1506,7 +1506,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // Bus contributions int *b_xidx = busparams->xidx_dev_; - int *b_xidxpimb = busparams->xidxpimb_dev_; int *b_hesssp_idx = busparams->hesssp_idx_dev_; RAJA::forall( @@ -1519,10 +1518,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; off++; - - iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; - jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; - off++; + + // upper triangular only + // iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; + // jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; + // off++; iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; @@ -1530,9 +1530,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( }); if (opflow->include_powerimbalance_variables) { + int *b_xidxpimb = busparams->xidxpimb_dev_; RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { - int off(1); + int off(2); iHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; @@ -1542,7 +1543,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i] + 1; }); } - + /* Generator contributions for row,col numbers */ int *g_xidx = genparams->xidx_dev_; int *g_hesssp_idx = genparams->hesssp_idx_dev_; @@ -1551,6 +1552,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( RAJA_LAMBDA(RAJA::Index_type i) { iHSS_dev[g_hesssp_idx[i]] = g_xidx[i]; jHSS_dev[g_hesssp_idx[i]] = g_xidx[i]; + iHSS_dev[g_hesssp_idx[i] + 1] = g_xidx[i] + 1; + jHSS_dev[g_hesssp_idx[i] + 1] = g_xidx[i] + 1; }); int *xidxf = lineparams->xidxf_dev_; @@ -1564,6 +1567,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int off(0); int j = linelimidx[i]; + // from-bus diagonal entries already defined + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; // off++; @@ -1572,14 +1577,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; // off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - off++; - - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; // off++; @@ -1588,13 +1585,26 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; // off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - off++; + // from-bus off diagonal entries only there if in upper part + if (xidxt[j] > xidxf[j]) { + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - off++; + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + off++; + } + + // to-bus diagonal entries already defined // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; @@ -1604,14 +1614,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; // off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - off++; - - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; // off++; @@ -1620,14 +1622,26 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; // off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - off++; + // to-bus off diagonal entries only there if in upper part + if (xidxf[j] > xidxt[j]) { + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - off++; - }); + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + off++; + + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + off++; + } + }); /* Loadloss contributions - two contributions*/ if (opflow->include_loadloss_variables) { @@ -1648,6 +1662,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( (int *) d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); } + if (debugmsg) + PrintTriplets("Hessian Indexes (GPU, unsorted):", + opflow->nnz_hesssp, pbpolrajahiopsparse->idx_hess_dev_, + iHSS_dev, jHSS_dev, NULL); + SortIndexes(opflow->nnz_hesssp, iHSS_dev, jHSS_dev, pbpolrajahiopsparse->idx_hess_dev_); @@ -1717,6 +1736,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); + + if (0) { // Bus contributions int *b_hesssp_idx = busparams->hesssp_idx_dev_; @@ -2614,6 +2635,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( }); } + } int *iperm = pbpolrajahiopsparse->idx_hess_dev_; From 86ff70d167e532b2201b61ec3ce17ddc0be0b4d7 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 28 Nov 2023 06:27:25 -0800 Subject: [PATCH 32/35] Mark Hessian contributions with a code --- src/opflow/model/power_bal_polar/pbpol.cpp | 62 ++++++++++++---------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/opflow/model/power_bal_polar/pbpol.cpp b/src/opflow/model/power_bal_polar/pbpol.cpp index 348a9c2b..c5ee4e5f 100644 --- a/src/opflow/model/power_bal_polar/pbpol.cpp +++ b/src/opflow/model/power_bal_polar/pbpol.cpp @@ -1616,15 +1616,21 @@ PetscErrorCode OPFLOWModelSetNumConstraints_PBPOL(OPFLOW opflow, static PetscErrorCode -MatSetValues_and_Print(Mat M, int nrow, int row[], int ncol, int col[], +MatSetValues_and_Print(char code, Mat M, int nrow, int row[], int ncol, int col[], PetscScalar val[], InsertMode mode) { - for (int r = 0; r < nrow; ++r) { + for (int r = 0, i = 0; r < nrow; ++r) { for (int c = 0; c < ncol; ++c) { - std::cout << "M: " - << std::setw(5) << std::right << row[r] << " " - << std::setw(5) << std::right << col[c] - << std::endl; + if (col[c] >= row[r]) { + std::cout << "M" << code << ": " + << std::setw(5) << std::right << row[r] << " " + << std::setw(5) << std::right << col[c] + << std::setw(12) << std::right + << std::scientific << std::setprecision(3) + << val[i] + << std::endl; + } + i++; } } return MatSetValues(M, nrow, row, ncol, col, val, mode); @@ -1683,7 +1689,7 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, val[0] = lambda[gloc] * 2 * bus->gl + lambda[gloc + 1] * (-2 * bus->bl); // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('B', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); ierr = PSBUSGetSupportingLines(bus, &nconnlines, &connlines); @@ -1828,7 +1834,7 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] * dPf_dVmf_dVmt + lambda[gloc + 1] * dQf_dVmf_dVmt; // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloct; @@ -1860,7 +1866,7 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] * dPf_dVmt_dVmt + lambda[gloc + 1] * dQf_dVmt_dVmt; // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); } else { @@ -1969,7 +1975,7 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] * dPt_dVmt_dVmf + lambda[gloc + 1] * dQt_dVmt_dVmf; // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocf; @@ -2001,7 +2007,7 @@ PetscErrorCode OPFLOWComputeEqualityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] * dPt_dVmf_dVmf + lambda[gloc + 1] * dQt_dVmf_dVmf; // ierr = MatSetValues(H, 2, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 2, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('N', H, 2, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2090,14 +2096,14 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[2] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); // ierr = MatSetValues(H, 1, row, 3, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 3, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 3, col, val, ADD_VALUES); // df1_ddelPg = -(Pg - gen->pt); // df2_ddelPg = gen->pb - Pg; row[0] = gen->startxpdevloc; val[0] = -lambda[gloc] - lambda[gloc + 1]; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); // df1_ddelP = gen->apf*(Pg - gen->pt); @@ -2105,7 +2111,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, row[0] = ps->startxloc; val[0] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2142,7 +2148,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] + lambda[gloc + 1]); // lam_eq1*d2eq1_dQg_dV + lam_eq2*d2eq2_dQg_dV // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloc + 1; col[0] = loc + 1; @@ -2150,7 +2156,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] + lambda[gloc + 1]); // lam_eq1* d2eq1_dQg_dV + lam_eq2*d2eq2_dV_dQg // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2437,7 +2443,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc + 1] * d2St2_dthetaf_dVmt; // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, @@ -2482,7 +2488,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[3] = lambda[gloc] * d2Sf2_dVmf_dVmt + lambda[gloc + 1] * d2St2_dVmf_dVmt; // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, @@ -2533,7 +2539,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc + 1] * d2St2_dthetat_dVmt; // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); PetscScalar d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, @@ -2580,7 +2586,7 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] * d2Sf2_dVmt_dVmt + lambda[gloc + 1] * d2St2_dVmt_dVmt; // ierr = MatSetValues(H, 1, row, 4, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 4, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('L', H, 1, row, 4, col, val, ADD_VALUES); CHKERRQ(ierr); // Must be inside for loop since there's a continue condition flps += (185 + (16 * EXAGO_FLOPS_SINOP) + (16 * EXAGO_FLOPS_COSOP)); @@ -2643,7 +2649,7 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('I', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocglob + 1; @@ -2651,7 +2657,7 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('I', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } @@ -2670,7 +2676,7 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, val[0] = weight * obj_factor * 2.0 * gen->cost_alpha * ps->MVAbase * ps->MVAbase; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); // Reactive power is usually not included in the objective, @@ -2679,7 +2685,7 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, col[0] = xlocglob+1; val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); flps += 4; } else if (opflow->objectivetype == MIN_GENSETPOINT_DEVIATION) { @@ -2688,7 +2694,7 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, col[0] = xlocglob; val[0] = weight * obj_factor * 2.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); // Reactive power is usually not included in the objective, @@ -2697,7 +2703,7 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, col[0] = xlocglob+1; val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); flps += 1; } @@ -2715,14 +2721,14 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, col[0] = xlocglob; val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('l', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xlocglob + 1; col[0] = xlocglob + 1; val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print(H, 1, row, 1, col, val, ADD_VALUES); + ierr = MatSetValues_and_Print('l', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } From 718359fb63f8a6f7953535c59e33b3ec0d9e7424 Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 28 Nov 2023 06:28:35 -0800 Subject: [PATCH 33/35] Checkpoint: GPU Hessian values incorrect -- new approach maybe? --- .../pbpolrajahiopsparsekernels.cpp | 207 ++++++++++-------- 1 file changed, 114 insertions(+), 93 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index e15a71ac..8d4e3243 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1587,6 +1587,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // from-bus off diagonal entries only there if in upper part if (xidxt[j] > xidxf[j]) { + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; off++; @@ -1662,11 +1663,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( (int *) d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); } - if (debugmsg) - PrintTriplets("Hessian Indexes (GPU, unsorted):", - opflow->nnz_hesssp, pbpolrajahiopsparse->idx_hess_dev_, - iHSS_dev, jHSS_dev, NULL); - SortIndexes(opflow->nnz_hesssp, iHSS_dev, jHSS_dev, pbpolrajahiopsparse->idx_hess_dev_); @@ -1737,7 +1733,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); - if (0) { // Bus contributions int *b_hesssp_idx = busparams->hesssp_idx_dev_; @@ -1755,7 +1750,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( val = ispvpq[i] * (lambda_dev[b_gidx[i]] * 2 * gl[i] + lambda_dev[b_gidx[i] + 1] * (-2 * bl[i])); RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[i] + 3], val); + (&MHSS_dev[b_hesssp_idx[i] + 2], val); }); if (opflow->objectivetype == MIN_GEN_COST) { @@ -1769,14 +1764,19 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( RAJA::forall( RAJA::RangeSegment(0, genparams->ngenON), RAJA_LAMBDA(RAJA::Index_type i) { - MHSS_dev[hesssp_idx[i]] = weight * isobj_gencost * obj_factor * - 2.0 * cost_alpha[i] * MVAbase * MVAbase; + MHSS_dev[hesssp_idx[i]] = + weight * isobj_gencost * obj_factor * + 2.0 * cost_alpha[i] * MVAbase * MVAbase; + MHSS_dev[hesssp_idx[i] + 1] = 0.0; }); } else if (opflow->objectivetype == NO_OBJ) { int *hesssp_idx = genparams->hesssp_idx_dev_; RAJA::forall( RAJA::RangeSegment(0, genparams->ngenON), - RAJA_LAMBDA(RAJA::Index_type i) { MHSS_dev[hesssp_idx[i]] = 0.0; }); + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS_dev[hesssp_idx[i]] = 0.0; + MHSS_dev[hesssp_idx[i] + 1] = 0.0; + }); } // Line contributions @@ -1799,6 +1799,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int *geqidxt = lineparams->geqidxt_dev_; int *ln_hessp_idx = lineparams->hesssp_idx_dev_; + if (0) { + RAJA::forall( RAJA::RangeSegment(0, lineparams->nlineON), RAJA_LAMBDA(RAJA::Index_type i) { @@ -1806,7 +1808,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // int row[2], col[4]; double val[8]; double Gff, Bff, Gft, Bft, Gtf, Btf, Gtt, Btt; - int fbusidx(busf_idx[i]), tbusidx(bust_idx[i]); + int ibusf(busf_idx[i]), ibust(bust_idx[i]); + int fbusidx(ibusf), tbusidx(ibust); Gff = Gff_arr[i]; Bff = Bff_arr[i]; Gft = Gft_arr[i]; @@ -1924,27 +1927,31 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // Remember central bus locations were reserved and indexed // by bus (from-from) - + RAJA::atomicAdd (&MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); RAJA::atomicAdd (&MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 3], val[5]); + (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[5]); // Off-center entries (from-to bus) were reserved and - // indexed by line - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + // indexed by line only if in upper triangle + if (xidxt[i] > xidxf[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + } + // row[0] = xidxt[i] - nxsparse; // row[1] = xidxt[i] + 1 - nxsparse; @@ -1982,21 +1989,25 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( (&MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[2]); RAJA::atomicAdd (&MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[3]); + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[6]); RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[6]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 3], val[7]); + (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[7]); // Off-center entries (to-from bus) were reserved and - // indexed by line - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 4], val[0]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 5], val[1]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 6], val[4]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 7], val[5]); + // indexed by line only if in upper triangle + + if (xidxf[i] > xidxt[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[1]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[4]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[5]); + } // ierr = MatSetValues(H,2,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); @@ -2101,25 +2112,28 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( val[7] = lambda_dev[gloc] * dPt_dVmt_dVmf + lambda_dev[gloc + 1] * dQt_dVmt_dVmf; - // to-to bus entries + // to-to diagonal bus entries RAJA::atomicAdd (&MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[0]); RAJA::atomicAdd (&MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[1]); + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[4]); RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 3], val[5]); + (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[5]); // off-center to-from entries - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 4], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 5], val[3]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 6], val[6]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 7], val[7]); + if (xidxf[i] > xidxt[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + } // row[0] = xidxf[i] - nxsparse; // row[1] = xidxf[i] + 1 - nxsparse; @@ -2154,25 +2168,24 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( (&MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); RAJA::atomicAdd (&MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + // RAJA::atomicAdd + // (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 3], val[5]); + (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[7]); // off-center from-to entries - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + if (xidxt[i] > xidxf[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[1]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[4]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[5]); + } }); - - - /* Loadloss contributions - 2 contributions expected */ if (opflow->include_loadloss_variables) { int *l_hesssp_idx = loadparams->hesssp_idx_dev_; @@ -2183,7 +2196,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( MHSS_dev[l_hesssp_idx[i] + 1] = 0.0; }); } - + } if (!opflow->ignore_lineflow_constraints) { int *linelimidx = lineparams->linelimidx_dev_; int *gineqidx = lineparams->gineqidx_dev_; @@ -2456,11 +2469,13 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( (&MHSS_dev[fbusidx + 0], val[0]); RAJA::atomicAdd (&MHSS_dev[fbusidx + 1], val[1]); - - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + + if (xidxt[i] > xidxf[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + } double d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, d2Sf2_dVmf_dVmt; @@ -2490,12 +2505,13 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( 2 * dQt_dVmt * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmt; val[0] = val[1] = val[2] = val[3] = 0.0; + + // row[0] = xidxf[j] + 1 - nxsparse; // col[0] = xidxf[j] - nxsparse; // col[1] = xidxf[j] + 1 - nxsparse; // col[2] = xidxt[j] - nxsparse; // col[3] = xidxt[j] + 1 - nxsparse; - // row[0] = xidxf[j] + 1 - nxsparse; val[0] = lambda_dev[gloc] * d2Sf2_dVmf_dthetaf + lambda_dev[gloc + 1] * d2St2_dVmf_dthetaf; @@ -2506,15 +2522,18 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( val[3] = lambda_dev[gloc] * d2Sf2_dVmf_dVmt + lambda_dev[gloc + 1] * d2St2_dVmf_dVmt; + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[fbusidx + 2], val[0]); RAJA::atomicAdd - (&MHSS_dev[fbusidx + 2], val[0]); - RAJA::atomicAdd - (&MHSS_dev[fbusidx + 3], val[1]); - - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[3]); + (&MHSS_dev[fbusidx + 2], val[1]); + + if (xidxt[i] > xidxf[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 2], val[2]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 3], val[3]); + } // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); @@ -2551,12 +2570,12 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( val[0] = val[1] = val[2] = val[3] = 0.0; + // row[0] = xidxt[j] - nxsparse; // col[0] = xidxf[j] - nxsparse; // col[1] = xidxf[j] + 1 - nxsparse; // col[2] = xidxt[j] - nxsparse; // col[3] = xidxt[j] + 1 - nxsparse; - // row[0] = xidxt[j] - nxsparse; val[0] = lambda_dev[gloc] * d2Sf2_dthetat_dthetaf + lambda_dev[gloc + 1] * d2St2_dthetat_dthetaf; @@ -2568,15 +2587,16 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( lambda_dev[gloc + 1] * d2St2_dthetat_dVmt; RAJA::atomicAdd - (&MHSS_dev[tbusidx + 0], val[0]); + (&MHSS_dev[tbusidx + 0], val[2]); RAJA::atomicAdd - (&MHSS_dev[tbusidx + 1], val[1]); - - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 4], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 5], val[3]); + (&MHSS_dev[tbusidx + 1], val[3]); + if (xidxf[i] > xidxt[i]) { + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 0], val[0]); + RAJA::atomicAdd + (&MHSS_dev[ln_hessp_idx[i] + 1], val[1]); + } // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); double d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, @@ -2623,20 +2643,19 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( val[3] = lambda_dev[gloc] * d2Sf2_dVmt_dVmt + lambda_dev[gloc + 1] * d2St2_dVmt_dVmt; + // not in upper triangle + // RAJA::atomicAdd + // (&MHSS_dev[tbusidx + 2], val[2]); RAJA::atomicAdd - (&MHSS_dev[tbusidx + 2], val[0]); - RAJA::atomicAdd - (&MHSS_dev[tbusidx + 3], val[1]); + (&MHSS_dev[tbusidx + 2], val[3]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 6], val[2]); + (&MHSS_dev[ln_hessp_idx[i] + 2], val[0]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 7], val[3]); + (&MHSS_dev[ln_hessp_idx[i] + 3], val[1]); }); - } - } - + int *iperm = pbpolrajahiopsparse->idx_hess_dev_; int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_hesssp*sizeof(int)); @@ -2654,6 +2673,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( d_allocator_.deallocate(ipermout); + resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); + ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); From baa1c9d5cafd3aac4931013b69f082069304fd2b Mon Sep 17 00:00:00 2001 From: William A Perkins Date: Tue, 28 Nov 2023 09:13:05 -0800 Subject: [PATCH 34/35] Fix Hessian indexing problems in line limits --- .../pbpolrajahiopsparsekernels.cpp | 99 +++++++++---------- 1 file changed, 48 insertions(+), 51 deletions(-) diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 8d4e3243..7dfc0e22 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -1560,86 +1560,85 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int *xidxt = lineparams->xidxt_dev_; int *ln_hessp_idx = lineparams->hesssp_idx_dev_; int *linelimidx = lineparams->linelimidx_dev_; - + RAJA::forall( RAJA::RangeSegment(0, lineparams->nlinelim), RAJA_LAMBDA(RAJA::Index_type i) { int off(0); - int j = linelimidx[i]; // from-bus diagonal entries already defined - // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; // off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; // off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; // off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; // off++; // from-bus off diagonal entries only there if in upper part - if (xidxt[j] > xidxf[j]) { + if (xidxt[i] > xidxf[i]) { - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; off++; } // to-bus diagonal entries already defined - // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; // off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; // off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; // off++; - // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; // off++; // to-bus off diagonal entries only there if in upper part - if (xidxf[j] > xidxt[j]) { + if (xidxf[i] > xidxt[i]) { - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j]; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j]; + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; off++; - iHSS_dev[ln_hessp_idx[i] + off] = xidxt[j] + 1; - jHSS_dev[ln_hessp_idx[i] + off] = xidxf[j] + 1; + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; + jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; off++; } }); @@ -1799,8 +1798,6 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int *geqidxt = lineparams->geqidxt_dev_; int *ln_hessp_idx = lineparams->hesssp_idx_dev_; - if (0) { - RAJA::forall( RAJA::RangeSegment(0, lineparams->nlineON), RAJA_LAMBDA(RAJA::Index_type i) { @@ -2196,7 +2193,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( MHSS_dev[l_hesssp_idx[i] + 1] = 0.0; }); } - } + if (!opflow->ignore_lineflow_constraints) { int *linelimidx = lineparams->linelimidx_dev_; int *gineqidx = lineparams->gineqidx_dev_; @@ -2217,7 +2214,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( double Gft = Gft_arr[j], Bft = Bft_arr[j]; double Gtf = Gtf_arr[j], Btf = Btf_arr[j]; double Gtt = Gtt_arr[j], Btt = Btt_arr[j]; - int fbusidx(busf_idx[i]), tbusidx(bust_idx[i]); + int fbusidx(busf_idx[j]), tbusidx(bust_idx[j]); Pf = Gff * Vmf * Vmf + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); @@ -2470,11 +2467,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( RAJA::atomicAdd (&MHSS_dev[fbusidx + 1], val[1]); - if (xidxt[i] > xidxf[i]) { + if (xidxt[j] > xidxf[j]) { RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); + (&MHSS_dev[ln_hessp_idx[j] + 0], val[2]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); + (&MHSS_dev[ln_hessp_idx[j] + 1], val[3]); } double d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, @@ -2528,11 +2525,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( RAJA::atomicAdd (&MHSS_dev[fbusidx + 2], val[1]); - if (xidxt[i] > xidxf[i]) { + if (xidxt[j] > xidxf[j]) { RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[2]); + (&MHSS_dev[ln_hessp_idx[j] + 2], val[2]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[3]); + (&MHSS_dev[ln_hessp_idx[j] + 3], val[3]); } // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); @@ -2591,11 +2588,11 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( RAJA::atomicAdd (&MHSS_dev[tbusidx + 1], val[3]); - if (xidxf[i] > xidxt[i]) { + if (xidxf[j] > xidxt[j]) { RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[0]); + (&MHSS_dev[ln_hessp_idx[j] + 0], val[0]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[1]); + (&MHSS_dev[ln_hessp_idx[j] + 1], val[1]); } // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); @@ -2650,9 +2647,9 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( (&MHSS_dev[tbusidx + 2], val[3]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[0]); + (&MHSS_dev[ln_hessp_idx[j] + 2], val[0]); RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[1]); + (&MHSS_dev[ln_hessp_idx[j] + 3], val[1]); }); } From f05b8d7ec214b4d784cf8f4df0d2496630f9e662 Mon Sep 17 00:00:00 2001 From: wperkins Date: Tue, 28 Nov 2023 17:25:05 +0000 Subject: [PATCH 35/35] Apply pre-commmit fixes --- src/opflow/interface/opflow.cpp | 7 +- .../model/power_bal_hiop/paramsrajahiop.h | 34 +- .../power_bal_hiop/pbpolrajahiopsparse.cpp | 36 +- .../pbpolrajahiopsparsekernels.cpp | 1581 +++++++++-------- src/opflow/model/power_bal_polar/pbpol.cpp | 46 +- 5 files changed, 873 insertions(+), 831 deletions(-) diff --git a/src/opflow/interface/opflow.cpp b/src/opflow/interface/opflow.cpp index 68f930c0..095cede0 100644 --- a/src/opflow/interface/opflow.cpp +++ b/src/opflow/interface/opflow.cpp @@ -1797,15 +1797,14 @@ PetscErrorCode OPFLOWSetUp(OPFLOW opflow) { ierr = MatCreate(opflow->comm->type, &opflow->Hes); CHKERRQ(ierr); - ierr = MatSetSizes(opflow->Hes, opflow->nx, opflow->nx, - opflow->Nx, opflow->Nx); + ierr = + MatSetSizes(opflow->Hes, opflow->nx, opflow->nx, opflow->Nx, opflow->Nx); CHKERRQ(ierr); ierr = MatSetFromOptions(opflow->Hes); CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(opflow->Hes, 6, NULL); CHKERRQ(ierr); - ierr = MatSetOption(opflow->Hes, MAT_NEW_NONZERO_ALLOCATION_ERR, - PETSC_FALSE); + ierr = MatSetOption(opflow->Hes, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); CHKERRQ(ierr); /* Create natural to sparse dense ordering mapping (needed for some models) diff --git a/src/opflow/model/power_bal_hiop/paramsrajahiop.h b/src/opflow/model/power_bal_hiop/paramsrajahiop.h index 1ef7cdeb..ea85a38f 100644 --- a/src/opflow/model/power_bal_hiop/paramsrajahiop.h +++ b/src/opflow/model/power_bal_hiop/paramsrajahiop.h @@ -196,12 +196,12 @@ struct LINEParamsRajaHiop { constraint bound */ int *linelimidx; /* Indices for subset of lines that have finite limits */ - int *busf_idx; /* From bus index */ - int *bust_idx; /* To bus index */ - int *jacf_idx; /* Location number in the sparse Jacobian (from) */ - int *jact_idx; /* Location number in the sparse Jacobian (to) */ - int *jac_ieq_idx;/* Location number in sparse inequality Jacobian */ - int *hesssp_idx; /* Location number in sparse Hessian */ + int *busf_idx; /* From bus index */ + int *bust_idx; /* To bus index */ + int *jacf_idx; /* Location number in the sparse Jacobian (from) */ + int *jact_idx; /* Location number in the sparse Jacobian (to) */ + int *jac_ieq_idx; /* Location number in sparse inequality Jacobian */ + int *hesssp_idx; /* Location number in sparse Hessian */ // Device data double *Gff_dev_; /* From side self conductance */ @@ -226,12 +226,12 @@ struct LINEParamsRajaHiop { int * linelimidx_dev_; /* Indices for subset of lines that have finite limits */ - int *busf_idx_dev_; /* From bus index */ - int *bust_idx_dev_; /* To bus index */ - int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ - int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ - int *jac_ieq_idx_dev_;/* Location number in sparse inequality Jacobian */ - int *hesssp_idx_dev_; /* Location number in sparse Hessian */ + int *busf_idx_dev_; /* From bus index */ + int *bust_idx_dev_; /* To bus index */ + int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */ + int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */ + int *jac_ieq_idx_dev_; /* Location number in sparse inequality Jacobian */ + int *hesssp_idx_dev_; /* Location number in sparse Hessian */ int allocate(OPFLOW); int destroy(OPFLOW); @@ -267,12 +267,14 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP { // GPU sparse model) int *i_jaceq, *j_jaceq; // Row and column indices for equality constrained Jacobian - int *idx_jaceq_dev_; // Permuted triplet indexes for equality constrained Jacobian (on-device) + int *idx_jaceq_dev_; // Permuted triplet indexes for equality constrained + // Jacobian (on-device) int *i_jacineq, *j_jacineq; // Row and column indices for inequality constrained Jacobain - int *idx_jacineq_dev_; // Permuted triplet indexes for inequality constrained Jacobian (on-device) - int *i_hess, *j_hess; // Row and column indices for hessian + int *idx_jacineq_dev_; // Permuted triplet indexes for inequality constrained + // Jacobian (on-device) + int *i_hess, *j_hess; // Row and column indices for hessian double *val_jaceq, *val_jacineq, - *val_hess; // values for equality, inequality jacobians and hessian + *val_hess; // values for equality, inequality jacobians and hessian int *idx_hess_dev_; // Permuted triplet indexes for Hessian (on-device) }; diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp index c3acf1cc..e0bdefa9 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparse.cpp @@ -234,7 +234,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { PSBUS bus = &(ps->bus[ibus]); - // Nonzero entries used by each *bus* starts here + // Nonzero entries used by each *bus* starts here // no matter what, each bus uses 2 rows and 2 columns // row 1 = real, row2 = reactive @@ -243,11 +243,11 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { nnz_eqjac += 2; busparams->jacsq_idx[ibus] = nnz_eqjac; nnz_eqjac += 2; - + if (bus->ide == ISOLATED_BUS) { continue; } - + if (opflow->include_powerimbalance_variables) { // 2 more entries on both real and reactive nnz_eqjac += 4; @@ -264,7 +264,7 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { genparams->eqjacsqbus_idx[igen1] = nnz_eqjac++; igen1++; } - + if (opflow->include_loadloss_variables) { // each load adds one real and reactive entry on each bus row // NOTE: iload is a system load counter @@ -275,13 +275,12 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } - if (opflow->has_gensetpoint) { for (int bgen = 0; bgen < bus->ngen; ++bgen) { PSGEN gen; ierr = PSBUSGetGen(bus, bgen, &gen); CHKERRQ(ierr); - + if (!gen->status || gen->isrenewable) continue; @@ -297,10 +296,10 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { for (int iline = 0; iline <= ps->nline; ++iline) { PSLINE line = &(ps->line[iline]); - + if (!line->status) continue; - + // each line adds 4 (off-diagonal) entries for the to bus and 4 // entries for the from bus. Each line also modifies 4 existing // to and from bus entries. @@ -338,11 +337,11 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { PSBUS bus = &(ps->bus[ibus]); if (bus->ide == PV_BUS || bus->ide == REF_BUS) { for (int bgen = 0; bgen < bus->ngen; ++bgen) { - PSGEN gen; - ierr = PSBUSGetGen(bus, bgen, &gen); - CHKERRQ(ierr); - if (!gen->status) - continue; + PSGEN gen; + ierr = PSBUSGetGen(bus, bgen, &gen); + CHKERRQ(ierr); + if (!gen->status) + continue; } nnz_ineqjac += 2; } @@ -357,8 +356,8 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { } } - std::cout << "Inequality Jacobian nonzero count: " << nnz_ineqjac << std::endl; - + std::cout << "Inequality Jacobian nonzero count: " << nnz_ineqjac + << std::endl; // Count non-zeros in *upper triangular* Hessian @@ -375,7 +374,6 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { if (opflow->include_powerimbalance_variables) { nnz_hesssp += 2; } - } for (int i = 0, igen = 0; i < ps->ngen; ++i) { @@ -402,19 +400,18 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { igen++; } - for (int iline=0; iline < ps->nline; ++iline) { + for (int iline = 0; iline < ps->nline; ++iline) { PSLINE line = &(ps->line[iline]); if (!line->status) continue; - + lineparams->hesssp_idx[iline] = nnz_hesssp; // 3 diagonal entries for on the from-bus rows (already defined) // 3 diagonal entries for on the to-bus rows (already defined) // 4 off-diagonal entries in upper part nnz_hesssp += 4; - } if (opflow->include_loadloss_variables) { @@ -426,7 +423,6 @@ PetscErrorCode OPFLOWModelSetUp_PBPOLRAJAHIOPSPARSE(OPFLOW opflow) { std::cout << "Hessian nonzero count: " << nnz_hesssp << std::endl; - ierr = busparams->copy(opflow); ierr = genparams->copy(opflow); ierr = lineparams->copy(opflow); diff --git a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp index 7dfc0e22..27ed8a69 100644 --- a/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp +++ b/src/opflow/model/power_bal_hiop/pbpolrajahiopsparsekernels.cpp @@ -478,34 +478,33 @@ PetscErrorCode OPFLOWComputeGradientArray_PBPOLRAJAHIOPSPARSE( } // A routine to sort triplet matrix indexes. The index arrays are on -// the device. This sorts them on the host. -static void -SortIndexes(const int& n, int *i_dev, int *j_dev, int *idx_perm_dev) -{ +// the device. This sorts them on the host. +static void SortIndexes(const int &n, int *i_dev, int *j_dev, + int *idx_perm_dev) { auto &resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - std::vector< std::tuple > idxvect; + std::vector> idxvect; idxvect.reserve(n); int *itemp(NULL); itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(itemp, i_dev, n*sizeof(int)); + resmgr.copy(itemp, i_dev, n * sizeof(int)); int *jtemp(NULL); jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(jtemp, j_dev, n*sizeof(int)); + resmgr.copy(jtemp, j_dev, n * sizeof(int)); for (int idx = 0; idx < n; idx++) { idxvect.push_back(std::make_tuple(itemp[idx], jtemp[idx], idx)); } std::sort(idxvect.begin(), idxvect.end(), - [] (std::tuple const &t1, - std::tuple const &t2) { + [](std::tuple const &t1, + std::tuple const &t2) { if (std::get<0>(t1) == std::get<0>(t2)) { return (std::get<1>(t1) < std::get<1>(t2)); - } + } return (std::get<0>(t1) < std::get<0>(t2)); }); @@ -526,21 +525,19 @@ SortIndexes(const int& n, int *i_dev, int *j_dev, int *idx_perm_dev) // << std::setw(5) << std::right << idx_perm[idx] << " " // << std::endl; // } - - resmgr.copy(i_dev, itemp, n*sizeof(int)); - resmgr.copy(j_dev, jtemp, n*sizeof(int)); - resmgr.copy(idx_perm_dev, idx_perm, n*sizeof(int)); - - h_allocator_.deallocate(itemp); - h_allocator_.deallocate(jtemp); - h_allocator_.deallocate(idx_perm); + + resmgr.copy(i_dev, itemp, n * sizeof(int)); + resmgr.copy(j_dev, jtemp, n * sizeof(int)); + resmgr.copy(idx_perm_dev, idx_perm, n * sizeof(int)); + + h_allocator_.deallocate(itemp); + h_allocator_.deallocate(jtemp); + h_allocator_.deallocate(idx_perm); } // A routine to get the triplet arrays from the device and print them out -static void -PrintTriplets(const std::string& title, const int& n, int *iperm, - int *i, int *j, double *v) -{ +static void PrintTriplets(const std::string &title, const int &n, int *iperm, + int *i, int *j, double *v) { auto &resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); @@ -548,26 +545,26 @@ PrintTriplets(const std::string& title, const int& n, int *iperm, if (iperm != NULL) { ipermtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(ipermtemp, iperm, n*sizeof(int)); + resmgr.copy(ipermtemp, iperm, n * sizeof(int)); } int *itemp(NULL); if (i != NULL) { itemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(itemp, i, n*sizeof(int)); + resmgr.copy(itemp, i, n * sizeof(int)); } int *jtemp(NULL); if (j != 0) { jtemp = (int *)(h_allocator_.allocate(n * sizeof(int))); - resmgr.copy(jtemp, j, n*sizeof(int)); + resmgr.copy(jtemp, j, n * sizeof(int)); } double *vtemp(NULL); if (v != NULL) { vtemp = (double *)(h_allocator_.allocate(n * sizeof(double))); - resmgr.copy(vtemp, v, n*sizeof(double)); + resmgr.copy(vtemp, v, n * sizeof(double)); } std::cout << title << std::endl; @@ -583,17 +580,16 @@ PrintTriplets(const std::string& title, const int& n, int *iperm, std::cout << std::setw(5) << std::right << jtemp[idx]; } if (vtemp != NULL) { - std::cout << std::setw(12) << std::right - << std::scientific << std::setprecision(3) - << vtemp[idx]; + std::cout << std::setw(12) << std::right << std::scientific + << std::setprecision(3) << vtemp[idx]; } std::cout << std::endl; } h_allocator_.deallocate(itemp); h_allocator_.deallocate(jtemp); - if (vtemp != NULL) h_allocator_.deallocate(vtemp); + if (vtemp != NULL) + h_allocator_.deallocate(vtemp); } - PetscErrorCode OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( @@ -617,14 +613,13 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // Create arrays on host to store i,j, and val arrays umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); - if (MJacS_dev == NULL) { if (debugmsg) std::cout << "Official Inequality Jacobian nonzero count: " << opflow->nnz_ineqjacsp << std::endl; - + /* Set locations only */ if (opflow->Nconineq) { @@ -638,9 +633,9 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( idxoffset = opflow->nnz_eqjacsp; resmgr.memset(iJacS_dev + idxoffset, 0, - opflow->nnz_ineqjacsp*sizeof(int)); + opflow->nnz_ineqjacsp * sizeof(int)); resmgr.memset(jJacS_dev + idxoffset, 0, - opflow->nnz_ineqjacsp*sizeof(int)); + opflow->nnz_ineqjacsp * sizeof(int)); if (!opflow->ignore_lineflow_constraints) { LINEParamsRajaHiop *lineparams = &pbpolrajahiopsparse->lineparams; @@ -655,15 +650,15 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( RAJA_LAMBDA(RAJA::Index_type i) { int iline(linelimidx[i]); int offset(0); - + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline]; offset++; - + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline] + 1; offset++; - + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline]; offset++; @@ -671,15 +666,15 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i]; jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline] + 1; offset++; - + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline]; offset++; - + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; jJacS_dev[jac_ieq_idx[i] + offset] = xidxf[iline] + 1; offset++; - + iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline]; offset++; @@ -687,84 +682,79 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( iJacS_dev[jac_ieq_idx[i] + offset] = gbineqidx[i] + 1; jJacS_dev[jac_ieq_idx[i] + offset] = xidxt[iline] + 1; }); - } if (pbpolrajahiopsparse->idx_jacineq_dev_ == NULL) { pbpolrajahiopsparse->idx_jacineq_dev_ = - (int *) d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); + (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); } - SortIndexes(opflow->nnz_ineqjacsp, - iJacS_dev + opflow->nnz_eqjacsp, + SortIndexes(opflow->nnz_ineqjacsp, iJacS_dev + opflow->nnz_eqjacsp, jJacS_dev + opflow->nnz_eqjacsp, pbpolrajahiopsparse->idx_jacineq_dev_); - + if (debugmsg) - PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian (GPU):", - opflow->nnz_ineqjacsp, - pbpolrajahiopsparse->idx_jacineq_dev_, - iJacS_dev + opflow->nnz_eqjacsp, - jJacS_dev + opflow->nnz_eqjacsp, - NULL); + PrintTriplets( + "Nonzero indexes for Inequality Constraint Jacobian (GPU):", + opflow->nnz_ineqjacsp, pbpolrajahiopsparse->idx_jacineq_dev_, + iJacS_dev + opflow->nnz_eqjacsp, jJacS_dev + opflow->nnz_eqjacsp, + NULL); if (oldhostway) { - - /* Inequality constraints start after equality constraints - Hence the offset - */ - roffset = opflow->nconeq; - coffset = 0; - if (pbpolrajahiopsparse->i_jacineq == NULL) { - pbpolrajahiopsparse->i_jacineq = - (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); - pbpolrajahiopsparse->j_jacineq = - (int *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int))); - pbpolrajahiopsparse->val_jacineq = - (double *)(h_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(double))); - } + /* Inequality constraints start after equality constraints + Hence the offset + */ + roffset = opflow->nconeq; + coffset = 0; + + if (pbpolrajahiopsparse->i_jacineq == NULL) { + pbpolrajahiopsparse->i_jacineq = (int *)(h_allocator_.allocate( + opflow->nnz_ineqjacsp * sizeof(int))); + pbpolrajahiopsparse->j_jacineq = (int *)(h_allocator_.allocate( + opflow->nnz_ineqjacsp * sizeof(int))); + pbpolrajahiopsparse->val_jacineq = (double *)(h_allocator_.allocate( + opflow->nnz_ineqjacsp * sizeof(double))); + } - iRowstart = pbpolrajahiopsparse->i_jacineq; - jColstart = pbpolrajahiopsparse->j_jacineq; + iRowstart = pbpolrajahiopsparse->i_jacineq; + jColstart = pbpolrajahiopsparse->j_jacineq; - ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Gi); - CHKERRQ(ierr); + ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Gi); + CHKERRQ(ierr); - ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); - CHKERRQ(ierr); - /* Copy over locations to triplet format */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - iRowstart[j] = roffset + i; - jColstart[j] = coffset + cols[j]; + /* Copy over locations to triplet format */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + iRowstart[j] = roffset + i; + jColstart[j] = coffset + cols[j]; + } + /* Increment iRow,jCol pointers */ + iRowstart += nvals; + jColstart += nvals; + ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); } - /* Increment iRow,jCol pointers */ - iRowstart += nvals; - jColstart += nvals; - ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); - CHKERRQ(ierr); - } - // Copy over i_jacineq and j_jacineq arrays to device - resmgr.copy(iJacS_dev + opflow->nnz_eqjacsp, - pbpolrajahiopsparse->i_jacineq); - resmgr.copy(jJacS_dev + opflow->nnz_eqjacsp, - pbpolrajahiopsparse->j_jacineq); + // Copy over i_jacineq and j_jacineq arrays to device + resmgr.copy(iJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->i_jacineq); + resmgr.copy(jJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->j_jacineq); - if (debugmsg) - PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian:", - opflow->nnz_ineqjacsp, - NULL, - iJacS_dev + opflow->nnz_eqjacsp, - jJacS_dev + opflow->nnz_eqjacsp, - NULL); - - ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); + if (debugmsg) + PrintTriplets("Nonzero indexes for Inequality Constraint Jacobian:", + opflow->nnz_ineqjacsp, NULL, + iJacS_dev + opflow->nnz_eqjacsp, + jJacS_dev + opflow->nnz_eqjacsp, NULL); + + ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); + CHKERRQ(ierr); } } } else { @@ -780,12 +770,12 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( double *Gtt_arr = lineparams->Gtt_dev_; double *Gft_arr = lineparams->Gft_dev_; double *Gtf_arr = lineparams->Gtf_dev_; - + double *Bff_arr = lineparams->Bff_dev_; double *Btt_arr = lineparams->Btt_dev_; double *Bft_arr = lineparams->Bft_dev_; double *Btf_arr = lineparams->Btf_dev_; - + int *linelimidx = lineparams->linelimidx_dev_; int *xidxf = lineparams->xidxf_dev_; int *xidxt = lineparams->xidxt_dev_; @@ -811,45 +801,53 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( double Gft = Gft_arr[j], Bft = Bft_arr[j]; double Gtf = Gtf_arr[j], Btf = Btf_arr[j]; double Gtt = Gtt_arr[j], Btt = Btt_arr[j]; - + Pf = Gff * Vmf * Vmf + - Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); Qf = -Bff * Vmf * Vmf + - Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); Pt = Gtt * Vmt * Vmt + - Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); Qt = -Btt * Vmt * Vmt + - Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - + Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dSf2_dPf = 2 * Pf; dSf2_dQf = 2 * Qf; dSt2_dPt = 2 * Pt; dSt2_dQt = 2 * Qt; - - dPf_dthetaf = Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); - dPf_dVmf = - 2 * Gff * Vmf + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); - dPf_dthetat = Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); + + dPf_dthetaf = + Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf = 2 * Gff * Vmf + + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetat = + Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); dPf_dVmt = Vmf * (Gft * cos(thetaft) + Bft * sin(thetaft)); - - dQf_dthetaf = Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); - dQf_dVmf = - -2 * Bff * Vmf + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); - dQf_dthetat = Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + + dQf_dthetaf = + Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); + dQf_dVmf = -2 * Bff * Vmf + + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat = + Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); dQf_dVmt = Vmf * (-Bft * cos(thetaft) + Gft * sin(thetaft)); - - dPt_dthetat = Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); - dPt_dVmt = - 2 * Gtt * Vmt + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); - dPt_dthetaf = Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); + + dPt_dthetat = + Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmt = 2 * Gtt * Vmt + + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetaf = + Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); dPt_dVmf = Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); - - dQt_dthetat = Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); - dQt_dVmt = - -2 * Btt * Vmt + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - dQt_dthetaf = Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + + dQt_dthetat = + Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); + dQt_dVmt = -2 * Btt * Vmt + + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf = + Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); dQt_dVmf = Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - + dSf2_dthetaf = dSf2_dPf * dPf_dthetaf + dSf2_dQf * dQf_dthetaf; dSf2_dthetat = dSf2_dPf * dPf_dthetat + dSf2_dQf * dQf_dthetat; dSf2_dVmf = dSf2_dPf * dPf_dVmf + dSf2_dQf * dQf_dVmf; @@ -869,7 +867,7 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( dSt2_dthetat = dSt2_dPt * dPt_dthetat + dSt2_dQt * dQt_dthetat; dSt2_dVmf = dSt2_dPt * dPt_dVmf + dSt2_dQt * dQt_dVmf; dSt2_dVmt = dSt2_dPt * dPt_dVmt + dSt2_dQt * dQt_dVmt; - + val[2] = dSt2_dthetat; val[3] = dSt2_dVmt; val[0] = dSt2_dthetaf; @@ -879,84 +877,81 @@ OPFLOWComputeSparseInequalityConstraintJacobian_PBPOLRAJAHIOPSPARSE( MJacS_dev[jac_ieq_idx[i] + 5] = val[1]; MJacS_dev[jac_ieq_idx[i] + 6] = val[2]; MJacS_dev[jac_ieq_idx[i] + 7] = val[3]; - }); - } - int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp*sizeof(int)); + int *ipermout = + (int *)d_allocator_.allocate(opflow->nnz_ineqjacsp * sizeof(int)); resmgr.copy(ipermout, iperm); - RAJA::stable_sort_pairs - (RAJA::make_span(ipermout, opflow->nnz_ineqjacsp), - RAJA::make_span(MJacS_dev + opflow->nnz_eqjacsp, opflow->nnz_ineqjacsp), - RAJA::operators::less{}); + RAJA::stable_sort_pairs( + RAJA::make_span(ipermout, opflow->nnz_ineqjacsp), + RAJA::make_span(MJacS_dev + opflow->nnz_eqjacsp, + opflow->nnz_ineqjacsp), + RAJA::operators::less{}); if (debugmsg) - PrintTriplets("Inequality Constraint Jacobian (GPU):", - opflow->nnz_ineqjacsp, - iperm, - (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), - (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), - MJacS_dev); + PrintTriplets( + "Inequality Constraint Jacobian (GPU):", opflow->nnz_ineqjacsp, + iperm, (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), + (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), + MJacS_dev); d_allocator_.deallocate(ipermout); - - if (oldhostway) { - ierr = VecGetArray(opflow->X, &x); - CHKERRQ(ierr); + if (oldhostway) { - // Copy from device to host - umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); - registerWith(x, opflow->nx, resmgr, h_allocator_); - resmgr.copy((double *)x, (double *)x_dev); + ierr = VecGetArray(opflow->X, &x); + CHKERRQ(ierr); - ierr = VecRestoreArray(opflow->X, &x); - CHKERRQ(ierr); + // Copy from device to host + umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); + registerWith(x, opflow->nx, resmgr, h_allocator_); + resmgr.copy((double *)x, (double *)x_dev); - /* Compute inequality constraint jacobian */ - ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Gi); - CHKERRQ(ierr); + ierr = VecRestoreArray(opflow->X, &x); + CHKERRQ(ierr); - ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); - CHKERRQ(ierr); + /* Compute inequality constraint jacobian */ + ierr = (*opflow->modelops.computeinequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Gi); + CHKERRQ(ierr); - values = pbpolrajahiopsparse->val_jacineq; - /* Copy over values */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + ierr = MatGetSize(opflow->Jac_Gi, &nrow, &ncol); CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - values[j] = vals[j]; + + values = pbpolrajahiopsparse->val_jacineq; + /* Copy over values */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + values[j] = vals[j]; + } + values += nvals; + ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + CHKERRQ(ierr); } - values += nvals; - ierr = MatRestoreRow(opflow->Jac_Gi, i, &nvals, &cols, &vals); + // Copy over val_jacineq to device + resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, + pbpolrajahiopsparse->val_jacineq); + + if (debugmsg) + PrintTriplets( + "Inequality Constraint Jacobian:", opflow->nnz_ineqjacsp, NULL, + (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), + (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), + MJacS_dev + opflow->nnz_eqjacsp); + + ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); } - // Copy over val_jacineq to device - resmgr.copy(MJacS_dev + opflow->nnz_eqjacsp, - pbpolrajahiopsparse->val_jacineq); - - if (debugmsg) - PrintTriplets("Inequality Constraint Jacobian:", - opflow->nnz_ineqjacsp, - NULL, - (iJacS_dev == NULL ? NULL : iJacS_dev + opflow->nnz_eqjacsp), - (jJacS_dev == NULL ? NULL : jJacS_dev + opflow->nnz_eqjacsp), - MJacS_dev + opflow->nnz_eqjacsp); - - ierr = PetscLogEventEnd(opflow->ineqconsjaclogger, 0, 0, 0, 0); - CHKERRQ(ierr); - } } } PetscFunctionReturn(0); } - PetscErrorCode OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( OPFLOW opflow, const double *x_dev, int *iJacS_dev, int *jJacS_dev, @@ -986,7 +981,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST"); umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE"); - + /* Using OPFLOWComputeEqualityConstraintJacobian_PBPOL() as a guide */ if (MJacS_dev == NULL) { @@ -994,11 +989,11 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (debugmsg) std::cout << "Official Equality Jacobian nonzero count: " << opflow->nnz_eqjacsp << std::endl; - + /* Set locations only */ - resmgr.memset(iJacS_dev, 0, opflow->nnz_eqjacsp*sizeof(int)); - resmgr.memset(jJacS_dev, 0, opflow->nnz_eqjacsp*sizeof(int)); + resmgr.memset(iJacS_dev, 0, opflow->nnz_eqjacsp * sizeof(int)); + resmgr.memset(jJacS_dev, 0, opflow->nnz_eqjacsp * sizeof(int)); /* Bus power imbalance contribution */ int *b_xidxpimb = busparams->xidxpimb_dev_; @@ -1008,42 +1003,45 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int *b_jacsq_idx = busparams->jacsq_idx_dev_; /* Bus */ - if (debugmsg) std::cout << "Begin with buses" << std::endl; + if (debugmsg) + std::cout << "Begin with buses" << std::endl; RAJA::forall( - RAJA::RangeSegment(0, busparams->nbus), + RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; jJacS_dev[b_jacsp_idx[i]] = b_xidx[i]; iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; jJacS_dev[b_jacsp_idx[i] + 1] = b_xidx[i] + 1; - + iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; jJacS_dev[b_jacsq_idx[i]] = b_xidx[i]; iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; jJacS_dev[b_jacsq_idx[i] + 1] = b_xidx[i] + 1; - }); + }); if (opflow->include_powerimbalance_variables) { - if (debugmsg) std::cout << "Bus power imbalance variables" << std::endl; + if (debugmsg) + std::cout << "Bus power imbalance variables" << std::endl; RAJA::forall( - RAJA::RangeSegment(0, busparams->nbus), + RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { iJacS_dev[b_jacsp_idx[i]] = b_gidx[i]; jJacS_dev[b_jacsp_idx[i]] = b_xidxpimb[i]; iJacS_dev[b_jacsp_idx[i] + 1] = b_gidx[i]; jJacS_dev[b_jacsp_idx[i] + 1] = b_xidxpimb[i] + 1; - + iJacS_dev[b_jacsq_idx[i]] = b_gidx[i] + 1; jJacS_dev[b_jacsq_idx[i]] = b_xidxpimb[i] + 2; iJacS_dev[b_jacsq_idx[i] + 1] = b_gidx[i] + 1; jJacS_dev[b_jacsq_idx[i] + 1] = b_xidxpimb[i] + 3; - }); + }); } /* generation contributions */ - if (debugmsg) std::cout << "Generators " << std::endl; - + if (debugmsg) + std::cout << "Generators " << std::endl; + int *g_gidxbus = genparams->gidxbus_dev_; int *g_xidx = genparams->xidx_dev_; int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; @@ -1059,10 +1057,11 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); /* Loadloss contributions */ - + if (opflow->include_loadloss_variables) { - if (debugmsg) std::cout << "Load Loss" << std::endl; + if (debugmsg) + std::cout << "Load Loss" << std::endl; int *l_gidx = loadparams->gidx_dev_; int *l_xidx = loadparams->xidx_dev_; int *l_jacsp_idx = loadparams->jacsp_idx_dev_; @@ -1079,8 +1078,9 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( /* Connected lines */ - if (debugmsg) std::cout << "Connected Lines" << std::endl; - + if (debugmsg) + std::cout << "Connected Lines" << std::endl; + int *xidxf = lineparams->xidxf_dev_; int *xidxt = lineparams->xidxt_dev_; int *geqidxf = lineparams->geqidxf_dev_; @@ -1089,56 +1089,53 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int *jact_idx = lineparams->jact_idx_dev_; RAJA::forall( - RAJA::RangeSegment(0, lineparams->nlineON), - RAJA_LAMBDA(RAJA::Index_type i) { + RAJA::RangeSegment(0, lineparams->nlineON), + RAJA_LAMBDA(RAJA::Index_type i) { + int offset; - int offset; + offset = 0; - offset = 0; - - iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; - jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; - offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; + offset++; - iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; - jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; - offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i]; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; + offset++; - iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; - jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; - offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i]; + offset++; - iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; - jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; - offset++; + iJacS_dev[jacf_idx[i] + offset] = geqidxf[i] + 1; + jJacS_dev[jacf_idx[i] + offset] = xidxt[i] + 1; + offset++; - // to bus indexes + // to bus indexes - offset = 0; + offset = 0; - iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; - jJacS_dev[jact_idx[i] + offset] = xidxf[i]; - offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; + jJacS_dev[jact_idx[i] + offset] = xidxf[i]; + offset++; - iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; - jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; - offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i]; + jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; + offset++; - iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; - jJacS_dev[jact_idx[i] + offset] = xidxf[i]; - offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; + jJacS_dev[jact_idx[i] + offset] = xidxf[i]; + offset++; - iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; - jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; - offset++; + iJacS_dev[jact_idx[i] + offset] = geqidxt[i] + 1; + jJacS_dev[jact_idx[i] + offset] = xidxf[i] + 1; + offset++; + }); - }); - - - if (opflow->has_gensetpoint) { - if (debugmsg) std::cout << "Generator set point" << std::endl; + if (debugmsg) + std::cout << "Generator set point" << std::endl; int *eqjacspgen_idx = genparams->eqjacspgen_idx_dev_; int *g_geqidxgen = genparams->geqidxgen_dev_; int *g_xidx = genparams->xidx_dev_; @@ -1165,65 +1162,64 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( if (pbpolrajahiopsparse->idx_jaceq_dev_ == NULL) { pbpolrajahiopsparse->idx_jaceq_dev_ = - (int *) d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); + (int *)d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); } SortIndexes(opflow->nnz_eqjacsp, iJacS_dev, jJacS_dev, pbpolrajahiopsparse->idx_jaceq_dev_); - + if (debugmsg) PrintTriplets("Non-zero indexes for Equality Constraint Jacobian (GPU):", - opflow->nnz_eqjacsp, - pbpolrajahiopsparse->idx_jaceq_dev_, + opflow->nnz_eqjacsp, pbpolrajahiopsparse->idx_jaceq_dev_, iJacS_dev, jJacS_dev, NULL); if (oldhostway) { - roffset = 0; - coffset = 0; - - if (pbpolrajahiopsparse->i_jaceq == NULL) { - pbpolrajahiopsparse->i_jaceq = - (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - pbpolrajahiopsparse->j_jaceq = - (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); - pbpolrajahiopsparse->val_jaceq = - (double *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(double))); - } - - iRowstart = pbpolrajahiopsparse->i_jaceq; - jColstart = pbpolrajahiopsparse->j_jaceq; + roffset = 0; + coffset = 0; - ierr = (*opflow->modelops.computeequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Ge); - CHKERRQ(ierr); + if (pbpolrajahiopsparse->i_jaceq == NULL) { + pbpolrajahiopsparse->i_jaceq = + (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + pbpolrajahiopsparse->j_jaceq = + (int *)(h_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int))); + pbpolrajahiopsparse->val_jaceq = (double *)(h_allocator_.allocate( + opflow->nnz_eqjacsp * sizeof(double))); + } - ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); - CHKERRQ(ierr); + iRowstart = pbpolrajahiopsparse->i_jaceq; + jColstart = pbpolrajahiopsparse->j_jaceq; - /* Copy over locations to triplet format */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + ierr = (*opflow->modelops.computeequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Ge); CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - iRowstart[j] = roffset + i; - jColstart[j] = coffset + cols[j]; - } - /* Increment iRow,jCol pointers */ - iRowstart += nvals; - jColstart += nvals; - ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + + ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); CHKERRQ(ierr); - } - // Copy over i_jaceq and j_jaceq arrays to device - resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); - resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); + /* Copy over locations to triplet format */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + iRowstart[j] = roffset + i; + jColstart[j] = coffset + cols[j]; + } + /* Increment iRow,jCol pointers */ + iRowstart += nvals; + jColstart += nvals; + ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + } + + // Copy over i_jaceq and j_jaceq arrays to device + resmgr.copy(iJacS_dev, pbpolrajahiopsparse->i_jaceq); + resmgr.copy(jJacS_dev, pbpolrajahiopsparse->j_jaceq); - if (debugmsg) - PrintTriplets("Non-zero indexes for Equality Constraint Jacobian:", - opflow->nnz_eqjacsp, NULL, iJacS_dev, jJacS_dev, NULL); + if (debugmsg) + PrintTriplets("Non-zero indexes for Equality Constraint Jacobian:", + opflow->nnz_eqjacsp, NULL, iJacS_dev, jJacS_dev, NULL); } - + } else { // Bus Contribution @@ -1243,13 +1239,14 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( RAJA_LAMBDA(RAJA::Index_type i) { double Vm = x_dev[b_xidx[i] + 1]; MJacS_dev[b_jacsp_idx[i]] = isisolated[i] * 1.0 + ispvpq[i] * 0.0; - MJacS_dev[b_jacsp_idx[i]+1] = isisolated[i] * 0.0 + ispvpq[i] * 2 * Vm * gl[i]; + MJacS_dev[b_jacsp_idx[i] + 1] = + isisolated[i] * 0.0 + ispvpq[i] * 2 * Vm * gl[i]; MJacS_dev[b_jacsq_idx[i]] = 0.0; - MJacS_dev[b_jacsq_idx[i]+1] = isisolated[i] * 1.0 + ispvpq[i] * -2 * Vm * bl[i]; + MJacS_dev[b_jacsq_idx[i] + 1] = + isisolated[i] * 1.0 + ispvpq[i] * -2 * Vm * bl[i]; }); - - // Power imbalance + // Power imbalance if (opflow->include_powerimbalance_variables) { RAJA::forall( RAJA::RangeSegment(0, busparams->nbus), @@ -1261,7 +1258,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( }); } - // Generator contributions + // Generator contributions int *eqjacspbus_idx = genparams->eqjacspbus_idx_dev_; int *eqjacsqbus_idx = genparams->eqjacsqbus_idx_dev_; RAJA::forall( @@ -1316,7 +1313,7 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( int *bust_idx = lineparams->bust_idx_dev_; int *jacf_idx = lineparams->jacf_idx_dev_; int *jact_idx = lineparams->jact_idx_dev_; - + RAJA::forall( RAJA::RangeSegment(0, lineparams->nlineON), RAJA_LAMBDA(RAJA::Index_type i) { @@ -1337,134 +1334,138 @@ OPFLOWComputeSparseEqualityConstraintJacobian_PBPOLRAJAHIOPSPARSE( // OPFLOWComputeDenseEqualityConstraintJacobian_PBPOLRAJAHIOP() // from bus real entries - + /* dPf_dthetaf */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsp_idx[ifrom]]), - Vmf * Vmt * (-Gft[i] * sin(thetaft) + Bft[i] * cos(thetaft))); + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ifrom]]), + Vmf * Vmt * (-Gft[i] * sin(thetaft) + Bft[i] * cos(thetaft))); /*dPf_dVmf */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsp_idx[ifrom] + 1]), - 2 * Gff[i] * Vmf + Vmt * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft))); + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ifrom] + 1]), + 2 * Gff[i] * Vmf + + Vmt * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft))); /*dPf_dthetat */ MJacS_dev[jacf_idx[i] + 0] = - Vmf * Vmt * (Gft[i] * sin(thetaft) - Bft[i] * cos(thetaft)); + Vmf * Vmt * (Gft[i] * sin(thetaft) - Bft[i] * cos(thetaft)); /* dPf_dVmt */ MJacS_dev[jacf_idx[i] + 1] = - Vmf * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft)); + Vmf * (Gft[i] * cos(thetaft) + Bft[i] * sin(thetaft)); // from bus reactive entries - + /* dQf_dthetaf */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsq_idx[ifrom]]), - Vmf * Vmt * (Bft[i] * sin(thetaft) + Gft[i] * cos(thetaft))); + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ifrom]]), + Vmf * Vmt * (Bft[i] * sin(thetaft) + Gft[i] * cos(thetaft))); /* dQf_dVmf */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsq_idx[ifrom] + 1]), - -2 * Bff[i] * Vmf + - Vmt * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft))); + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ifrom] + 1]), + -2 * Bff[i] * Vmf + + Vmt * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft))); /* dQf_dthetat */ MJacS_dev[jacf_idx[i] + 2] = - Vmf * Vmt * (-Bft[i] * sin(thetaft) - Gft[i] * cos(thetaft)); + Vmf * Vmt * (-Bft[i] * sin(thetaft) - Gft[i] * cos(thetaft)); /* dQf_dVmt */ MJacS_dev[jacf_idx[i] + 3] = - Vmf * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); + Vmf * (-Bft[i] * cos(thetaft) + Gft[i] * sin(thetaft)); // to bus real entries - + /* dPt_dthetat */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsp_idx[ito]]), + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ito]]), Vmt * Vmf * (-Gtf[i] * sin(thetatf) + Btf[i] * cos(thetatf))); /* dPt_dVmt */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsp_idx[ito] + 1]), 2 * Gtt[i] * Vmt + - Vmf * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf))); - /* dPt_dthetaf */ + RAJA::atomicAdd( + &(MJacS_dev[b_jacsp_idx[ito] + 1]), + 2 * Gtt[i] * Vmt + + Vmf * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf))); + /* dPt_dthetaf */ MJacS_dev[jact_idx[i] + 0] = - Vmt * Vmf * (Gtf[i] * sin(thetatf) - Btf[i] * cos(thetatf)); + Vmt * Vmf * (Gtf[i] * sin(thetatf) - Btf[i] * cos(thetatf)); /* dPt_dVmf */ - MJacS_dev[jact_idx[i] + 1] = - Vmt * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf)); + MJacS_dev[jact_idx[i] + 1] = + Vmt * (Gtf[i] * cos(thetatf) + Btf[i] * sin(thetatf)); // to bus reactive entries - + /* dQt_dthetat */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsq_idx[ito]]), + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ito]]), Vmt * Vmf * (Btf[i] * sin(thetatf) + Gtf[i] * cos(thetatf))); /* dQt_dVmt */ - RAJA::atomicAdd - (&(MJacS_dev[b_jacsq_idx[ito] + 1]), -2 * Btt[i] * Vmt + - Vmf * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf))); + RAJA::atomicAdd( + &(MJacS_dev[b_jacsq_idx[ito] + 1]), + -2 * Btt[i] * Vmt + + Vmf * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf))); /* dQt_dthetaf */ MJacS_dev[jact_idx[i] + 2] = - Vmt * Vmf * (-Btf[i] * sin(thetatf) - Gtf[i] * cos(thetatf)); + Vmt * Vmf * (-Btf[i] * sin(thetatf) - Gtf[i] * cos(thetatf)); /* dQt_dVmf */ MJacS_dev[jact_idx[i] + 3] = - Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); + Vmt * (-Btf[i] * cos(thetatf) + Gtf[i] * sin(thetatf)); }); - - int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_eqjacsp*sizeof(int)); + + int *ipermout = + (int *)d_allocator_.allocate(opflow->nnz_eqjacsp * sizeof(int)); resmgr.copy(ipermout, iperm); - - RAJA::stable_sort_pairs - (RAJA::make_span(ipermout, opflow->nnz_eqjacsp), - RAJA::make_span(MJacS_dev, opflow->nnz_eqjacsp), - RAJA::operators::less{}); + + RAJA::stable_sort_pairs( + RAJA::make_span(ipermout, opflow->nnz_eqjacsp), + RAJA::make_span(MJacS_dev, opflow->nnz_eqjacsp), + RAJA::operators::less{}); d_allocator_.deallocate(ipermout); - + if (debugmsg) - PrintTriplets("Equality Constraint Jacobian (GPU):", - opflow->nnz_eqjacsp, iperm, iJacS_dev, jJacS_dev, MJacS_dev); - + PrintTriplets("Equality Constraint Jacobian (GPU):", opflow->nnz_eqjacsp, + iperm, iJacS_dev, jJacS_dev, MJacS_dev); + if (oldhostway) { - ierr = VecGetArray(opflow->X, &x); - CHKERRQ(ierr); + ierr = VecGetArray(opflow->X, &x); + CHKERRQ(ierr); - // Copy from device to host - registerWith(x, opflow->nx, resmgr, h_allocator_); - resmgr.copy((double *)x, (double *)x_dev); + // Copy from device to host + registerWith(x, opflow->nx, resmgr, h_allocator_); + resmgr.copy((double *)x, (double *)x_dev); - ierr = VecRestoreArray(opflow->X, &x); - CHKERRQ(ierr); + ierr = VecRestoreArray(opflow->X, &x); + CHKERRQ(ierr); - /* Compute equality constraint jacobian */ - ierr = (*opflow->modelops.computeequalityconstraintjacobian)( - opflow, opflow->X, opflow->Jac_Ge); - CHKERRQ(ierr); + /* Compute equality constraint jacobian */ + ierr = (*opflow->modelops.computeequalityconstraintjacobian)( + opflow, opflow->X, opflow->Jac_Ge); + CHKERRQ(ierr); - ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); - CHKERRQ(ierr); + ierr = MatGetSize(opflow->Jac_Ge, &nrow, &ncol); + CHKERRQ(ierr); - values = pbpolrajahiopsparse->val_jaceq; + values = pbpolrajahiopsparse->val_jaceq; - /* Copy over values */ - for (i = 0; i < nrow; i++) { - ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); - CHKERRQ(ierr); - for (j = 0; j < nvals; j++) { - values[j] = vals[j]; + /* Copy over values */ + for (i = 0; i < nrow; i++) { + ierr = MatGetRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); + for (j = 0; j < nvals; j++) { + values[j] = vals[j]; + } + values += nvals; + ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); + CHKERRQ(ierr); } - values += nvals; - ierr = MatRestoreRow(opflow->Jac_Ge, i, &nvals, &cols, &vals); - CHKERRQ(ierr); - } - // Copy over val_ineq to device - resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); + // Copy over val_ineq to device + resmgr.copy(MJacS_dev, pbpolrajahiopsparse->val_jaceq); - if (debugmsg) - PrintTriplets("Equality Constraint Jacobian:", - opflow->nnz_eqjacsp, NULL, iJacS_dev, jJacS_dev, MJacS_dev); + if (debugmsg) + PrintTriplets("Equality Constraint Jacobian:", opflow->nnz_eqjacsp, + NULL, iJacS_dev, jJacS_dev, MJacS_dev); } } ierr = PetscLogEventEnd(opflow->eqconsjaclogger, 0, 0, 0, 0); CHKERRQ(ierr); - + PetscFunctionReturn(0); } @@ -1497,24 +1498,25 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (iHSS_dev != NULL && jHSS_dev != NULL) { if (debugmsg) - std::cout << "Official Hessian nonzero count: " - << opflow->nnz_hesssp << std::endl; - - resmgr.memset(iHSS_dev, 0, opflow->nnz_hesssp*sizeof(int)); - resmgr.memset(jHSS_dev, 0, opflow->nnz_hesssp*sizeof(int)); + std::cout << "Official Hessian nonzero count: " << opflow->nnz_hesssp + << std::endl; + + resmgr.memset(iHSS_dev, 0, opflow->nnz_hesssp * sizeof(int)); + resmgr.memset(jHSS_dev, 0, opflow->nnz_hesssp * sizeof(int)); // Bus contributions - + int *b_xidx = busparams->xidx_dev_; int *b_hesssp_idx = busparams->hesssp_idx_dev_; - + RAJA::forall( - RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { int off(0); iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; off++; - + iHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i]; jHSS_dev[b_hesssp_idx[i] + off] = b_xidx[i] + 1; off++; @@ -1532,9 +1534,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (opflow->include_powerimbalance_variables) { int *b_xidxpimb = busparams->xidxpimb_dev_; RAJA::forall( - RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { int off(2); - + iHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; jHSS_dev[b_hesssp_idx[i] + off] = b_xidxpimb[i]; off++; @@ -1568,10 +1571,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // from-bus diagonal entries already defined - // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; // off++; - + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; // off++; @@ -1579,18 +1582,18 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; // off++; - + // iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; // jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; // off++; // from-bus off diagonal entries only there if in upper part if (xidxt[i] > xidxf[i]) { - + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; off++; - + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; off++; @@ -1598,7 +1601,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; off++; - + iHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; off++; @@ -1609,7 +1612,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; // off++; - + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; // off++; @@ -1617,7 +1620,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; // off++; - + // iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; // jHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; // off++; @@ -1628,7 +1631,7 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; off++; - + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i]; jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; off++; @@ -1636,13 +1639,13 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i]; off++; - + iHSS_dev[ln_hessp_idx[i] + off] = xidxt[i] + 1; jHSS_dev[ln_hessp_idx[i] + off] = xidxf[i] + 1; off++; } }); - + /* Loadloss contributions - two contributions*/ if (opflow->include_loadloss_variables) { int *l_xidx = loadparams->xidx_dev_; @@ -1659,28 +1662,28 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( if (pbpolrajahiopsparse->idx_hess_dev_ == NULL) { pbpolrajahiopsparse->idx_hess_dev_ = - (int *) d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); + (int *)d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); } SortIndexes(opflow->nnz_hesssp, iHSS_dev, jHSS_dev, pbpolrajahiopsparse->idx_hess_dev_); - + if (debugmsg) - PrintTriplets("Hessian Indexes (GPU):", - opflow->nnz_hesssp, pbpolrajahiopsparse->idx_hess_dev_, - iHSS_dev, jHSS_dev, NULL); - + PrintTriplets("Hessian Indexes (GPU):", opflow->nnz_hesssp, + pbpolrajahiopsparse->idx_hess_dev_, iHSS_dev, jHSS_dev, + NULL); + // Create arrays on host to store i,j, and val arrays - if (pbpolrajahiopsparse->i_hess == NULL) { + if (pbpolrajahiopsparse->i_hess == NULL) { pbpolrajahiopsparse->i_hess = - (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); + (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); pbpolrajahiopsparse->j_hess = - (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); - pbpolrajahiopsparse->val_hess = - (double *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(double))); + (int *)(h_allocator_.allocate(opflow->nnz_hesssp * sizeof(int))); + pbpolrajahiopsparse->val_hess = (double *)(h_allocator_.allocate( + opflow->nnz_hesssp * sizeof(double))); } - + iRow = pbpolrajahiopsparse->i_hess; jCol = pbpolrajahiopsparse->j_hess; @@ -1691,8 +1694,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( CHKERRQ(ierr); if (debugmsg) - std::cout << "Official Hessian Size: " - << nrow << " rows x " << ncol << " cols" + std::cout << "Official Hessian Size: " << nrow << " rows x " << ncol + << " cols" << "(should be" << opflow->Nx << " x " << opflow->Nx << ")" << std::endl; @@ -1723,17 +1726,16 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( resmgr.copy(jHSS_dev, pbpolrajahiopsparse->j_hess); if (debugmsg) { - PrintTriplets("Hessian Indexes:", - opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, NULL); + PrintTriplets("Hessian Indexes:", opflow->nnz_hesssp, NULL, iHSS_dev, + jHSS_dev, NULL); } - - } else { - resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); + } else { + resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp * sizeof(double)); // Bus contributions - + int *b_hesssp_idx = busparams->hesssp_idx_dev_; int *b_gidx = busparams->gidx_dev_; int *ispvpq = busparams->ispvpq_dev_; @@ -1741,16 +1743,17 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( double *bl = busparams->bl_dev_; RAJA::forall( - RAJA::RangeSegment(0, busparams->nbus), RAJA_LAMBDA(RAJA::Index_type i) { + RAJA::RangeSegment(0, busparams->nbus), + RAJA_LAMBDA(RAJA::Index_type i) { // int row, col; double val; // row = b_xidx[i] + 1 - nxsparse; // col = row; val = ispvpq[i] * (lambda_dev[b_gidx[i]] * 2 * gl[i] + lambda_dev[b_gidx[i] + 1] * (-2 * bl[i])); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[i] + 2], val); - }); + RAJA::atomicAdd(&MHSS_dev[b_hesssp_idx[i] + 2], + val); + }); if (opflow->objectivetype == MIN_GEN_COST) { int *hesssp_idx = genparams->hesssp_idx_dev_; @@ -1763,9 +1766,8 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( RAJA::forall( RAJA::RangeSegment(0, genparams->ngenON), RAJA_LAMBDA(RAJA::Index_type i) { - MHSS_dev[hesssp_idx[i]] = - weight * isobj_gencost * obj_factor * - 2.0 * cost_alpha[i] * MVAbase * MVAbase; + MHSS_dev[hesssp_idx[i]] = weight * isobj_gencost * obj_factor * + 2.0 * cost_alpha[i] * MVAbase * MVAbase; MHSS_dev[hesssp_idx[i] + 1] = 0.0; }); } else if (opflow->objectivetype == NO_OBJ) { @@ -1822,21 +1824,24 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( double thetatf = thetat - thetaf; double dPf_dthetaf_dthetaf, dPf_dthetaf_dVmf, dPf_dthetaf_dthetat, - dPf_dthetaf_dVmt; - double dPf_dVmf_dthetaf, dPf_dVmf_dVmf, dPf_dVmf_dthetat, dPf_dVmf_dVmt; + dPf_dthetaf_dVmt; + double dPf_dVmf_dthetaf, dPf_dVmf_dVmf, dPf_dVmf_dthetat, + dPf_dVmf_dVmt; double dPf_dthetat_dthetaf, dPf_dthetat_dVmf, dPf_dthetat_dthetat, - dPf_dthetat_dVmt; - double dPf_dVmt_dthetaf, dPf_dVmt_dVmf, dPf_dVmt_dthetat, dPf_dVmt_dVmt; + dPf_dthetat_dVmt; + double dPf_dVmt_dthetaf, dPf_dVmt_dVmf, dPf_dVmt_dthetat, + dPf_dVmt_dVmt; /* dPf_dthetaf = Vmf*Vmt*(-Gft*sin(thetaft) + Bft*cos(thetaft)); */ dPf_dthetaf_dthetaf = - -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); dPf_dthetaf_dVmf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); dPf_dthetaf_dthetat = - Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); dPf_dthetaf_dVmt = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); - /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ + /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); + */ dPf_dVmf_dthetaf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); dPf_dVmf_dVmf = 2 * Gff; dPf_dVmf_dthetat = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); @@ -1844,10 +1849,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( /* dPf_dthetat = Vmf*Vmt*(Gft*sin(thetaft) - Bft*cos(thetaft)); */ dPf_dthetat_dthetaf = - Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); dPf_dthetat_dVmf = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); dPf_dthetat_dthetat = - Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); + Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); dPf_dthetat_dVmt = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); /* dPf_dVmt = Vmf*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ @@ -1857,18 +1862,20 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( dPf_dVmt_dVmt = 0.0; double dQf_dthetaf_dthetaf, dQf_dthetaf_dVmf, dQf_dthetaf_dthetat, - dQf_dthetaf_dVmt; - double dQf_dVmf_dthetaf, dQf_dVmf_dVmf, dQf_dVmf_dthetat, dQf_dVmf_dVmt; + dQf_dthetaf_dVmt; + double dQf_dVmf_dthetaf, dQf_dVmf_dVmf, dQf_dVmf_dthetat, + dQf_dVmf_dVmt; double dQf_dthetat_dthetaf, dQf_dthetat_dVmf, dQf_dthetat_dthetat, - dQf_dthetat_dVmt; - double dQf_dVmt_dthetaf, dQf_dVmt_dVmf, dQf_dVmt_dthetat, dQf_dVmt_dVmt; + dQf_dthetat_dVmt; + double dQf_dVmt_dthetaf, dQf_dVmt_dVmf, dQf_dVmt_dthetat, + dQf_dVmt_dVmt; /* dQf_dthetaf = Vmf*Vmt*(Bft*sin(thetaft) + Gft*cos(thetaft)); */ dQf_dthetaf_dthetaf = - Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); dQf_dthetaf_dVmf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); dQf_dthetaf_dthetat = - Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); dQf_dthetaf_dVmt = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + Gft*sin(thetaft)); @@ -1880,10 +1887,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( /* dQf_dthetat = Vmf*Vmt*(-Bft*sin(thetaft) - Gft*cos(thetaft)); */ dQf_dthetat_dthetaf = - Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); dQf_dthetat_dVmf = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); dQf_dthetat_dthetat = - Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); dQf_dthetat_dVmt = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); /* dQf_dVmt = Vmf*(-Bft*cos(thetaft) + Gft*sin(thetaft)); */ @@ -1901,54 +1908,54 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( gloc = geqidxf[i]; - val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = - 0.0; + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; val[0] = lambda_dev[gloc] * dPf_dthetaf_dthetaf + - lambda_dev[gloc + 1] * dQf_dthetaf_dthetaf; + lambda_dev[gloc + 1] * dQf_dthetaf_dthetaf; val[1] = lambda_dev[gloc] * dPf_dthetaf_dVmf + - lambda_dev[gloc + 1] * dQf_dthetaf_dVmf; + lambda_dev[gloc + 1] * dQf_dthetaf_dVmf; val[2] = lambda_dev[gloc] * dPf_dthetaf_dthetat + - lambda_dev[gloc + 1] * dQf_dthetaf_dthetat; + lambda_dev[gloc + 1] * dQf_dthetaf_dthetat; val[3] = lambda_dev[gloc] * dPf_dthetaf_dVmt + - lambda_dev[gloc + 1] * dQf_dthetaf_dVmt; + lambda_dev[gloc + 1] * dQf_dthetaf_dVmt; val[4] = lambda_dev[gloc] * dPf_dVmf_dthetaf + - lambda_dev[gloc + 1] * dQf_dVmf_dthetaf; + lambda_dev[gloc + 1] * dQf_dVmf_dthetaf; val[5] = lambda_dev[gloc] * dPf_dVmf_dVmf + - lambda_dev[gloc + 1] * dQf_dVmf_dVmf; + lambda_dev[gloc + 1] * dQf_dVmf_dVmf; val[6] = lambda_dev[gloc] * dPf_dVmf_dthetat + - lambda_dev[gloc + 1] * dQf_dVmf_dthetat; + lambda_dev[gloc + 1] * dQf_dVmf_dthetat; val[7] = lambda_dev[gloc] * dPf_dVmf_dVmt + - lambda_dev[gloc + 1] * dQf_dVmf_dVmt; + lambda_dev[gloc + 1] * dQf_dVmf_dVmt; // Remember central bus locations were reserved and indexed // by bus (from-from) - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); // not in upper triangle // RAJA::atomicAdd // (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[5]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[5]); // Off-center entries (from-to bus) were reserved and // indexed by line only if in upper triangle if (xidxt[i] > xidxf[i]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[3]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[6]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[7]); } - + // row[0] = xidxt[i] - nxsparse; // row[1] = xidxt[i] + 1 - nxsparse; @@ -1957,73 +1964,75 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // col[2] = xidxt[i] - nxsparse; // col[3] = xidxt[i] + 1 - nxsparse; - val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = - 0.0; + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; val[0] = lambda_dev[gloc] * dPf_dthetat_dthetaf + - lambda_dev[gloc + 1] * dQf_dthetat_dthetaf; + lambda_dev[gloc + 1] * dQf_dthetat_dthetaf; val[1] = lambda_dev[gloc] * dPf_dthetat_dVmf + - lambda_dev[gloc + 1] * dQf_dthetat_dVmf; + lambda_dev[gloc + 1] * dQf_dthetat_dVmf; val[2] = lambda_dev[gloc] * dPf_dthetat_dthetat + - lambda_dev[gloc + 1] * dQf_dthetat_dthetat; + lambda_dev[gloc + 1] * dQf_dthetat_dthetat; val[3] = lambda_dev[gloc] * dPf_dthetat_dVmt + - lambda_dev[gloc + 1] * dQf_dthetat_dVmt; + lambda_dev[gloc + 1] * dQf_dthetat_dVmt; val[4] = lambda_dev[gloc] * dPf_dVmt_dthetaf + - lambda_dev[gloc + 1] * dQf_dVmt_dthetaf; + lambda_dev[gloc + 1] * dQf_dVmt_dthetaf; val[5] = lambda_dev[gloc] * dPf_dVmt_dVmf + - lambda_dev[gloc + 1] * dQf_dVmt_dVmf; + lambda_dev[gloc + 1] * dQf_dVmt_dVmf; val[6] = lambda_dev[gloc] * dPf_dVmt_dthetat + - lambda_dev[gloc + 1] * dQf_dVmt_dthetat; + lambda_dev[gloc + 1] * dQf_dVmt_dthetat; val[7] = lambda_dev[gloc] * dPf_dVmt_dVmt + - lambda_dev[gloc + 1] * dQf_dVmt_dVmt; + lambda_dev[gloc + 1] * dQf_dVmt_dVmt; - // Remember central bus locations were reserved and indexed // by bus (to-to) - - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[3]); + + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[2]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[3]); // not in upper triangle // RAJA::atomicAdd // (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[6]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[7]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[7]); // Off-center entries (to-from bus) were reserved and // indexed by line only if in upper triangle if (xidxf[i] > xidxt[i]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[1]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[5]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[1]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[4]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[5]); } // ierr = MatSetValues(H,2,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); double dPt_dthetat_dthetat, dPt_dthetat_dVmt, dPt_dthetat_dthetaf, - dPt_dthetat_dVmf; - double dPt_dVmt_dthetat, dPt_dVmt_dVmt, dPt_dVmt_dthetaf, dPt_dVmt_dVmf; + dPt_dthetat_dVmf; + double dPt_dVmt_dthetat, dPt_dVmt_dVmt, dPt_dVmt_dthetaf, + dPt_dVmt_dVmf; double dPt_dthetaf_dthetat, dPt_dthetaf_dVmt, dPt_dthetaf_dthetaf, - dPt_dthetaf_dVmf; - double dPt_dVmf_dthetat, dPt_dVmf_dVmt, dPt_dVmf_dthetaf, dPt_dVmf_dVmf; + dPt_dthetaf_dVmf; + double dPt_dVmf_dthetat, dPt_dVmf_dVmt, dPt_dVmf_dthetaf, + dPt_dVmf_dVmf; /* dPt_dthetat = Vmf*Vmt*(-Gtf*sin(thetatf) + Btf*cos(thetatf)); */ dPt_dthetat_dthetat = - Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); dPt_dthetat_dVmt = Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); dPt_dthetat_dthetaf = - Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); dPt_dthetat_dVmf = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); - /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ + /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); + */ dPt_dVmt_dthetat = Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); dPt_dVmt_dVmt = 2 * Gtt; dPt_dVmt_dthetaf = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); @@ -2031,10 +2040,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( /* dPt_dthetaf = Vmf*Vmt*(Gtf*sin(thetatf) - Btf*cos(thetatf)); */ dPt_dthetaf_dthetat = - Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); dPt_dthetaf_dVmt = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); dPt_dthetaf_dthetaf = - Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); dPt_dthetaf_dVmf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); /* dPt_dVmf = Vmt*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ @@ -2044,18 +2053,20 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( dPt_dVmf_dVmf = 0.0; double dQt_dthetaf_dthetaf, dQt_dthetaf_dVmf, dQt_dthetaf_dthetat, - dQt_dthetaf_dVmt; - double dQt_dVmf_dthetaf, dQt_dVmf_dVmf, dQt_dVmf_dthetat, dQt_dVmf_dVmt; + dQt_dthetaf_dVmt; + double dQt_dVmf_dthetaf, dQt_dVmf_dVmf, dQt_dVmf_dthetat, + dQt_dVmf_dVmt; double dQt_dthetat_dthetaf, dQt_dthetat_dVmf, dQt_dthetat_dthetat, - dQt_dthetat_dVmt; - double dQt_dVmt_dthetaf, dQt_dVmt_dVmf, dQt_dVmt_dthetat, dQt_dVmt_dVmt; + dQt_dthetat_dVmt; + double dQt_dVmt_dthetaf, dQt_dVmt_dVmf, dQt_dVmt_dthetat, + dQt_dVmt_dVmt; /* dQt_dthetat = Vmf*Vmt*(Btf*sin(thetatf) + Gtf*cos(thetatf)); */ dQt_dthetat_dthetat = - Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); dQt_dthetat_dVmt = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); dQt_dthetat_dthetaf = - Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); dQt_dthetat_dVmf = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); @@ -2067,10 +2078,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( /* dQt_dthetaf = Vmf*Vmt*(-Btf*sin(thetatf) - Gtf*cos(thetatf)); */ dQt_dthetaf_dthetat = - Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); dQt_dthetaf_dVmt = Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); dQt_dthetaf_dthetaf = - Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); dQt_dthetaf_dVmf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); /* dQt_dVmf = Vmt*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); */ @@ -2088,48 +2099,48 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( gloc = geqidxt[i]; - val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = - 0.0; + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; val[0] = lambda_dev[gloc] * dPt_dthetat_dthetat + - lambda_dev[gloc + 1] * dQt_dthetat_dthetat; + lambda_dev[gloc + 1] * dQt_dthetat_dthetat; val[1] = lambda_dev[gloc] * dPt_dthetat_dVmt + - lambda_dev[gloc + 1] * dQt_dthetat_dVmt; + lambda_dev[gloc + 1] * dQt_dthetat_dVmt; val[2] = lambda_dev[gloc] * dPt_dthetat_dthetaf + - lambda_dev[gloc + 1] * dQt_dthetat_dthetaf; + lambda_dev[gloc + 1] * dQt_dthetat_dthetaf; val[3] = lambda_dev[gloc] * dPt_dthetat_dVmf + - lambda_dev[gloc + 1] * dQt_dthetat_dVmf; + lambda_dev[gloc + 1] * dQt_dthetat_dVmf; val[4] = lambda_dev[gloc] * dPt_dVmt_dthetat + - lambda_dev[gloc + 1] * dQt_dVmt_dthetat; + lambda_dev[gloc + 1] * dQt_dVmt_dthetat; val[5] = lambda_dev[gloc] * dPt_dVmt_dVmt + - lambda_dev[gloc + 1] * dQt_dVmt_dVmt; + lambda_dev[gloc + 1] * dQt_dVmt_dVmt; val[6] = lambda_dev[gloc] * dPt_dVmt_dthetaf + - lambda_dev[gloc + 1] * dQt_dVmt_dthetaf; + lambda_dev[gloc + 1] * dQt_dVmt_dthetaf; val[7] = lambda_dev[gloc] * dPt_dVmt_dVmf + - lambda_dev[gloc + 1] * dQt_dVmt_dVmf; + lambda_dev[gloc + 1] * dQt_dVmt_dVmf; // to-to diagonal bus entries - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[1]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 0], val[0]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 1], val[1]); // not in upper triangle // RAJA::atomicAdd // (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[5]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[tbusidx] + 2], val[5]); // off-center to-from entries if (xidxf[i] > xidxt[i]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[3]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[6]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[7]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[3]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[6]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[7]); } // row[0] = xidxf[i] - nxsparse; @@ -2139,47 +2150,47 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // col[2] = xidxf[i] - nxsparse; // col[3] = xidxf[i] + 1 - nxsparse; - val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = val[7] = - 0.0; + val[0] = val[1] = val[2] = val[3] = val[4] = val[5] = val[6] = + val[7] = 0.0; val[0] = lambda_dev[gloc] * dPt_dthetaf_dthetat + - lambda_dev[gloc + 1] * dQt_dthetaf_dthetat; + lambda_dev[gloc + 1] * dQt_dthetaf_dthetat; val[1] = lambda_dev[gloc] * dPt_dthetaf_dVmt + - lambda_dev[gloc + 1] * dQt_dthetaf_dVmt; + lambda_dev[gloc + 1] * dQt_dthetaf_dVmt; val[2] = lambda_dev[gloc] * dPt_dthetaf_dthetaf + - lambda_dev[gloc + 1] * dQt_dthetaf_dthetaf; + lambda_dev[gloc + 1] * dQt_dthetaf_dthetaf; val[3] = lambda_dev[gloc] * dPt_dthetaf_dVmf + - lambda_dev[gloc + 1] * dQt_dthetaf_dVmf; + lambda_dev[gloc + 1] * dQt_dthetaf_dVmf; val[4] = lambda_dev[gloc] * dPt_dVmf_dthetat + - lambda_dev[gloc + 1] * dQt_dVmf_dthetat; + lambda_dev[gloc + 1] * dQt_dVmf_dthetat; val[5] = lambda_dev[gloc] * dPt_dVmf_dVmt + - lambda_dev[gloc + 1] * dQt_dVmf_dVmt; + lambda_dev[gloc + 1] * dQt_dVmf_dVmt; val[6] = lambda_dev[gloc] * dPt_dVmf_dthetaf + - lambda_dev[gloc + 1] * dQt_dVmf_dthetaf; + lambda_dev[gloc + 1] * dQt_dVmf_dthetaf; val[7] = lambda_dev[gloc] * dPt_dVmf_dVmf + - lambda_dev[gloc + 1] * dQt_dVmf_dVmf; + lambda_dev[gloc + 1] * dQt_dVmf_dVmf; // from-from bus entries - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 0], val[0]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 1], val[1]); // RAJA::atomicAdd // (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[7]); + RAJA::atomicAdd( + &MHSS_dev[b_hesssp_idx[fbusidx] + 2], val[7]); // off-center from-to entries if (xidxt[i] > xidxf[i]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 1], val[1]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 2], val[4]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[i] + 3], val[5]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 0], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 1], + val[1]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 2], + val[4]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[i] + 3], + val[5]); } }); @@ -2217,14 +2228,14 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( int fbusidx(busf_idx[j]), tbusidx(bust_idx[j]); Pf = Gff * Vmf * Vmf + - Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); Qf = -Bff * Vmf * Vmf + - Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); Pt = Gtt * Vmt * Vmt + - Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Vmt * Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); Qt = -Btt * Vmt * Vmt + - Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + Vmt * Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); double dSf2_dPf, dSf2_dQf, dSt2_dPt, dSt2_dQt; @@ -2238,210 +2249,238 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( double dPt_dthetaf, dPt_dVmf, dPt_dthetat, dPt_dVmt; double dQt_dthetaf, dQt_dVmf, dQt_dthetat, dQt_dVmt; - dPf_dthetaf = Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); - dPf_dVmf = - 2. * Gff * Vmf + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + dPf_dthetaf = + Vmf * Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + dPf_dVmf = 2. * Gff * Vmf + + Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); dPf_dthetat = Vmf * Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); dPf_dVmt = Vmf * (Gft * cos(thetaft) + Bft * sin(thetaft)); dQf_dthetaf = Vmf * Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); - dQf_dVmf = - -2. * Bff * Vmf + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); - dQf_dthetat = Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + dQf_dVmf = -2. * Bff * Vmf + + Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + dQf_dthetat = + Vmf * Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); dQf_dVmt = Vmf * (-Bft * cos(thetaft) + Gft * sin(thetaft)); - dPt_dthetat = Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); - dPt_dVmt = - 2. * Gtt * Vmt + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + dPt_dthetat = + Vmt * Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + dPt_dVmt = 2. * Gtt * Vmt + + Vmf * (Gtf * cos(thetatf) + Btf * sin(thetatf)); dPt_dthetaf = Vmt * Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); dPt_dVmf = Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); dQt_dthetat = Vmt * Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); - dQt_dVmt = - -2. * Btt * Vmt + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - dQt_dthetaf = Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + dQt_dVmt = -2. * Btt * Vmt + + Vmf * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + dQt_dthetaf = + Vmt * Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); dQt_dVmf = Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - double d2Pf_dthetaf_dthetaf, d2Pf_dthetaf_dVmf, d2Pf_dthetaf_dthetat, - d2Pf_dthetaf_dVmt; + double d2Pf_dthetaf_dthetaf, d2Pf_dthetaf_dVmf, + d2Pf_dthetaf_dthetat, d2Pf_dthetaf_dVmt; double d2Pf_dVmf_dthetaf, d2Pf_dVmf_dVmf, d2Pf_dVmf_dthetat, - d2Pf_dVmf_dVmt; - double d2Pf_dthetat_dthetaf, d2Pf_dthetat_dVmf, d2Pf_dthetat_dthetat, - d2Pf_dthetat_dVmt; + d2Pf_dVmf_dVmt; + double d2Pf_dthetat_dthetaf, d2Pf_dthetat_dVmf, + d2Pf_dthetat_dthetat, d2Pf_dthetat_dVmt; double d2Pf_dVmt_dthetaf, d2Pf_dVmt_dVmf, d2Pf_dVmt_dthetat, - d2Pf_dVmt_dVmt; + d2Pf_dVmt_dVmt; /* dPf_dthetaf = Vmf*Vmt*(-Gft*sin(thetaft) + Bft*cos(thetaft)); */ d2Pf_dthetaf_dthetaf = - -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); - d2Pf_dthetaf_dVmf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + -Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetaf_dVmf = + Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); d2Pf_dthetaf_dthetat = - Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); - d2Pf_dthetaf_dVmt = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + d2Pf_dthetaf_dVmt = + Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); - /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ - d2Pf_dVmf_dthetaf = Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + /* dPf_Vmf = 2*Gff*Vmf + Vmt*(Gft*cos(thetaft) + Bft*sin(thetaft)); + */ + d2Pf_dVmf_dthetaf = + Vmt * (-Gft * sin(thetaft) + Bft * cos(thetaft)); d2Pf_dVmf_dVmf = 2 * Gff; d2Pf_dVmf_dthetat = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); d2Pf_dVmf_dVmt = (Gft * cos(thetaft) + Bft * sin(thetaft)); /* dPf_dthetat = Vmf*Vmt*(Gft*sin(thetaft) - Bft*cos(thetaft)); */ d2Pf_dthetat_dthetaf = - Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); + Vmf * Vmt * (Gft * cos(thetaft) + Bft * sin(thetaft)); d2Pf_dthetat_dVmf = Vmt * (Gft * sin(thetaft) - Bft * cos(thetaft)); d2Pf_dthetat_dthetat = - Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); + Vmf * Vmt * (-Gft * cos(thetaft) - Bft * sin(thetaft)); d2Pf_dthetat_dVmt = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); /* dPf_dVmt = Vmf*(Gft*cos(thetaft) + Bft*sin(thetaft)); */ - d2Pf_dVmt_dthetaf = Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); + d2Pf_dVmt_dthetaf = + Vmf * (-Gft * sin(thetaft) + Bft * cos(thetaft)); d2Pf_dVmt_dVmf = (Gft * cos(thetaft) + Bft * sin(thetaft)); d2Pf_dVmt_dthetat = Vmf * (Gft * sin(thetaft) - Bft * cos(thetaft)); d2Pf_dVmt_dVmt = 0.0; - double d2Qf_dthetaf_dthetaf, d2Qf_dthetaf_dVmf, d2Qf_dthetaf_dthetat, - d2Qf_dthetaf_dVmt; + double d2Qf_dthetaf_dthetaf, d2Qf_dthetaf_dVmf, + d2Qf_dthetaf_dthetat, d2Qf_dthetaf_dVmt; double d2Qf_dVmf_dthetaf, d2Qf_dVmf_dVmf, d2Qf_dVmf_dthetat, - d2Qf_dVmf_dVmt; - double d2Qf_dthetat_dthetaf, d2Qf_dthetat_dVmf, d2Qf_dthetat_dthetat, - d2Qf_dthetat_dVmt; + d2Qf_dVmf_dVmt; + double d2Qf_dthetat_dthetaf, d2Qf_dthetat_dVmf, + d2Qf_dthetat_dthetat, d2Qf_dthetat_dVmt; double d2Qf_dVmt_dthetaf, d2Qf_dVmt_dVmf, d2Qf_dVmt_dthetat, - d2Qf_dVmt_dVmt; + d2Qf_dVmt_dVmt; /* dQf_dthetaf = Vmf*Vmt*(Bft*sin(thetaft) + Gft*cos(thetaft)); */ d2Qf_dthetaf_dthetaf = - Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); d2Qf_dthetaf_dVmf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); d2Qf_dthetaf_dthetat = - Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); d2Qf_dthetaf_dVmt = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); - /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + Gft*sin(thetaft)); + /* dQf_dVmf = -2*Bff*Vmf + Vmt*(-Bft*cos(thetaft) + + * Gft*sin(thetaft)); */ d2Qf_dVmf_dthetaf = Vmt * (Bft * sin(thetaft) + Gft * cos(thetaft)); d2Qf_dVmf_dVmf = -2 * Bff; - d2Qf_dVmf_dthetat = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dVmf_dthetat = + Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); d2Qf_dVmf_dVmt = (-Bft * cos(thetaft) + Gft * sin(thetaft)); /* dQf_dthetat = Vmf*Vmt*(-Bft*sin(thetaft) - Gft*cos(thetaft)); */ d2Qf_dthetat_dthetaf = - Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); - d2Qf_dthetat_dVmf = Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + Vmf * Vmt * (-Bft * cos(thetaft) + Gft * sin(thetaft)); + d2Qf_dthetat_dVmf = + Vmt * (-Bft * sin(thetaft) - Gft * cos(thetaft)); d2Qf_dthetat_dthetat = - Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); - d2Qf_dthetat_dVmt = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + Vmf * Vmt * (Bft * cos(thetaft) - Gft * sin(thetaft)); + d2Qf_dthetat_dVmt = + Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); /* dQf_dVmt = Vmf*(-Bft*cos(thetaft) + Gft*sin(thetaft)); */ d2Qf_dVmt_dthetaf = Vmf * (Bft * sin(thetaft) + Gft * cos(thetaft)); d2Qf_dVmt_dVmf = (-Bft * cos(thetaft) + Gft * sin(thetaft)); - d2Qf_dVmt_dthetat = Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); + d2Qf_dVmt_dthetat = + Vmf * (-Bft * sin(thetaft) - Gft * cos(thetaft)); d2Qf_dVmt_dVmt = 0.0; - double d2Pt_dthetat_dthetat, d2Pt_dthetat_dVmt, d2Pt_dthetat_dthetaf, - d2Pt_dthetat_dVmf; + double d2Pt_dthetat_dthetat, d2Pt_dthetat_dVmt, + d2Pt_dthetat_dthetaf, d2Pt_dthetat_dVmf; double d2Pt_dVmt_dthetat, d2Pt_dVmt_dVmt, d2Pt_dVmt_dthetaf, - d2Pt_dVmt_dVmf; - double d2Pt_dthetaf_dthetat, d2Pt_dthetaf_dVmt, d2Pt_dthetaf_dthetaf, - d2Pt_dthetaf_dVmf; + d2Pt_dVmt_dVmf; + double d2Pt_dthetaf_dthetat, d2Pt_dthetaf_dVmt, + d2Pt_dthetaf_dthetaf, d2Pt_dthetaf_dVmf; double d2Pt_dVmf_dthetat, d2Pt_dVmf_dVmt, d2Pt_dVmf_dthetaf, - d2Pt_dVmf_dVmf; + d2Pt_dVmf_dVmf; /* dPt_dthetat = Vmf*Vmt*(-Gtf*sin(thetatf) + Btf*cos(thetatf)); */ d2Pt_dthetat_dthetat = - Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); - d2Pt_dthetat_dVmt = Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + d2Pt_dthetat_dVmt = + Vmf * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); d2Pt_dthetat_dthetaf = - Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); - d2Pt_dthetat_dVmf = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + d2Pt_dthetat_dVmf = + Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); - /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ - d2Pt_dVmt_dthetat = Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); + /* dPt_Vmt = 2*Gtt*Vmt + Vmf*(Gtf*cos(thetatf) + Btf*sin(thetatf)); + */ + d2Pt_dVmt_dthetat = + Vmf * (-Gtf * sin(thetatf) + Bft * cos(thetatf)); d2Pt_dVmt_dVmt = 2 * Gtt; d2Pt_dVmt_dthetaf = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); d2Pt_dVmt_dVmf = (Gtf * cos(thetatf) + Btf * sin(thetatf)); /* dPt_dthetaf = Vmf*Vmt*(Gtf*sin(thetatf) - Btf*cos(thetatf)); */ d2Pt_dthetaf_dthetat = - Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); + Vmf * Vmt * (Gtf * cos(thetatf) + Btf * sin(thetatf)); d2Pt_dthetaf_dVmt = Vmf * (Gtf * sin(thetatf) - Btf * cos(thetatf)); d2Pt_dthetaf_dthetaf = - Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); + Vmf * Vmt * (-Gtf * cos(thetatf) - Btf * sin(thetatf)); d2Pt_dthetaf_dVmf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); /* dPt_dVmf = Vmt*(Gtf*cos(thetatf) + Btf*sin(thetatf)); */ - d2Pt_dVmf_dthetat = Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); + d2Pt_dVmf_dthetat = + Vmt * (-Gtf * sin(thetatf) + Btf * cos(thetatf)); d2Pt_dVmf_dVmt = (Gtf * cos(thetatf) + Btf * sin(thetatf)); d2Pt_dVmf_dthetaf = Vmt * (Gtf * sin(thetatf) - Btf * cos(thetatf)); d2Pt_dVmf_dVmf = 0.0; - double d2Qt_dthetaf_dthetaf, d2Qt_dthetaf_dVmf, d2Qt_dthetaf_dthetat, - d2Qt_dthetaf_dVmt; + double d2Qt_dthetaf_dthetaf, d2Qt_dthetaf_dVmf, + d2Qt_dthetaf_dthetat, d2Qt_dthetaf_dVmt; double d2Qt_dVmf_dthetaf, d2Qt_dVmf_dVmf, d2Qt_dVmf_dthetat, - d2Qt_dVmf_dVmt; - double d2Qt_dthetat_dthetaf, d2Qt_dthetat_dVmf, d2Qt_dthetat_dthetat, - d2Qt_dthetat_dVmt; + d2Qt_dVmf_dVmt; + double d2Qt_dthetat_dthetaf, d2Qt_dthetat_dVmf, + d2Qt_dthetat_dthetat, d2Qt_dthetat_dVmt; double d2Qt_dVmt_dthetaf, d2Qt_dVmt_dVmf, d2Qt_dVmt_dthetat, - d2Qt_dVmt_dVmt; + d2Qt_dVmt_dVmt; /* dQt_dthetat = Vmf*Vmt*(Btf*sin(thetatf) + Gtf*cos(thetatf)); */ d2Qt_dthetat_dthetat = - Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); d2Qt_dthetat_dVmt = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); d2Qt_dthetat_dthetaf = - Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); d2Qt_dthetat_dVmf = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); - /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); + /* dQt_dVmt = -2*Btt*Vmt + Vmf*(-Btf*cos(thetatf) + + * Gtf*sin(thetatf)); */ d2Qt_dVmt_dthetat = Vmf * (Btf * sin(thetatf) + Gtf * cos(thetatf)); d2Qt_dVmt_dVmt = -2 * Btt; - d2Qt_dVmt_dthetaf = Vmf * (-Btf * sin(thetatf) + Gtf * cos(thetatf)); + d2Qt_dVmt_dthetaf = + Vmf * (-Btf * sin(thetatf) + Gtf * cos(thetatf)); d2Qt_dVmt_dVmf = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); /* dQt_dthetaf = Vmf*Vmt*(-Btf*sin(thetatf) - Gtf*cos(thetatf)); */ d2Qt_dthetaf_dthetat = - Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - d2Qt_dthetaf_dVmt = Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + Vmf * Vmt * (-Btf * cos(thetatf) + Gtf * sin(thetatf)); + d2Qt_dthetaf_dVmt = + Vmf * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); d2Qt_dthetaf_dthetaf = - Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); - d2Qt_dthetaf_dVmf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + Vmf * Vmt * (Btf * cos(thetatf) - Gtf * sin(thetatf)); + d2Qt_dthetaf_dVmf = + Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); /* dQt_dVmf = Vmt*(-Btf*cos(thetatf) + Gtf*sin(thetatf)); */ d2Qt_dVmf_dthetat = Vmt * (Btf * sin(thetatf) + Gtf * cos(thetatf)); d2Qt_dVmf_dVmt = (-Btf * cos(thetatf) + Gtf * sin(thetatf)); - d2Qt_dVmf_dthetaf = Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); + d2Qt_dVmf_dthetaf = + Vmt * (-Btf * sin(thetatf) - Gtf * cos(thetatf)); d2Qt_dVmf_dVmf = 0.0; double d2Sf2_dthetaf_dthetaf = 0.0, d2Sf2_dthetaf_dVmf = 0.0, - d2Sf2_dthetaf_dthetat = 0.0, d2Sf2_dthetaf_dVmt = 0.0; + d2Sf2_dthetaf_dthetat = 0.0, d2Sf2_dthetaf_dVmt = 0.0; double d2St2_dthetaf_dthetaf = 0.0, d2St2_dthetaf_dVmf = 0.0, - d2St2_dthetaf_dthetat = 0.0, d2St2_dthetaf_dVmt = 0.0; + d2St2_dthetaf_dthetat = 0.0, d2St2_dthetaf_dVmt = 0.0; - d2Sf2_dthetaf_dthetaf = - 2 * dPf_dthetaf * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dthetaf + - 2 * dQf_dthetaf * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dthetaf; + d2Sf2_dthetaf_dthetaf = 2 * dPf_dthetaf * dPf_dthetaf + + dSf2_dPf * d2Pf_dthetaf_dthetaf + + 2 * dQf_dthetaf * dQf_dthetaf + + dSf2_dQf * d2Qf_dthetaf_dthetaf; d2Sf2_dthetaf_dVmf = - 2 * dPf_dVmf * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmf + - 2 * dQf_dVmf * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmf; - d2Sf2_dthetaf_dthetat = - 2 * dPf_dthetat * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dthetat + - 2 * dQf_dthetat * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dthetat; + 2 * dPf_dVmf * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmf + + 2 * dQf_dVmf * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmf; + d2Sf2_dthetaf_dthetat = 2 * dPf_dthetat * dPf_dthetaf + + dSf2_dPf * d2Pf_dthetaf_dthetat + + 2 * dQf_dthetat * dQf_dthetaf + + dSf2_dQf * d2Qf_dthetaf_dthetat; d2Sf2_dthetaf_dVmt = - 2 * dPf_dVmt * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmt + - 2 * dQf_dVmt * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmt; + 2 * dPf_dVmt * dPf_dthetaf + dSf2_dPf * d2Pf_dthetaf_dVmt + + 2 * dQf_dVmt * dQf_dthetaf + dSf2_dQf * d2Qf_dthetaf_dVmt; - d2St2_dthetaf_dthetaf = - 2 * dPt_dthetaf * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dthetaf + - 2 * dQt_dthetaf * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dthetaf; + d2St2_dthetaf_dthetaf = 2 * dPt_dthetaf * dPt_dthetaf + + dSt2_dPt * d2Pt_dthetaf_dthetaf + + 2 * dQt_dthetaf * dQt_dthetaf + + dSt2_dQt * d2Qt_dthetaf_dthetaf; d2St2_dthetaf_dVmf = - 2 * dPt_dVmf * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmf + - 2 * dQt_dVmf * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmf; - d2St2_dthetaf_dthetat = - 2 * dPt_dthetat * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dthetat + - 2 * dQt_dthetat * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dthetat; + 2 * dPt_dVmf * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmf + + 2 * dQt_dVmf * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmf; + d2St2_dthetaf_dthetat = 2 * dPt_dthetat * dPt_dthetaf + + dSt2_dPt * d2Pt_dthetaf_dthetat + + 2 * dQt_dthetat * dQt_dthetaf + + dSt2_dQt * d2Qt_dthetaf_dthetat; d2St2_dthetaf_dVmt = - 2 * dPt_dVmt * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmt + - 2 * dQt_dVmt * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmt; + 2 * dPt_dVmt * dPt_dthetaf + dSt2_dPt * d2Pt_dthetaf_dVmt + + 2 * dQt_dVmt * dQt_dthetaf + dSt2_dQt * d2Qt_dthetaf_dVmt; val[0] = val[1] = val[2] = val[3] = 0.0; @@ -2454,52 +2493,54 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( gloc = gineqidx[i]; val[0] = lambda_dev[gloc] * d2Sf2_dthetaf_dthetaf + - lambda_dev[gloc + 1] * d2St2_dthetaf_dthetaf; + lambda_dev[gloc + 1] * d2St2_dthetaf_dthetaf; val[1] = lambda_dev[gloc] * d2Sf2_dthetaf_dVmf + - lambda_dev[gloc + 1] * d2St2_dthetaf_dVmf; + lambda_dev[gloc + 1] * d2St2_dthetaf_dVmf; val[2] = lambda_dev[gloc] * d2Sf2_dthetaf_dthetat + - lambda_dev[gloc + 1] * d2St2_dthetaf_dthetat; + lambda_dev[gloc + 1] * d2St2_dthetaf_dthetat; val[3] = lambda_dev[gloc] * d2Sf2_dthetaf_dVmt + - lambda_dev[gloc + 1] * d2St2_dthetaf_dVmt; + lambda_dev[gloc + 1] * d2St2_dthetaf_dVmt; - RAJA::atomicAdd - (&MHSS_dev[fbusidx + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[fbusidx + 1], val[1]); + RAJA::atomicAdd(&MHSS_dev[fbusidx + 0], val[0]); + RAJA::atomicAdd(&MHSS_dev[fbusidx + 1], val[1]); if (xidxt[j] > xidxf[j]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 1], val[3]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 0], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 1], + val[3]); } double d2Sf2_dVmf_dthetaf, d2Sf2_dVmf_dVmf, d2Sf2_dVmf_dthetat, - d2Sf2_dVmf_dVmt; + d2Sf2_dVmf_dVmt; double d2St2_dVmf_dthetaf, d2St2_dVmf_dVmf, d2St2_dVmf_dthetat, - d2St2_dVmf_dVmt; + d2St2_dVmf_dVmt; d2Sf2_dVmf_dthetaf = - 2 * dPf_dthetaf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetaf + - 2 * dQf_dthetaf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetaf; - d2Sf2_dVmf_dVmf = 2 * dPf_dVmf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmf + - 2 * dQf_dVmf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmf; + 2 * dPf_dthetaf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetaf + + 2 * dQf_dthetaf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetaf; + d2Sf2_dVmf_dVmf = + 2 * dPf_dVmf * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmf + + 2 * dQf_dVmf * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmf; d2Sf2_dVmf_dthetat = - 2 * dPf_dthetat * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetat + - 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetat; - d2Sf2_dVmf_dVmt = 2 * dPf_dVmt * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmt + - 2 * dQf_dVmt * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmt; + 2 * dPf_dthetat * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dthetat + + 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dthetat; + d2Sf2_dVmf_dVmt = + 2 * dPf_dVmt * dPf_dVmf + dSf2_dPf * d2Pf_dVmf_dVmt + + 2 * dQf_dVmt * dQf_dVmf + dSf2_dQf * d2Qf_dVmf_dVmt; d2St2_dVmf_dthetaf = - 2 * dPt_dthetaf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetaf + - 2 * dQt_dthetaf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetaf; - d2St2_dVmf_dVmf = 2 * dPt_dVmf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmf + - 2 * dQt_dVmf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmf; + 2 * dPt_dthetaf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetaf + + 2 * dQt_dthetaf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetaf; + d2St2_dVmf_dVmf = + 2 * dPt_dVmf * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmf + + 2 * dQt_dVmf * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmf; d2St2_dVmf_dthetat = - 2 * dPt_dthetat * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetat + - 2 * dQt_dthetat * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetat; - d2St2_dVmf_dVmt = 2 * dPt_dVmt * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmt + - 2 * dQt_dVmt * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmt; + 2 * dPt_dthetat * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dthetat + + 2 * dQt_dthetat * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dthetat; + d2St2_dVmf_dVmt = + 2 * dPt_dVmt * dPt_dVmf + dSt2_dPt * d2Pt_dVmf_dVmt + + 2 * dQt_dVmt * dQt_dVmf + dSt2_dQt * d2Qt_dVmf_dVmt; val[0] = val[1] = val[2] = val[3] = 0.0; @@ -2509,61 +2550,64 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // col[2] = xidxt[j] - nxsparse; // col[3] = xidxt[j] + 1 - nxsparse; - val[0] = lambda_dev[gloc] * d2Sf2_dVmf_dthetaf + - lambda_dev[gloc + 1] * d2St2_dVmf_dthetaf; + lambda_dev[gloc + 1] * d2St2_dVmf_dthetaf; val[1] = lambda_dev[gloc] * d2Sf2_dVmf_dVmf + - lambda_dev[gloc + 1] * d2St2_dVmf_dVmf; + lambda_dev[gloc + 1] * d2St2_dVmf_dVmf; val[2] = lambda_dev[gloc] * d2Sf2_dVmf_dthetat + - lambda_dev[gloc + 1] * d2St2_dVmf_dthetat; + lambda_dev[gloc + 1] * d2St2_dVmf_dthetat; val[3] = lambda_dev[gloc] * d2Sf2_dVmf_dVmt + - lambda_dev[gloc + 1] * d2St2_dVmf_dVmt; + lambda_dev[gloc + 1] * d2St2_dVmf_dVmt; // not in upper triangle // RAJA::atomicAdd // (&MHSS_dev[fbusidx + 2], val[0]); - RAJA::atomicAdd - (&MHSS_dev[fbusidx + 2], val[1]); + RAJA::atomicAdd(&MHSS_dev[fbusidx + 2], val[1]); if (xidxt[j] > xidxf[j]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 2], val[2]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 3], val[3]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 2], + val[2]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 3], + val[3]); } - // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + // ierr = + // MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); - double d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, d2Sf2_dthetat_dthetat, - d2Sf2_dthetat_dVmt; - double d2St2_dthetat_dthetaf, d2St2_dthetat_dVmf, d2St2_dthetat_dthetat, - d2St2_dthetat_dVmt; + double d2Sf2_dthetat_dthetaf, d2Sf2_dthetat_dVmf, + d2Sf2_dthetat_dthetat, d2Sf2_dthetat_dVmt; + double d2St2_dthetat_dthetaf, d2St2_dthetat_dVmf, + d2St2_dthetat_dthetat, d2St2_dthetat_dVmt; - d2Sf2_dthetat_dthetaf = - 2 * dPf_dthetaf * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dthetaf + - 2 * dQf_dthetat * dQf_dthetaf + dSf2_dQf * d2Qf_dthetat_dthetaf; + d2Sf2_dthetat_dthetaf = 2 * dPf_dthetaf * dPf_dthetat + + dSf2_dPf * d2Pf_dthetat_dthetaf + + 2 * dQf_dthetat * dQf_dthetaf + + dSf2_dQf * d2Qf_dthetat_dthetaf; d2Sf2_dthetat_dVmf = - 2 * dPf_dVmf * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmf + - 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dthetat_dVmf; - d2Sf2_dthetat_dthetat = - 2 * dPf_dthetat * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dthetat + - 2 * dQf_dthetat * dQf_dthetat + dSf2_dQf * d2Qf_dthetat_dthetat; + 2 * dPf_dVmf * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmf + + 2 * dQf_dthetat * dQf_dVmf + dSf2_dQf * d2Qf_dthetat_dVmf; + d2Sf2_dthetat_dthetat = 2 * dPf_dthetat * dPf_dthetat + + dSf2_dPf * d2Pf_dthetat_dthetat + + 2 * dQf_dthetat * dQf_dthetat + + dSf2_dQf * d2Qf_dthetat_dthetat; d2Sf2_dthetat_dVmt = - 2 * dPf_dVmt * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmt + - 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dthetat_dVmt; + 2 * dPf_dVmt * dPf_dthetat + dSf2_dPf * d2Pf_dthetat_dVmt + + 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dthetat_dVmt; - d2St2_dthetat_dthetaf = - 2 * dPt_dthetaf * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dthetaf + - 2 * dQt_dthetaf * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dthetaf; + d2St2_dthetat_dthetaf = 2 * dPt_dthetaf * dPt_dthetat + + dSt2_dPt * d2Pt_dthetat_dthetaf + + 2 * dQt_dthetaf * dQt_dthetat + + dSt2_dQt * d2Qt_dthetat_dthetaf; d2St2_dthetat_dVmf = - 2 * dPt_dVmf * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmf + - 2 * dQt_dVmf * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmf; - d2St2_dthetat_dthetat = - 2 * dPt_dthetat * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dthetat + - 2 * dQt_dthetat * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dthetat; + 2 * dPt_dVmf * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmf + + 2 * dQt_dVmf * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmf; + d2St2_dthetat_dthetat = 2 * dPt_dthetat * dPt_dthetat + + dSt2_dPt * d2Pt_dthetat_dthetat + + 2 * dQt_dthetat * dQt_dthetat + + dSt2_dQt * d2Qt_dthetat_dthetat; d2St2_dthetat_dVmt = - 2 * dPt_dVmt * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmt + - 2 * dQt_dVmt * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmt; + 2 * dPt_dVmt * dPt_dthetat + dSt2_dPt * d2Pt_dthetat_dVmt + + 2 * dQt_dVmt * dQt_dthetat + dSt2_dQt * d2Qt_dthetat_dVmt; val[0] = val[1] = val[2] = val[3] = 0.0; @@ -2573,55 +2617,57 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // col[2] = xidxt[j] - nxsparse; // col[3] = xidxt[j] + 1 - nxsparse; - val[0] = lambda_dev[gloc] * d2Sf2_dthetat_dthetaf + - lambda_dev[gloc + 1] * d2St2_dthetat_dthetaf; + lambda_dev[gloc + 1] * d2St2_dthetat_dthetaf; val[1] = lambda_dev[gloc] * d2Sf2_dthetat_dVmf + - lambda_dev[gloc + 1] * d2St2_dthetat_dVmf; + lambda_dev[gloc + 1] * d2St2_dthetat_dVmf; val[2] = lambda_dev[gloc] * d2Sf2_dthetat_dthetat + - lambda_dev[gloc + 1] * d2St2_dthetat_dthetat; + lambda_dev[gloc + 1] * d2St2_dthetat_dthetat; val[3] = lambda_dev[gloc] * d2Sf2_dthetat_dVmt + - lambda_dev[gloc + 1] * d2St2_dthetat_dVmt; + lambda_dev[gloc + 1] * d2St2_dthetat_dVmt; - RAJA::atomicAdd - (&MHSS_dev[tbusidx + 0], val[2]); - RAJA::atomicAdd - (&MHSS_dev[tbusidx + 1], val[3]); + RAJA::atomicAdd(&MHSS_dev[tbusidx + 0], val[2]); + RAJA::atomicAdd(&MHSS_dev[tbusidx + 1], val[3]); if (xidxf[j] > xidxt[j]) { - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 0], val[0]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 1], val[1]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 0], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 1], + val[1]); } - // ierr = MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); + // ierr = + // MatSetValues(H,1,row,4,col,val,ADD_VALUES);CHKERRQ(ierr); double d2Sf2_dVmt_dthetaf, d2Sf2_dVmt_dVmf, d2Sf2_dVmt_dthetat, - d2Sf2_dVmt_dVmt; + d2Sf2_dVmt_dVmt; double d2St2_dVmt_dthetaf, d2St2_dVmt_dVmf, d2St2_dVmt_dthetat, - d2St2_dVmt_dVmt; + d2St2_dVmt_dVmt; d2Sf2_dVmt_dthetaf = - 2 * dPf_dthetaf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetaf + - 2 * dQf_dthetaf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetaf; - d2Sf2_dVmt_dVmf = 2 * dPf_dVmf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmf + - 2 * dQf_dVmf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmf; + 2 * dPf_dthetaf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetaf + + 2 * dQf_dthetaf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetaf; + d2Sf2_dVmt_dVmf = + 2 * dPf_dVmf * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmf + + 2 * dQf_dVmf * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmf; d2Sf2_dVmt_dthetat = - 2 * dPf_dthetat * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetat + - 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetat; - d2Sf2_dVmt_dVmt = 2 * dPf_dVmt * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmt + - 2 * dQf_dVmt * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmt; + 2 * dPf_dthetat * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dthetat + + 2 * dQf_dthetat * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dthetat; + d2Sf2_dVmt_dVmt = + 2 * dPf_dVmt * dPf_dVmt + dSf2_dPf * d2Pf_dVmt_dVmt + + 2 * dQf_dVmt * dQf_dVmt + dSf2_dQf * d2Qf_dVmt_dVmt; d2St2_dVmt_dthetaf = - 2 * dPt_dthetaf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetaf + - 2 * dQt_dthetaf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetaf; - d2St2_dVmt_dVmf = 2 * dPt_dVmf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmf + - 2 * dQt_dVmf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmf; + 2 * dPt_dthetaf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetaf + + 2 * dQt_dthetaf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetaf; + d2St2_dVmt_dVmf = + 2 * dPt_dVmf * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmf + + 2 * dQt_dVmf * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmf; d2St2_dVmt_dthetat = - 2 * dPt_dthetat * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetat + - 2 * dQt_dthetat * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetat; - d2St2_dVmt_dVmt = 2 * dPt_dVmt * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmt + - 2 * dQt_dVmt * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmt; + 2 * dPt_dthetat * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dthetat + + 2 * dQt_dthetat * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dthetat; + d2St2_dVmt_dVmt = + 2 * dPt_dVmt * dPt_dVmt + dSt2_dPt * d2Pt_dVmt_dVmt + + 2 * dQt_dVmt * dQt_dVmt + dSt2_dQt * d2Qt_dVmt_dVmt; val[0] = val[1] = val[2] = val[3] = 0.0; @@ -2632,45 +2678,45 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( // col[3] = xidxt[j] + 1 - nxsparse; val[0] = lambda_dev[gloc] * d2Sf2_dVmt_dthetaf + - lambda_dev[gloc + 1] * d2St2_dVmt_dthetaf; + lambda_dev[gloc + 1] * d2St2_dVmt_dthetaf; val[1] = lambda_dev[gloc] * d2Sf2_dVmt_dVmf + - lambda_dev[gloc + 1] * d2St2_dVmt_dVmf; + lambda_dev[gloc + 1] * d2St2_dVmt_dVmf; val[2] = lambda_dev[gloc] * d2Sf2_dVmt_dthetat + - lambda_dev[gloc + 1] * d2St2_dVmt_dthetat; + lambda_dev[gloc + 1] * d2St2_dVmt_dthetat; val[3] = lambda_dev[gloc] * d2Sf2_dVmt_dVmt + - lambda_dev[gloc + 1] * d2St2_dVmt_dVmt; + lambda_dev[gloc + 1] * d2St2_dVmt_dVmt; // not in upper triangle // RAJA::atomicAdd // (&MHSS_dev[tbusidx + 2], val[2]); - RAJA::atomicAdd - (&MHSS_dev[tbusidx + 2], val[3]); - - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 2], val[0]); - RAJA::atomicAdd - (&MHSS_dev[ln_hessp_idx[j] + 3], val[1]); + RAJA::atomicAdd(&MHSS_dev[tbusidx + 2], val[3]); + + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 2], + val[0]); + RAJA::atomicAdd(&MHSS_dev[ln_hessp_idx[j] + 3], + val[1]); }); } - + int *iperm = pbpolrajahiopsparse->idx_hess_dev_; - int *ipermout = (int *)d_allocator_.allocate(opflow->nnz_hesssp*sizeof(int)); + int *ipermout = + (int *)d_allocator_.allocate(opflow->nnz_hesssp * sizeof(int)); resmgr.copy(ipermout, iperm); - - RAJA::stable_sort_pairs - (RAJA::make_span(ipermout, opflow->nnz_hesssp), - RAJA::make_span(MHSS_dev, opflow->nnz_hesssp), - RAJA::operators::less{}); + + RAJA::stable_sort_pairs( + RAJA::make_span(ipermout, opflow->nnz_hesssp), + RAJA::make_span(MHSS_dev, opflow->nnz_hesssp), + RAJA::operators::less{}); if (debugmsg) { - PrintTriplets("Hessian Values (GPU):", - opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, MHSS_dev); + PrintTriplets("Hessian Values (GPU):", opflow->nnz_hesssp, NULL, iHSS_dev, + jHSS_dev, MHSS_dev); } - + d_allocator_.deallocate(ipermout); - - resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp*sizeof(double)); + + resmgr.memset(MHSS_dev, 0, opflow->nnz_hesssp * sizeof(double)); ierr = VecGetArray(opflow->X, &x); CHKERRQ(ierr); @@ -2751,11 +2797,10 @@ PetscErrorCode OPFLOWComputeSparseHessian_PBPOLRAJAHIOPSPARSE( resmgr.copy(MHSS_dev, pbpolrajahiopsparse->val_hess); if (debugmsg) { - PrintTriplets("Hessian Values:", - opflow->nnz_hesssp, NULL, iHSS_dev, jHSS_dev, MHSS_dev); + PrintTriplets("Hessian Values:", opflow->nnz_hesssp, NULL, iHSS_dev, + jHSS_dev, MHSS_dev); } - - } + } PetscFunctionReturn(0); } diff --git a/src/opflow/model/power_bal_polar/pbpol.cpp b/src/opflow/model/power_bal_polar/pbpol.cpp index c5ee4e5f..1c6c05c5 100644 --- a/src/opflow/model/power_bal_polar/pbpol.cpp +++ b/src/opflow/model/power_bal_polar/pbpol.cpp @@ -1614,29 +1614,24 @@ PetscErrorCode OPFLOWModelSetNumConstraints_PBPOL(OPFLOW opflow, PetscFunctionReturn(0); } - -static PetscErrorCode -MatSetValues_and_Print(char code, Mat M, int nrow, int row[], int ncol, int col[], - PetscScalar val[], InsertMode mode) -{ +static PetscErrorCode MatSetValues_and_Print(char code, Mat M, int nrow, + int row[], int ncol, int col[], + PetscScalar val[], + InsertMode mode) { for (int r = 0, i = 0; r < nrow; ++r) { for (int c = 0; c < ncol; ++c) { if (col[c] >= row[r]) { - std::cout << "M" << code << ": " - << std::setw(5) << std::right << row[r] << " " - << std::setw(5) << std::right << col[c] - << std::setw(12) << std::right - << std::scientific << std::setprecision(3) - << val[i] - << std::endl; + std::cout << "M" << code << ": " << std::setw(5) << std::right << row[r] + << " " << std::setw(5) << std::right << col[c] + << std::setw(12) << std::right << std::scientific + << std::setprecision(3) << val[i] << std::endl; } i++; } } return MatSetValues(M, nrow, row, ncol, col, val, mode); } - - + /* OPFLOWComputeEqualityConstraintsHessian - Computes the Hessian for the equality constraints function part @@ -2096,14 +2091,16 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, val[2] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); // ierr = MatSetValues(H, 1, row, 3, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print('G', H, 1, row, 3, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 3, col, val, ADD_VALUES); // df1_ddelPg = -(Pg - gen->pt); // df2_ddelPg = gen->pb - Pg; row[0] = gen->startxpdevloc; val[0] = -lambda[gloc] - lambda[gloc + 1]; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); // df1_ddelP = gen->apf*(Pg - gen->pt); @@ -2111,7 +2108,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, row[0] = ps->startxloc; val[0] = gen->apf * (lambda[gloc] + lambda[gloc + 1]); // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2148,7 +2146,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] + lambda[gloc + 1]); // lam_eq1*d2eq1_dQg_dV + lam_eq2*d2eq2_dQg_dV // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); row[0] = xloc + 1; col[0] = loc + 1; @@ -2156,7 +2155,8 @@ PetscErrorCode OPFLOWComputeInequalityConstraintsHessian_PBPOL(OPFLOW opflow, lambda[gloc] + lambda[gloc + 1]); // lam_eq1* d2eq1_dQg_dV + lam_eq2*d2eq2_dV_dQg // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); - ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); + ierr = + MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); CHKERRQ(ierr); } } @@ -2681,8 +2681,8 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, // Reactive power is usually not included in the objective, // but let's make sure there's an entry for it - row[0] = xlocglob+1; - col[0] = xlocglob+1; + row[0] = xlocglob + 1; + col[0] = xlocglob + 1; val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES); @@ -2699,8 +2699,8 @@ PetscErrorCode OPFLOWComputeObjectiveHessian_PBPOL(OPFLOW opflow, Vec X, // Reactive power is usually not included in the objective, // but let's make sure there's an entry for it - row[0] = xlocglob+1; - col[0] = xlocglob+1; + row[0] = xlocglob + 1; + col[0] = xlocglob + 1; val[0] = 0.0; // ierr = MatSetValues(H, 1, row, 1, col, val, ADD_VALUES); ierr = MatSetValues_and_Print('G', H, 1, row, 1, col, val, ADD_VALUES);