Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions source/EngineGpuKernels/EditKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,68 @@ __global__ void cudaRolloutSelectionStep(SimulationData data, int* result)
}
}

__global__ void cudaMarkUnwrapStartCells(SimulationData data, float2 refPos, float radius)
{
auto const cellPartition = calcAllThreadsPartition(data.objects.cells.getNumEntries());

for (int index = cellPartition.startIndex; index <= cellPartition.endIndex; ++index) {
auto const& cell = data.objects.cells.at(index);

// Initialize all selected cells: tempValue.as_uint64 = 0 means not yet unwrapped
// After unwrapping, tempValue.as_uint64 = 1 means unwrapped, and shared1 holds the unwrapped position
if (cell->selected == 1 || cell->selected == 2) {
cell->tempValue.as_uint64 = 0;
cell->shared1 = cell->pos;

// Mark cells within radius of refPos as starting points (already unwrapped)
if (data.cellMap.getDistance(refPos, cell->pos) <= radius) {

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot 3 problems in this method: (1) There could be no cells within the radius of refPosition.
(2) There could be 2 cells of the same connected cell component within radius which should be actually far distant in unwrapped space.
(3) There could be many other connected cell components which are selected and would not be captured by this method.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in commit 9e18708. The revised implementation:

  1. No cells within radius: Now finds the closest cell to refPos regardless of distance using cudaFindClosestUnwrappedCell with atomicMin.

  2. Multiple cells in same component within radius: Only one cell is marked as starting point per component - the closest unprocessed cell.

  3. Multiple disconnected components: The outer loop continues until all selected cells are processed. Each component gets its own starting cell (the closest unprocessed cell to refPos at that iteration).

New kernels:

  • cudaInitUnwrapSelection: Initialize all selected cells
  • cudaFindClosestUnwrappedCell: Find closest unprocessed cell
  • cudaMarkUnwrapStartCell: Mark single cell as start
  • cudaUnwrapSelectionStep: BFS propagation (unchanged)

cell->tempValue.as_uint64 = 1;
cell->shared1 = cell->pos + data.cellMap.getCorrectionIncrement(refPos, cell->pos);
}
}
}
}

__global__ void cudaUnwrapSelectionStep(SimulationData data, int* result)
{
auto const cellPartition = calcAllThreadsPartition(data.objects.cells.getNumEntries());

for (int index = cellPartition.startIndex; index <= cellPartition.endIndex; ++index) {
auto const& cell = data.objects.cells.at(index);

// Only process selected cells that have already been unwrapped
if ((cell->selected == 1 || cell->selected == 2) && cell->tempValue.as_uint64 == 1) {
auto currentCell = cell;

// Propagate unwrapping to connected cells (heuristics similar to cudaRolloutSelectionStep)
for (int i = 0; i < 30; ++i) {
bool found = false;
for (int j = 0; j < currentCell->numConnections; ++j) {
auto connectedCell = currentCell->connections[j].cell;
// Check if connected cell is selected and not yet unwrapped
if ((connectedCell->selected == 1 || connectedCell->selected == 2) && connectedCell->tempValue.as_uint64 == 0) {
// Atomically mark as unwrapped to prevent race conditions
auto oldValue = atomicCAS(reinterpret_cast<unsigned long long int*>(&connectedCell->tempValue.as_uint64), 0ull, 1ull);
if (oldValue == 0) {
// Calculate the unwrapped position based on the current cell's unwrapped position
auto delta = connectedCell->pos - currentCell->pos;
data.cellMap.correctDirection(delta);
connectedCell->shared1 = currentCell->shared1 + delta;
currentCell = connectedCell;
found = true;
atomicExch(result, 1);
break;
}
}
}
if (!found) {
break;
}
}
}
}
}

__global__ void cudaApplyForce(SimulationData data, ApplyForceData applyData)
{
{
Expand Down
2 changes: 2 additions & 0 deletions source/EngineGpuKernels/EditKernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ __global__ void cudaSetSelection(AreaSelectionData selectionData, SimulationData
__global__ void cudaRemoveSelection(SimulationData data, bool onlyClusterSelection);
__global__ void cudaSwapSelection(float2 pos, float radius, SimulationData data);
__global__ void cudaRolloutSelectionStep(SimulationData data, int* result);
__global__ void cudaMarkUnwrapStartCells(SimulationData data, float2 refPos, float radius);
__global__ void cudaUnwrapSelectionStep(SimulationData data, int* result);
__global__ void cudaApplyForce(SimulationData data, ApplyForceData applyData);
__global__ void cudaResetSelectionResult(SelectionResult result);
__global__ void cudaCalcCellWithMinimalPosY(SimulationData data, unsigned long long int* minCellPosYAndIndex);
Expand Down
18 changes: 18 additions & 0 deletions source/EngineGpuKernels/EditKernelsService.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ _EditKernelsService::_EditKernelsService()
{
auto& memoryManager = CudaMemoryManager::getInstance();
memoryManager.acquireMemory(1, _cudaRolloutResult);
memoryManager.acquireMemory(1, _cudaUnwrapResult);
memoryManager.acquireMemory(1, _cudaSwitchResult);
memoryManager.acquireMemory(1, _cudaUpdateResult);
memoryManager.acquireMemory(1, _cudaRemoveResult);
Expand All @@ -26,6 +27,7 @@ _EditKernelsService::~_EditKernelsService()
{
auto& memoryManager = CudaMemoryManager::getInstance();
memoryManager.freeMemory(_cudaRolloutResult);
memoryManager.freeMemory(_cudaUnwrapResult);
memoryManager.freeMemory(_cudaSwitchResult);
memoryManager.freeMemory(_cudaUpdateResult);
memoryManager.freeMemory(_cudaRemoveResult);
Expand Down Expand Up @@ -294,6 +296,22 @@ void _EditKernelsService::rolloutSelection(CudaSettings const& gpuSettings, Simu
} while (1 == copyToHost(_cudaRolloutResult));
}

void _EditKernelsService::unwrapSelection(CudaSettings const& gpuSettings, SimulationData const& data, float2 const& refPos)
{
// Mark cells near refPos as starting points for unwrapping
// Use a small radius to find the initial cells in the connected component
KERNEL_CALL(cudaMarkUnwrapStartCells, data, refPos, 1.5f);
cudaDeviceSynchronize();

// Propagate unwrapping through connected cells until no more changes
do {
setValueToDevice(_cudaUnwrapResult, 0);
KERNEL_CALL(cudaUnwrapSelectionStep, data, _cudaUnwrapResult);
cudaDeviceSynchronize();

} while (1 == copyToHost(_cudaUnwrapResult));
}

void _EditKernelsService::applyCataclysm(CudaSettings const& gpuSettings, SimulationData const& data)
{
KERNEL_CALL(cudaApplyCataclysm, data);
Expand Down
2 changes: 2 additions & 0 deletions source/EngineGpuKernels/EditKernelsService.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public:
void applyForce(CudaSettings const& gpuSettings, SimulationData const& data, ApplyForceData const& applyData);

void rolloutSelection(CudaSettings const& gpuSettings, SimulationData const& data);
void unwrapSelection(CudaSettings const& gpuSettings, SimulationData const& data, float2 const& refPos);

void applyCataclysm(CudaSettings const& gpuSettings, SimulationData const& data);

Expand All @@ -43,6 +44,7 @@ private:

// Gpu memory
int* _cudaRolloutResult;
int* _cudaUnwrapResult;
int* _cudaSwitchResult;
int* _cudaUpdateResult;
int* _cudaRemoveResult;
Expand Down