From 1884daea1f6db7269be074a6dd436227ae4fa370 Mon Sep 17 00:00:00 2001 From: Arif Hasanic Date: Sat, 27 Dec 2025 22:01:42 +0100 Subject: [PATCH] stop being dumb dumb --- shaders/A2Task1Interleaved.comp | 8 ++++++-- shaders/A2Task1Sequential.comp | 5 +++-- src/A2Task1Solution/Interleaved.cpp | 4 ++-- src/A2Task1Solution/Sequential.cpp | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/shaders/A2Task1Interleaved.comp b/shaders/A2Task1Interleaved.comp index cc8be56..fa4c372 100644 --- a/shaders/A2Task1Interleaved.comp +++ b/shaders/A2Task1Interleaved.comp @@ -17,7 +17,11 @@ layout(push_constant) uniform PushStruct { layout(binding = 0) buffer inoutBufer {uint v[];}; void main() { - uint idx = gl_GlobalInvocationID.x; - + uint idx = gl_GlobalInvocationID.x * 2 * p.stride; + + if (idx + p.stride >= p.size) { + return; + } + v[idx] += v[idx + p.stride]; } diff --git a/shaders/A2Task1Sequential.comp b/shaders/A2Task1Sequential.comp index 6b33125..afcb285 100644 --- a/shaders/A2Task1Sequential.comp +++ b/shaders/A2Task1Sequential.comp @@ -18,9 +18,10 @@ layout(binding = 0) buffer inoutBufer { uint v[]; }; void main() { uint gIDx = gl_GlobalInvocationID.x; - if (gIDx >= p.size) { + + if (gIDx + p.offset >= p.size) { return; } - v[gIDx] = v[gIDx] + v[gIDx + p.offset]; + v[gIDx] += v[gIDx + p.offset]; } diff --git a/src/A2Task1Solution/Interleaved.cpp b/src/A2Task1Solution/Interleaved.cpp index 88d2ad7..7b963a4 100644 --- a/src/A2Task1Solution/Interleaved.cpp +++ b/src/A2Task1Solution/Interleaved.cpp @@ -56,12 +56,12 @@ void A2Task1SolutionInterleaved::compute() PushConstant pc; pc.size = inputSize; - for (uint stride = inputSize / 2; stride > 0; stride >>= 1) + for (uint stride = 1; stride < inputSize; stride <<= 1) { pc.stride = stride; cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc); - cb.dispatch((stride + workGroupSize - 1) / workGroupSize, 1, 1); + cb.dispatch((inputSize / (stride * 2) + workGroupSize - 1) / workGroupSize, 1, 1); vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader, diff --git a/src/A2Task1Solution/Sequential.cpp b/src/A2Task1Solution/Sequential.cpp index 0016f54..f204433 100644 --- a/src/A2Task1Solution/Sequential.cpp +++ b/src/A2Task1Solution/Sequential.cpp @@ -62,7 +62,7 @@ void A2Task1SolutionSequential::compute() { cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc); - uint groupCount = (pc.size + pc.offset + workGroupSize - 1) / workGroupSize; + uint groupCount = (pc.offset + workGroupSize - 1) / workGroupSize; cb.dispatch(groupCount, 1, 1); vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);