stop being dumb dumb

This commit is contained in:
2025-12-27 22:01:42 +01:00
parent c957b833d3
commit 1884daea1f
4 changed files with 12 additions and 7 deletions

View File

@@ -17,7 +17,11 @@ layout(push_constant) uniform PushStruct {
layout(binding = 0) buffer inoutBufer {uint v[];}; layout(binding = 0) buffer inoutBufer {uint v[];};
void main() { void main() {
uint idx = gl_GlobalInvocationID.x; uint idx = gl_GlobalInvocationID.x * 2 * p.stride;
if (idx + p.stride >= p.size) {
return;
}
v[idx] += v[idx + p.stride]; v[idx] += v[idx + p.stride];
} }

View File

@@ -18,9 +18,10 @@ layout(binding = 0) buffer inoutBufer { uint v[]; };
void main() { void main() {
uint gIDx = gl_GlobalInvocationID.x; uint gIDx = gl_GlobalInvocationID.x;
if (gIDx >= p.size) {
if (gIDx + p.offset >= p.size) {
return; return;
} }
v[gIDx] = v[gIDx] + v[gIDx + p.offset]; v[gIDx] += v[gIDx + p.offset];
} }

View File

@@ -56,12 +56,12 @@ void A2Task1SolutionInterleaved::compute()
PushConstant pc; PushConstant pc;
pc.size = inputSize; pc.size = inputSize;
for (uint stride = inputSize / 2; stride > 0; stride >>= 1) for (uint stride = 1; stride < inputSize; stride <<= 1)
{ {
pc.stride = stride; pc.stride = stride;
cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc); cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc);
cb.dispatch((stride + workGroupSize - 1) / workGroupSize, 1, 1); cb.dispatch((inputSize / (stride * 2) + workGroupSize - 1) / workGroupSize, 1, 1);
vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);
cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader, cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,

View File

@@ -62,7 +62,7 @@ void A2Task1SolutionSequential::compute()
{ {
cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc); cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc);
uint groupCount = (pc.size + pc.offset + workGroupSize - 1) / workGroupSize; uint groupCount = (pc.offset + workGroupSize - 1) / workGroupSize;
cb.dispatch(groupCount, 1, 1); cb.dispatch(groupCount, 1, 1);
vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);