#version 450

/* built in:
in uvec3 gl_NumWorkGroups;
in uvec3 gl_WorkGroupID;
in uvec3 gl_LocalInvocationID;
in uvec3 gl_GlobalInvocationID;
in uint  gl_LocalInvocationIndex;
*/

// Workgroup width is supplied through specialization constant 0.
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

layout(push_constant) uniform PushStruct {
    uint size; // used below as the number of valid elements in v[]
} p;

layout(binding = 0) buffer inBuffer  { uint v[];   };
layout(binding = 1) buffer outBuffer { uint g_v[]; };

// One workgroup covers 2 * gl_WorkGroupSize.x input elements (512 for a
// 256-wide group); the first addition happens while loading, so only one
// shared slot per lane is needed.
const uint bufferSize = 256;
shared uint localBuffer[bufferSize];

// Workgroup-local sum reduction.
//
// Each workgroup sums its slice of v[] and writes a single partial sum to
// g_v[gl_WorkGroupID.x].
//
// NOTE(review): this assumes the host dispatches ceil(size / (2 * local_size_x))
// workgroups and consumes one partial sum per workgroup from g_v (re-running
// the kernel on the partial sums if more than one workgroup was used) --
// confirm against the host code.
void main()
{
    uint lid = gl_LocalInvocationID.x;

    // Never use more lanes than there are shared slots, so a workgroup wider
    // than bufferSize cannot write past the end of localBuffer.
    uint lanes = min(gl_WorkGroupSize.x, bufferSize);

    // Slice of the input owned by this workgroup.
    uint chunk = 2u * gl_WorkGroupSize.x;
    uint base  = gl_WorkGroupID.x * chunk;

    // Initial reduction while loading: every participating lane accumulates
    // the elements of the slice that map onto it. Neighbouring lanes read
    // neighbouring addresses. Elements at or beyond p.size are skipped, which
    // is equivalent to adding zero.
    if (lid < lanes) {
        uint sum = 0u;
        for (uint k = lid; k < chunk; k += lanes) {
            uint idx = base + k;
            if (idx < p.size) {
                sum += v[idx];
            }
        }
        localBuffer[lid] = sum;
    }
    // Make all partial sums visible to the whole workgroup before combining.
    barrier();

    // Tree reduction in shared memory. `count` is the number of live partial
    // sums; each step folds the upper part onto the lower part. Rounding the
    // stride up keeps this correct for lane counts that are not a power of two.
    // The loop bounds are identical for every invocation, so barrier() is
    // always reached in uniform control flow.
    for (uint count = lanes; count > 1u; ) {
        uint stride = (count + 1u) >> 1;
        if (lid + stride < count) {
            localBuffer[lid] += localBuffer[lid + stride];
        }
        barrier();
        count = stride;
    }

    // A single invocation publishes the workgroup's result.
    if (lid == 0u) {
        g_v[gl_WorkGroupID.x] = localBuffer[0];
    }
}