From c957b833d38e42a0a75c646ee75a266cfec441b3 Mon Sep 17 00:00:00 2001 From: Arif Hasanic Date: Sat, 27 Dec 2025 21:29:50 +0100 Subject: [PATCH] get slow but correct results for intermediate --- .gitignore | 1 + CMakeLists.txt | 1 + device_selection_cache | Bin 0 -> 8 bytes shaders/A2Task1Interleaved.comp | 6 ++++-- shaders/A2Task1Sequential.comp | 9 +++++++-- shaders/A2Task2Naive.comp | 1 - src/A2Task1Solution/Interleaved.cpp | 19 +++++++++++++++++++ src/A2Task1Solution/Sequential.cpp | 23 ++++++++++++++++++++++- src/main.cpp | 14 +++++++------- 9 files changed, 61 insertions(+), 13 deletions(-) create mode 100644 device_selection_cache diff --git a/.gitignore b/.gitignore index 122452b..a9c2a66 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ include/divisible/* build/ .cache/ .vscode/ +assignment/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b6d81c..16c36ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ elseif (WIN32) set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc") endif() else () + set(RENDERDOC_PATH "/usr/lib/") #LINUX PATH HERE endif () diff --git a/device_selection_cache b/device_selection_cache new file mode 100644 index 0000000000000000000000000000000000000000..59e0b5a358c46e6ac5440c3e198029fefd8a6a11 GIT binary patch literal 8 Pcmca7z`)R=!oUCk3FHBn literal 0 HcmV?d00001 diff --git a/shaders/A2Task1Interleaved.comp b/shaders/A2Task1Interleaved.comp index 440da96..cc8be56 100644 --- a/shaders/A2Task1Interleaved.comp +++ b/shaders/A2Task1Interleaved.comp @@ -12,10 +12,12 @@ layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; layout(push_constant) uniform PushStruct { uint size; uint stride; -} p; +}p; layout(binding = 0) buffer inoutBufer {uint v[];}; void main() { - // TODO: Kernel implementation + uint idx = gl_GlobalInvocationID.x; + + v[idx] += v[idx + p.stride]; } diff --git a/shaders/A2Task1Sequential.comp b/shaders/A2Task1Sequential.comp index f790993..6b33125 100644 --- a/shaders/A2Task1Sequential.comp +++ b/shaders/A2Task1Sequential.comp @@ -16,6 +16,11 @@ layout(push_constant) uniform PushStruct { layout(binding = 0) buffer inoutBufer { uint v[]; }; -void main() { - // TODO: Kernel implementation +void main() { + uint gIDx = gl_GlobalInvocationID.x; + if (gIDx >= p.size) { + return; + } + + v[gIDx] = v[gIDx] + v[gIDx + p.offset]; } diff --git a/shaders/A2Task2Naive.comp b/shaders/A2Task2Naive.comp index e60a643..6edfd98 100644 --- a/shaders/A2Task2Naive.comp +++ b/shaders/A2Task2Naive.comp @@ -19,5 +19,4 @@ layout(binding = 0) buffer inBuffer { uint v[]; }; layout(binding = 1) buffer outBufer { uint g_v[]; }; void main() { - // TODO: Kernel implementation } diff --git a/src/A2Task1Solution/Interleaved.cpp b/src/A2Task1Solution/Interleaved.cpp index 0781d21..88d2ad7 100644 --- a/src/A2Task1Solution/Interleaved.cpp +++ b/src/A2Task1Solution/Interleaved.cpp @@ -49,6 +49,25 @@ void A2Task1SolutionInterleaved::compute() // TODO: Implement reduction with interleaved addressing + cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline); + cb.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); + + uint inputSize = mpInput->size(); + PushConstant pc; + pc.size = inputSize; + + for (uint stride = inputSize / 2; stride > 0; stride >>= 1) + { + pc.stride = stride; + cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc); + + cb.dispatch((stride + workGroupSize - 1) / workGroupSize, 1, 1); + + vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); + cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlags(), 1, &memoryBarrier, 0, nullptr, 0, nullptr); + } + cb.end(); vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb); diff --git a/src/A2Task1Solution/Sequential.cpp b/src/A2Task1Solution/Sequential.cpp index 1bc404d..0016f54 100644 --- a/src/A2Task1Solution/Sequential.cpp +++ b/src/A2Task1Solution/Sequential.cpp @@ -1,6 +1,7 @@ #include "Sequential.h" #include "host_timer.h" +#include A2Task1SolutionSequential::A2Task1SolutionSequential(AppResources &app, uint workGroupSize) : app(app), workGroupSize(workGroupSize) {} @@ -47,7 +48,27 @@ void A2Task1SolutionSequential::compute() cb.begin(beginInfo); - // TODO: Implement reduction with sequential addressing + cb.resetQueryPool(app.queryPool, 0, 1); + + cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline); + cb.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); + + uint inputSize = mpInput->size(); + PushConstant pc; + pc.size = inputSize; + pc.offset = inputSize / 2; + + for (; pc.offset != 0 ; pc.offset = pc.offset / 2) + { + cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc); + + uint groupCount = (pc.size + pc.offset + workGroupSize - 1) / workGroupSize; + cb.dispatch(groupCount, 1, 1); + + vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); + cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlags(), 1, &memoryBarrier, 0, nullptr, 0, nullptr); + } cb.end(); diff --git a/src/main.cpp b/src/main.cpp index 49b4508..e1286f7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,14 +45,14 @@ void run_A2_task1(AppResources &app){ A2Task1SolutionSequential sequentialSolution(app, 128); evaluateTask1Solution(&sequentialSolution, "Sequential"); - A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv"); - evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition"); + // A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv"); + // evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition"); - A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv"); - evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll"); + // A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv"); + // evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll"); - A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv"); - evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic"); + // A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv"); + // evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic"); } void run_A2_task2(AppResources& app){ @@ -111,7 +111,7 @@ int main() run_A2_task1(app); - run_A2_task2(app); + // run_A2_task2(app); renderdoc::endCapture();