get slow but correct results for intermediate

This commit is contained in:
2025-12-27 21:29:50 +01:00
parent c86af91a92
commit c957b833d3
9 changed files with 61 additions and 13 deletions

1
.gitignore vendored
View File

@@ -43,3 +43,4 @@ include/divisible/*
build/ build/
.cache/ .cache/
.vscode/ .vscode/
assignment/

View File

@@ -34,6 +34,7 @@ elseif (WIN32)
set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc") set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc")
endif() endif()
else () else ()
set(RENDERDOC_PATH "/usr/lib/")
#LINUX PATH HERE #LINUX PATH HERE
endif () endif ()

BIN
device_selection_cache Normal file

Binary file not shown.

View File

@@ -12,10 +12,12 @@ layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
layout(push_constant) uniform PushStruct { layout(push_constant) uniform PushStruct {
uint size; uint size;
uint stride; uint stride;
} p; }p;
layout(binding = 0) buffer inoutBufer {uint v[];}; layout(binding = 0) buffer inoutBufer {uint v[];};
// One reduction pass: every invocation below p.stride adds the element that
// lies p.stride slots above it into its own slot of v.
void main() {
    uint idx = gl_GlobalInvocationID.x;
    // The dispatch is rounded up to whole work groups, so invocations with
    // idx >= p.stride can exist. Unguarded, they would overwrite v[idx] while
    // a lower invocation is reading that very slot as its v[idx + p.stride]
    // operand (a data race), and they could index past the end of the buffer.
    // NOTE(review): assumes p.size is the element count of v - confirm.
    if (idx >= p.stride || idx + p.stride >= p.size) {
        return;
    }
    v[idx] += v[idx + p.stride];
}

View File

@@ -16,6 +16,11 @@ layout(push_constant) uniform PushStruct {
layout(binding = 0) buffer inoutBufer { uint v[]; }; layout(binding = 0) buffer inoutBufer { uint v[]; };
// One reduction pass: every invocation below p.offset adds the element that
// lies p.offset slots above it into its own slot of v.
void main() {
    uint gIDx = gl_GlobalInvocationID.x;
    // Only the lower p.offset invocations may take part in a pass. Guarding on
    // p.size alone lets the upper invocations read v[gIDx + p.offset] beyond
    // the buffer and overwrite slots that the lower invocations are reading
    // as their right-hand operand in the same dispatch. The second condition
    // keeps the read in range for any p.size.
    // NOTE(review): assumes p.size is the element count of v - confirm.
    if (gIDx >= p.offset || gIDx + p.offset >= p.size) {
        return;
    }
    v[gIDx] = v[gIDx] + v[gIDx + p.offset];
}

View File

@@ -19,5 +19,4 @@ layout(binding = 0) buffer inBuffer { uint v[]; };
layout(binding = 1) buffer outBufer { uint g_v[]; }; layout(binding = 1) buffer outBufer { uint g_v[]; };
void main() { void main() {
// TODO: Kernel implementation
// NOTE(review): this hunk only drops the TODO marker; the kernel body is
// still empty, so this shader performs no work until it is implemented.
} }

View File

@@ -49,6 +49,25 @@ void A2Task1SolutionInterleaved::compute()
// TODO: Implement reduction with interleaved addressing // TODO: Implement reduction with interleaved addressing
cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
cb.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
uint inputSize = mpInput->size();
PushConstant pc;
pc.size = inputSize;
for (uint stride = inputSize / 2; stride > 0; stride >>= 1)
{
pc.stride = stride;
cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc);
cb.dispatch((stride + workGroupSize - 1) / workGroupSize, 1, 1);
vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);
cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlags(), 1, &memoryBarrier, 0, nullptr, 0, nullptr);
}
cb.end(); cb.end();
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb); vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);

View File

@@ -1,6 +1,7 @@
#include "Sequential.h" #include "Sequential.h"
#include "host_timer.h" #include "host_timer.h"
#include <iostream>
A2Task1SolutionSequential::A2Task1SolutionSequential(AppResources &app, uint workGroupSize) : A2Task1SolutionSequential::A2Task1SolutionSequential(AppResources &app, uint workGroupSize) :
app(app), workGroupSize(workGroupSize) {} app(app), workGroupSize(workGroupSize) {}
@@ -47,7 +48,27 @@ void A2Task1SolutionSequential::compute()
cb.begin(beginInfo); cb.begin(beginInfo);
// TODO: Implement reduction with sequential addressing cb.resetQueryPool(app.queryPool, 0, 1);
cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
cb.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
uint inputSize = mpInput->size();
PushConstant pc;
pc.size = inputSize;
pc.offset = inputSize / 2;
for (; pc.offset != 0 ; pc.offset = pc.offset / 2)
{
cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc);
uint groupCount = (pc.size + pc.offset + workGroupSize - 1) / workGroupSize;
cb.dispatch(groupCount, 1, 1);
vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);
cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlags(), 1, &memoryBarrier, 0, nullptr, 0, nullptr);
}
cb.end(); cb.end();

View File

@@ -45,14 +45,14 @@ void run_A2_task1(AppResources &app){
A2Task1SolutionSequential sequentialSolution(app, 128); A2Task1SolutionSequential sequentialSolution(app, 128);
evaluateTask1Solution(&sequentialSolution, "Sequential"); evaluateTask1Solution(&sequentialSolution, "Sequential");
A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv"); // A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv");
evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition"); // evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition");
A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv"); // A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv");
evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll"); // evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll");
A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv"); // A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv");
evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic"); // evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic");
} }
void run_A2_task2(AppResources& app){ void run_A2_task2(AppResources& app){
@@ -111,7 +111,7 @@ int main()
run_A2_task1(app); run_A2_task1(app);
run_A2_task2(app); // run_A2_task2(app);
renderdoc::endCapture(); renderdoc::endCapture();