get slow but correct results for intermediate
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -43,3 +43,4 @@ include/divisible/*
|
|||||||
build/
|
build/
|
||||||
.cache/
|
.cache/
|
||||||
.vscode/
|
.vscode/
|
||||||
|
assignment/
|
||||||
@@ -34,6 +34,7 @@ elseif (WIN32)
|
|||||||
set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc")
|
set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc")
|
||||||
endif()
|
endif()
|
||||||
else ()
|
else ()
|
||||||
|
set(RENDERDOC_PATH "/usr/lib/")
|
||||||
#LINUX PATH HERE
|
#LINUX PATH HERE
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|||||||
BIN
device_selection_cache
Normal file
BIN
device_selection_cache
Normal file
Binary file not shown.
@@ -12,10 +12,12 @@ layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
|||||||
layout(push_constant) uniform PushStruct {
|
layout(push_constant) uniform PushStruct {
|
||||||
uint size;
|
uint size;
|
||||||
uint stride;
|
uint stride;
|
||||||
} p;
|
}p;
|
||||||
|
|
||||||
layout(binding = 0) buffer inoutBufer {uint v[];};
|
layout(binding = 0) buffer inoutBufer {uint v[];};
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
// TODO: Kernel implementation
|
uint idx = gl_GlobalInvocationID.x;
|
||||||
|
|
||||||
|
v[idx] += v[idx + p.stride];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,11 @@ layout(push_constant) uniform PushStruct {
|
|||||||
|
|
||||||
layout(binding = 0) buffer inoutBufer { uint v[]; };
|
layout(binding = 0) buffer inoutBufer { uint v[]; };
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
// TODO: Kernel implementation
|
uint gIDx = gl_GlobalInvocationID.x;
|
||||||
|
if (gIDx >= p.size) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
v[gIDx] = v[gIDx] + v[gIDx + p.offset];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,5 +19,4 @@ layout(binding = 0) buffer inBuffer { uint v[]; };
|
|||||||
layout(binding = 1) buffer outBufer { uint g_v[]; };
|
layout(binding = 1) buffer outBufer { uint g_v[]; };
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
// TODO: Kernel implementation
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,6 +49,25 @@ void A2Task1SolutionInterleaved::compute()
|
|||||||
|
|
||||||
// TODO: Implement reduction with interleaved addressing
|
// TODO: Implement reduction with interleaved addressing
|
||||||
|
|
||||||
|
cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
|
||||||
|
cb.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
|
||||||
|
|
||||||
|
uint inputSize = mpInput->size();
|
||||||
|
PushConstant pc;
|
||||||
|
pc.size = inputSize;
|
||||||
|
|
||||||
|
for (uint stride = inputSize / 2; stride > 0; stride >>= 1)
|
||||||
|
{
|
||||||
|
pc.stride = stride;
|
||||||
|
cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc);
|
||||||
|
|
||||||
|
cb.dispatch((stride + workGroupSize - 1) / workGroupSize, 1, 1);
|
||||||
|
|
||||||
|
vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);
|
||||||
|
cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,
|
||||||
|
vk::DependencyFlags(), 1, &memoryBarrier, 0, nullptr, 0, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
cb.end();
|
cb.end();
|
||||||
|
|
||||||
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include "Sequential.h"
|
#include "Sequential.h"
|
||||||
|
|
||||||
#include "host_timer.h"
|
#include "host_timer.h"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
A2Task1SolutionSequential::A2Task1SolutionSequential(AppResources &app, uint workGroupSize) :
|
A2Task1SolutionSequential::A2Task1SolutionSequential(AppResources &app, uint workGroupSize) :
|
||||||
app(app), workGroupSize(workGroupSize) {}
|
app(app), workGroupSize(workGroupSize) {}
|
||||||
@@ -47,7 +48,27 @@ void A2Task1SolutionSequential::compute()
|
|||||||
|
|
||||||
cb.begin(beginInfo);
|
cb.begin(beginInfo);
|
||||||
|
|
||||||
// TODO: Implement reduction with sequential addressing
|
cb.resetQueryPool(app.queryPool, 0, 1);
|
||||||
|
|
||||||
|
cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
|
||||||
|
cb.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
|
||||||
|
|
||||||
|
uint inputSize = mpInput->size();
|
||||||
|
PushConstant pc;
|
||||||
|
pc.size = inputSize;
|
||||||
|
pc.offset = inputSize / 2;
|
||||||
|
|
||||||
|
for (; pc.offset != 0 ; pc.offset = pc.offset / 2)
|
||||||
|
{
|
||||||
|
cb.pushConstants(pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant), &pc);
|
||||||
|
|
||||||
|
uint groupCount = (pc.size + pc.offset + workGroupSize - 1) / workGroupSize;
|
||||||
|
cb.dispatch(groupCount, 1, 1);
|
||||||
|
|
||||||
|
vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead);
|
||||||
|
cb.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,
|
||||||
|
vk::DependencyFlags(), 1, &memoryBarrier, 0, nullptr, 0, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
cb.end();
|
cb.end();
|
||||||
|
|
||||||
|
|||||||
14
src/main.cpp
14
src/main.cpp
@@ -45,14 +45,14 @@ void run_A2_task1(AppResources &app){
|
|||||||
A2Task1SolutionSequential sequentialSolution(app, 128);
|
A2Task1SolutionSequential sequentialSolution(app, 128);
|
||||||
evaluateTask1Solution(&sequentialSolution, "Sequential");
|
evaluateTask1Solution(&sequentialSolution, "Sequential");
|
||||||
|
|
||||||
A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv");
|
// A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv");
|
||||||
evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition");
|
// evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition");
|
||||||
|
|
||||||
A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv");
|
// A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv");
|
||||||
evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll");
|
// evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll");
|
||||||
|
|
||||||
A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv");
|
// A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv");
|
||||||
evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic");
|
// evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic");
|
||||||
}
|
}
|
||||||
void run_A2_task2(AppResources& app){
|
void run_A2_task2(AppResources& app){
|
||||||
|
|
||||||
@@ -111,7 +111,7 @@ int main()
|
|||||||
|
|
||||||
run_A2_task1(app);
|
run_A2_task1(app);
|
||||||
|
|
||||||
run_A2_task2(app);
|
// run_A2_task2(app);
|
||||||
|
|
||||||
renderdoc::endCapture();
|
renderdoc::endCapture();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user