init commit
This commit is contained in:
45
.gitignore
vendored
Normal file
45
.gitignore
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
# Compiled Object files
|
||||
**/.DS_Store
|
||||
*.slo
|
||||
*.lo
|
||||
*.o
|
||||
*.obj
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
|
||||
# Fortran module files
|
||||
*.mod
|
||||
*.smod
|
||||
|
||||
# Compiled Static libraries
|
||||
*.lai
|
||||
*.la
|
||||
*.lib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
|
||||
**/cmake-build-debug
|
||||
**/CMakeCache.txt
|
||||
**/cmake_install.cmake
|
||||
**/install_manifest.txt
|
||||
**/CMakeFiles/
|
||||
**/CTestTestfile.cmake
|
||||
**/*.cbp
|
||||
**/CMakeScripts
|
||||
**/compile_commands.json
|
||||
|
||||
include/divisible/*
|
||||
|
||||
build/
|
||||
.cache/
|
||||
.vscode/
|
||||
77
CMakeLists.txt
Normal file
77
CMakeLists.txt
Normal file
@@ -0,0 +1,77 @@
|
||||
cmake_minimum_required(VERSION 3.16)

# The sources rely on C++17 language features (for example the `inline`
# variables in include/helper.h), so require the standard outright instead of
# letting CMake silently fall back to an older one on compilers lacking it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(Assignment2)
|
||||
|
||||
# add_shader(<target> <shader>)
#
# Compiles the GLSL file <shader> (a path relative to the current source
# directory) to SPIR-V with glslc and adds the result to <target>'s sources,
# so the shader is recompiled whenever its source file changes.
#
# The output is written to <source dir>/build/<shader>.spv. It stays in the
# source tree on purpose: WORKING_DIR is defined as the source directory and
# the application loads its shaders from "build/shaders/*.spv" below it.
function(add_shader TARGET SHADER)
    # Also look inside the Vulkan SDK, which ships glslc but is not always on PATH.
    find_program(GLSLC NAMES glslc HINTS ENV VULKAN_SDK PATH_SUFFIXES bin Bin)
    if (NOT GLSLC)
        # Fail at configure time instead of running "GLSLC-NOTFOUND" during the build.
        message(FATAL_ERROR
                "add_shader: glslc was not found. Install the Vulkan SDK or set "
                "GLSLC to the full path of the shader compiler.")
    endif ()

    set(current-shader-path "${CMAKE_CURRENT_SOURCE_DIR}/${SHADER}")
    set(current-output-path "${CMAKE_CURRENT_SOURCE_DIR}/build/${SHADER}.spv")

    # MAKE_DIRECTORY creates missing parent directories, so build/ itself is
    # created here as well.
    get_filename_component(current-output-dir "${current-output-path}" DIRECTORY)
    file(MAKE_DIRECTORY "${current-output-dir}")

    add_custom_command(
            OUTPUT "${current-output-path}"
            COMMAND "${GLSLC}" --target-env=vulkan1.2 -o "${current-output-path}" "${current-shader-path}"
            DEPENDS "${current-shader-path}"
            # Lets Makefile generators scan the shader for #include'd files.
            IMPLICIT_DEPENDS CXX "${current-shader-path}"
            VERBATIM)

    # Make sure our build depends on this output.
    set_source_files_properties("${current-output-path}" PROPERTIES GENERATED TRUE)
    target_sources(${TARGET} PRIVATE "${current-output-path}")
endfunction()
|
||||
|
||||
find_package(Vulkan REQUIRED)
|
||||
|
||||
# Locate RenderDoc. An explicit RENDERDOC_PATH environment variable wins;
# otherwise the default Windows install directory is used when it exists.
# If neither applies, RENDERDOC_PATH is left unset.
if (DEFINED ENV{RENDERDOC_PATH})
    # `$ENV{...}` reads the environment variable. Without the leading `$` the
    # literal text "ENV{RENDERDOC_PATH}" would be stored instead of the path.
    set(RENDERDOC_PATH "$ENV{RENDERDOC_PATH}")
elseif (WIN32)
    if (EXISTS "C:\\Program Files\\RenderDoc")
        set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc")
    endif ()
else ()
    #LINUX PATH HERE
endif ()
|
||||
|
||||
set(SOURCE_FILE
|
||||
src/main.cpp
|
||||
src/task_common.cpp
|
||||
src/host_timer.cpp
|
||||
src/initialization.cpp
|
||||
src/renderdoc.cpp
|
||||
src/utils.cpp
|
||||
src/A2Task1.cpp
|
||||
src/A2Task2.cpp
|
||||
src/A2Task1Solution/Interleaved.cpp
|
||||
src/A2Task1Solution/KernelDecomposition.cpp
|
||||
src/A2Task1Solution/Sequential.cpp
|
||||
src/A2Task2Solution/KernelDecomposition.cpp
|
||||
src/A2Task2Solution/Naive.cpp
|
||||
)
|
||||
|
||||
add_compile_definitions(WORKING_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
|
||||
add_executable(${PROJECT_NAME} ${SOURCE_FILE})
|
||||
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task1Interleaved.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task1KernelDecomposition.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task1KernelDecompositionAtomic.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task1KernelDecompositionUnroll.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task1Sequential.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task2KernelDecomposition.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task2KernelDecompositionOffset.comp)
|
||||
add_shader(${PROJECT_NAME} shaders/A2Task2Naive.comp)
|
||||
|
||||
# Usage requirements of the executable: project headers, Vulkan, and the
# optional RenderDoc integration.
target_include_directories(${PROJECT_NAME} PUBLIC ./include)
target_include_directories(${PROJECT_NAME} PRIVATE ${Vulkan_INCLUDE_DIRS})
# Always state the visibility keyword; the keyword-less signature has legacy
# semantics and cannot be mixed with keyword calls on the same target.
target_link_libraries(${PROJECT_NAME} PRIVATE Vulkan::Vulkan)

if (RENDERDOC_PATH)
    # NOTE(review): machine-specific RenderDoc 1.33 include location; kept so
    # existing setups keep building. Confirm whether it is still needed.
    target_include_directories(${PROJECT_NAME} PRIVATE /usr/local/renderdoc_1.33/include)
    target_include_directories(${PROJECT_NAME} PRIVATE "${RENDERDOC_PATH}")
    target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_RENDERDOC)
endif ()
|
||||
37
include/A2Task1.h
Normal file
37
include/A2Task1.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
#include "helper.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
#include "task_common.h"
|
||||
|
||||
class A2Task1Solution {
|
||||
public:
|
||||
float mstime;
|
||||
|
||||
virtual void prepare(const std::vector<uint> &input) = 0;
|
||||
virtual void compute() = 0;
|
||||
virtual uint result() const = 0;
|
||||
virtual void cleanup() = 0;
|
||||
};
|
||||
|
||||
class A2Task1 {
|
||||
public:
|
||||
A2Task1(uint problemSize);
|
||||
A2Task1(std::vector<uint> input);
|
||||
|
||||
bool evaluateSolution(A2Task1Solution& solution);
|
||||
|
||||
private:
|
||||
void computeReference();
|
||||
|
||||
std::vector<uint> input;
|
||||
uint reference;
|
||||
};
|
||||
40
include/A2Task2.h
Normal file
40
include/A2Task2.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
#include "helper.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
#include "task_common.h"
|
||||
|
||||
class A2Task2Solution {
|
||||
public:
|
||||
float mstime;
|
||||
|
||||
virtual void prepare(const std::vector<uint> &input) = 0;
|
||||
virtual void compute() = 0;
|
||||
virtual std::vector<uint> result() const = 0;
|
||||
virtual void cleanup() = 0;
|
||||
};
|
||||
|
||||
class A2Task2 {
|
||||
public:
|
||||
A2Task2(uint problemSize);
|
||||
A2Task2(std::vector<uint> input);
|
||||
|
||||
bool evaluateSolution(A2Task2Solution& solution);
|
||||
size_t size() const {
|
||||
return input.size();
|
||||
}
|
||||
|
||||
private:
|
||||
void computeReference();
|
||||
|
||||
std::vector<uint> input;
|
||||
std::vector<uint> reference;
|
||||
};
|
||||
8
include/helper.h
Normal file
8
include/helper.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
|
||||
// Base directory used to build resource paths, always ending in '/'.
// Falls back to the current directory when the build does not define WORKING_DIR.
#ifndef WORKING_DIR
inline std::string workingDir{"./"};
#else
inline std::string workingDir = std::string(WORKING_DIR) + "/";
#endif
|
||||
15
include/host_timer.h
Normal file
15
include/host_timer.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
|
||||
class HostTimer {
|
||||
private:
|
||||
using clock = std::chrono::high_resolution_clock;
|
||||
|
||||
clock::time_point start;
|
||||
|
||||
public:
|
||||
HostTimer();
|
||||
void reset();
|
||||
double elapsed() const;
|
||||
};
|
||||
47
include/initialization.h
Normal file
47
include/initialization.h
Normal file
@@ -0,0 +1,47 @@
|
||||
#ifndef INITIALIZATION
|
||||
#define INITIALIZATION
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <cstring>
|
||||
|
||||
struct AppResources
|
||||
{
|
||||
vk::Instance instance;
|
||||
vk::DebugUtilsMessengerEXT dbgUtilsMgr;
|
||||
vk::PhysicalDevice pDevice;
|
||||
vk::PhysicalDeviceProperties2 pDeviceProperties;
|
||||
vk::PhysicalDeviceSubgroupProperties pDeviceSubgroupProperties;
|
||||
|
||||
vk::Device device;
|
||||
vk::Queue computeQueue, transferQueue;
|
||||
uint32_t cQ, tQ;
|
||||
vk::CommandPool computeCommandPool, transferCommandPool;
|
||||
vk::QueryPool queryPool;
|
||||
|
||||
void destroy();
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkBool32 VKAPI_CALL
|
||||
debugUtilsMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
|
||||
VkDebugUtilsMessageTypeFlagsEXT messageTypes,
|
||||
VkDebugUtilsMessengerCallbackDataEXT const *pCallbackData,
|
||||
void * /*pUserData*/);
|
||||
vk::DebugUtilsMessengerCreateInfoEXT makeDebugUtilsMessengerCreateInfoEXT();
|
||||
|
||||
void selectPhysicalDevice(vk::Instance &instance, vk::PhysicalDevice &pDevice);
|
||||
void createInstance(vk::Instance &instance, vk::DebugUtilsMessengerEXT &debugUtilsMessenger,
|
||||
std::string appName, std::string engineName);
|
||||
void createLogicalDevice(vk::Instance &instance, vk::PhysicalDevice &pDevice, vk::Device &device);
|
||||
std::tuple<uint32_t, uint32_t> getComputeAndTransferQueues(vk::PhysicalDevice &pDevice);
|
||||
void createCommandPool(vk::Device &device, vk::CommandPool &commandPool, uint32_t queueIndex);
|
||||
void destroyInstance(vk::Instance &instance, vk::DebugUtilsMessengerEXT &debugUtilsMessenger);
|
||||
void destroyLogicalDevice(vk::Device &device);
|
||||
void destroyCommandPool(vk::Device &device, vk::CommandPool &commandPool);
|
||||
|
||||
void createTimestampQueryPool(vk::Device &device, vk::QueryPool &queryPool, uint32_t queryCount);
|
||||
void destroyQueryPool(vk::Device &device, vk::QueryPool &queryPool);
|
||||
|
||||
|
||||
void printDeviceCapabilities(vk::PhysicalDevice &pDevice);
|
||||
|
||||
void initApp(AppResources &app);
|
||||
#endif
|
||||
7
include/renderdoc.h
Normal file
7
include/renderdoc.h
Normal file
@@ -0,0 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
namespace renderdoc {
|
||||
void initialize();
|
||||
void startCapture();
|
||||
void endCapture();
|
||||
}
|
||||
47
include/task_common.h
Normal file
47
include/task_common.h
Normal file
@@ -0,0 +1,47 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
#ifndef EX_TEMPLATE
|
||||
#define EX_TEMPLATE
|
||||
|
||||
namespace Cmn {
|
||||
void createDescriptorSetLayout(vk::Device &device,
|
||||
std::vector<vk::DescriptorSetLayoutBinding> &bindings, vk::DescriptorSetLayout &descLayout);
|
||||
void addStorage(std::vector<vk::DescriptorSetLayoutBinding> &bindings, uint32_t binding);
|
||||
|
||||
void allocateDescriptorSet(vk::Device &device, vk::DescriptorSet &descSet, vk::DescriptorPool &descPool,
|
||||
vk::DescriptorSetLayout &descLayout);
|
||||
void bindBuffers(vk::Device &device, vk::Buffer &b, vk::DescriptorSet &set, uint32_t binding);
|
||||
|
||||
void createDescriptorPool(vk::Device &device,
|
||||
std::vector<vk::DescriptorSetLayoutBinding> &bindings, vk::DescriptorPool &descPool, uint32_t numDescriptors = 1);
|
||||
void createPipeline(vk::Device &device, vk::Pipeline &pipeline,
|
||||
vk::PipelineLayout &pipLayout, vk::SpecializationInfo &specInfo, vk::ShaderModule &sModule);
|
||||
void createShader(vk::Device &device, vk::ShaderModule &shaderModule, const std::string &filename);
|
||||
|
||||
}
|
||||
|
||||
struct TaskResources
|
||||
{
|
||||
//std::vector<Buffer> buffers; move this to user code
|
||||
vk::ShaderModule cShader;
|
||||
|
||||
vk::DescriptorSetLayout descriptorSetLayout;
|
||||
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||
vk::DescriptorSet descriptorSet;
|
||||
vk::DescriptorPool descriptorPool;
|
||||
|
||||
vk::Pipeline pipeline;
|
||||
vk::PipelineLayout pipelineLayout;
|
||||
|
||||
void destroy(vk::Device &device);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
113
include/utils.h
Normal file
113
include/utils.h
Normal file
@@ -0,0 +1,113 @@
|
||||
#ifndef UTILS
|
||||
#define UTILS
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
#define CAST(a) static_cast<uint32_t>(a.size())
|
||||
struct Buffer
|
||||
{
|
||||
vk::Buffer buf;
|
||||
vk::DeviceMemory mem;
|
||||
};
|
||||
|
||||
typedef uint32_t uint;
|
||||
|
||||
// Integer division of x by y, rounded up: the truncated quotient plus one
// whenever the division leaves a remainder. The result is converted to T.
template<typename T, typename V>
T ceilDiv(T x, V y) {
    const auto quotient = x / y;
    const bool hasRemainder = (x % y) != 0;
    return quotient + hasRemainder;
}
|
||||
|
||||
std::vector<char> readFile(const std::string &filename);
|
||||
std::string formatSize(uint64_t size);
|
||||
uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties, vk::PhysicalDevice &pdevice);
|
||||
void createBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||
const vk::DeviceSize &size, vk::BufferUsageFlags usage,
|
||||
vk::MemoryPropertyFlags properties, std::string name, vk::Buffer &buffer, vk::DeviceMemory &bufferMemory);
|
||||
void createBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||
const vk::DeviceSize &size, vk::BufferUsageFlags usage,
|
||||
vk::MemoryPropertyFlags properties, std::string name, Buffer &buffer);
|
||||
void destroyBuffer(vk::Device &device, Buffer &buffer);
|
||||
void copyBuffer(vk::Device &device, vk::Queue &q, vk::CommandPool &commandPool,
|
||||
const vk::Buffer &srcBuffer, vk::Buffer &dstBuffer, vk::DeviceSize byteSize);
|
||||
|
||||
vk::CommandBuffer beginSingleTimeCommands(vk::Device &device, vk::CommandPool &commandPool);
|
||||
void endSingleTimeCommands(vk::Device &device, vk::Queue &q,
|
||||
vk::CommandPool &commandPool, vk::CommandBuffer &commandBuffer);
|
||||
|
||||
Buffer addHostCoherentBuffer(vk::PhysicalDevice &pDevice, vk::Device &device, vk::DeviceSize size, std::string name);
|
||||
Buffer addDeviceOnlyBuffer(vk::PhysicalDevice &pDevice, vk::Device &device, vk::DeviceSize size, std::string name);
|
||||
|
||||
template <typename T>
|
||||
void fillDeviceBuffer(vk::Device &device, vk::DeviceMemory &mem, const std::vector<T> &input)
|
||||
{
|
||||
void *data = device.mapMemory(mem, 0, input.size() * sizeof(T), vk::MemoryMapFlags());
|
||||
memcpy(data, input.data(), static_cast<size_t>(input.size() * sizeof(T)));
|
||||
device.unmapMemory(mem);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillHostBuffer(vk::Device &device, vk::DeviceMemory &mem, std::vector<T> &output)
|
||||
{
|
||||
// copy memory from mem to output
|
||||
void *data = device.mapMemory(mem, 0, output.size() * sizeof(T), vk::MemoryMapFlags());
|
||||
memcpy(output.data(), data, static_cast<size_t>(output.size() * sizeof(T)));
|
||||
device.unmapMemory(mem);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillDeviceWithStagingBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||
vk::CommandPool &commandPool, vk::Queue &q,
|
||||
Buffer &b, const std::vector<T> &data)
|
||||
{
|
||||
// Buffer b is the copy destination, so it requires the eTransferDst bit
|
||||
// data (host) -> staging (device) -> Buffer b (device)
|
||||
vk::Buffer staging;
|
||||
vk::DeviceMemory mem;
|
||||
vk::DeviceSize byteSize = data.size() * sizeof(T);
|
||||
|
||||
createBuffer(pDevice, device, byteSize, vk::BufferUsageFlagBits::eTransferSrc,
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible, "staging",
|
||||
staging, mem);
|
||||
// V host -> staging V
|
||||
fillDeviceBuffer<T>(device, mem, data);
|
||||
// V staging -> buffer V
|
||||
copyBuffer(device, q, commandPool, staging, b.buf, byteSize);
|
||||
device.destroyBuffer(staging);
|
||||
device.freeMemory(mem);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillHostWithStagingBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||
vk::CommandPool &commandPool, vk::Queue &q,
|
||||
const Buffer &b, std::vector<T> &data)
|
||||
{
|
||||
// Buffer b is the copy source, so it requires the eTransferSrc bit
|
||||
// Buffer b (device) -> staging (device) -> data (host)
|
||||
vk::Buffer staging;
|
||||
vk::DeviceMemory mem;
|
||||
vk::DeviceSize byteSize = data.size() * sizeof(T);
|
||||
|
||||
createBuffer(pDevice, device, byteSize, vk::BufferUsageFlagBits::eTransferDst,
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible, "staging",
|
||||
staging, mem);
|
||||
// V buffer -> staging V
|
||||
copyBuffer(device, q, commandPool, b.buf, staging, byteSize);
|
||||
// V staging -> host V
|
||||
fillHostBuffer<T>(device, mem, data);
|
||||
|
||||
device.destroyBuffer(staging);
|
||||
device.freeMemory(mem);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void setObjectName(vk::Device &device, T handle, std::string name)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
vk::DebugUtilsObjectNameInfoEXT infoEXT(handle.objectType, uint64_t(static_cast<typename T::CType>(handle)), name.c_str());
|
||||
device.setDebugUtilsObjectNameEXT(infoEXT);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
21
shaders/A2Task1Interleaved.comp
Normal file
21
shaders/A2Task1Interleaved.comp
Normal file
@@ -0,0 +1,21 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
uint stride;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inoutBufer {uint v[];};
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
24
shaders/A2Task1KernelDecomposition.comp
Normal file
24
shaders/A2Task1KernelDecomposition.comp
Normal file
@@ -0,0 +1,24 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
uint offset;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||
layout(binding = 1) buffer outBuffer { uint g_v[]; };
|
||||
|
||||
// TODO: Shared variables
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
24
shaders/A2Task1KernelDecompositionAtomic.comp
Normal file
24
shaders/A2Task1KernelDecompositionAtomic.comp
Normal file
@@ -0,0 +1,24 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
uint offset;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||
layout(binding = 1) buffer outBuffer { uint g_v[]; };
|
||||
|
||||
// TODO: Shared variables
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
24
shaders/A2Task1KernelDecompositionUnroll.comp
Normal file
24
shaders/A2Task1KernelDecompositionUnroll.comp
Normal file
@@ -0,0 +1,24 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
uint offset;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||
layout(binding = 1) buffer outBuffer { uint g_v[]; };
|
||||
|
||||
// TODO: Shared variables
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
21
shaders/A2Task1Sequential.comp
Normal file
21
shaders/A2Task1Sequential.comp
Normal file
@@ -0,0 +1,21 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
uint offset;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inoutBufer { uint v[]; };
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
52
shaders/A2Task2KernelDecomposition.comp
Normal file
52
shaders/A2Task2KernelDecomposition.comp
Normal file
@@ -0,0 +1,52 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
|
||||
// Why did we not have conflicts in the Reduction?
|
||||
// Because of the sequential addressing (here we use interleaved => we have conflicts).
|
||||
// TODO: tailor to your architecture (these parameters work for virtually all NVIDIA GPUs)
|
||||
#define NUM_BANKS 32
|
||||
#define NUM_BANKS_LOG 5
|
||||
#define SIMD_GROUP_SIZE 32
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inoutBufer {uint array[];};
|
||||
layout(binding = 1) buffer offsetBufer {uint higherLevelArray[];};
|
||||
|
||||
// TODO: Shared variables
|
||||
|
||||
// Bank conflicts
|
||||
#define AVOID_BANK_CONFLICTS
|
||||
#ifdef AVOID_BANK_CONFLICTS
|
||||
// TODO: define your conflict-free macro here
|
||||
#else
|
||||
#define OFFSET(A) (A)
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void main()
|
||||
{
|
||||
// TODO: Kernel implementation
|
||||
|
||||
// Cache first half of elements in the local memory
|
||||
// Cache second half of elements
|
||||
|
||||
// Perform up-sweep
|
||||
|
||||
// Unroll the last steps when arrived at warp size
|
||||
// Set the last element to 0
|
||||
|
||||
|
||||
// Perform down-sweep
|
||||
}
|
||||
25
shaders/A2Task2KernelDecompositionOffset.comp
Normal file
25
shaders/A2Task2KernelDecompositionOffset.comp
Normal file
@@ -0,0 +1,25 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
layout (constant_id = 1) const uint SAMPLE_MULTIPLIER = 1;
|
||||
|
||||
// Push constant
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inoutBufer { uint v[]; };
|
||||
layout(binding = 1) buffer offsetBufer { uint g_v[]; };
|
||||
|
||||
// TODO: Shared variables
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
23
shaders/A2Task2Naive.comp
Normal file
23
shaders/A2Task2Naive.comp
Normal file
@@ -0,0 +1,23 @@
|
||||
#version 450
|
||||
|
||||
/* built in:
|
||||
in uvec3 gl_NumWorkGroups;
|
||||
in uvec3 gl_WorkGroupID;
|
||||
in uvec3 gl_LocalInvocationID;
|
||||
in uvec3 gl_GlobalInvocationID;
|
||||
in uint gl_LocalInvocationIndex;
|
||||
*/
|
||||
|
||||
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(push_constant) uniform PushStruct {
|
||||
uint size;
|
||||
uint offset;
|
||||
} p;
|
||||
|
||||
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||
layout(binding = 1) buffer outBufer { uint g_v[]; };
|
||||
|
||||
void main() {
|
||||
// TODO: Kernel implementation
|
||||
}
|
||||
41
src/A2Task1.cpp
Normal file
41
src/A2Task1.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "A2Task1.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
#include "task_common.h"
|
||||
#include "host_timer.h"
|
||||
|
||||
|
||||
// Creates a problem of `problemSize` elements filled with the pattern
// input[i] = i % 97 and computes the reference result for it.
A2Task1::A2Task1(uint problemSize) : input(problemSize, 0) {
    // Index with the same unsigned type as problemSize. `auto i = 0` deduces
    // int, which makes the comparison mixed-sign and overflows the counter
    // for sizes above INT_MAX.
    for (uint i = 0; i < problemSize; i++)
        input[i] = i % 97;
    computeReference();
}
|
||||
|
||||
A2Task1::A2Task1(std::vector<uint> input) : input(input) {
|
||||
computeReference();
|
||||
}
|
||||
|
||||
// Runs `solution` on the stored input (prepare, then compute) and compares
// its result with the reference value. On a mismatch a diagnostic is printed
// to stdout and false is returned; otherwise true.
bool A2Task1::evaluateSolution(A2Task1Solution& solution) {
    solution.prepare(input);
    solution.compute();

    const auto actual = solution.result();
    const bool matches = (reference == actual);
    if (!matches)
        std::cout << "error: expected " << reference << ", but got " << actual << std::endl;
    return matches;
}
|
||||
|
||||
void A2Task1::computeReference() {
|
||||
reference = 0;
|
||||
for (auto e : input)
|
||||
reference += e;
|
||||
}
|
||||
85
src/A2Task1Solution/Interleaved.cpp
Normal file
85
src/A2Task1Solution/Interleaved.cpp
Normal file
@@ -0,0 +1,85 @@
|
||||
#include "Interleaved.h"
|
||||
|
||||
#include "host_timer.h"
|
||||
|
||||
A2Task1SolutionInterleaved::A2Task1SolutionInterleaved(AppResources &app, uint workGroupSize) :
|
||||
app(app), workGroupSize(workGroupSize) {}
|
||||
|
||||
void A2Task1SolutionInterleaved::prepare(const std::vector<uint> &input)
|
||||
{
|
||||
mpInput = &input;
|
||||
|
||||
Cmn::addStorage(bindings, 0);
|
||||
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant));
|
||||
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||
|
||||
// Specialization constant for workgroup size
|
||||
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||
{{0U, 0U, sizeof(workGroupSize)}},
|
||||
};
|
||||
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||
CAST(specValues) * sizeof(int), specValues.data());
|
||||
|
||||
Cmn::createShader(app.device, shaderModule, workingDir +"build/shaders/A2Task1Interleaved.comp.spv");
|
||||
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, shaderModule);
|
||||
|
||||
createBuffer(app.pDevice, app.device, mpInput->size() * sizeof((*mpInput)[0]),
|
||||
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal, "inoutBuffer", inoutBuffer);
|
||||
|
||||
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffer, input);
|
||||
|
||||
Cmn::createDescriptorPool(app.device, bindings, descriptorPool);
|
||||
Cmn::allocateDescriptorSet(app.device, descriptorSet, descriptorPool, descriptorSetLayout);
|
||||
Cmn::bindBuffers(app.device, inoutBuffer.buf, descriptorSet, 0);
|
||||
}
|
||||
|
||||
void A2Task1SolutionInterleaved::compute()
|
||||
{
|
||||
vk::CommandBufferAllocateInfo allocInfo(
|
||||
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||
|
||||
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||
|
||||
cb.begin(beginInfo);
|
||||
|
||||
// TODO: Implement reduction with interleaved addressing
|
||||
|
||||
cb.end();
|
||||
|
||||
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||
|
||||
HostTimer timer;
|
||||
|
||||
app.computeQueue.submit({submitInfo});
|
||||
app.device.waitIdle();
|
||||
|
||||
mstime = timer.elapsed() * 1000;
|
||||
|
||||
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||
}
|
||||
|
||||
uint A2Task1SolutionInterleaved::result() const
|
||||
{
|
||||
std::vector<uint> result(1, 0);
|
||||
fillHostWithStagingBuffer<uint>(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffer, result);
|
||||
return result[0];
|
||||
}
|
||||
|
||||
void A2Task1SolutionInterleaved::cleanup()
|
||||
{
|
||||
app.device.destroyDescriptorPool(descriptorPool);
|
||||
|
||||
app.device.destroyPipeline(pipeline);
|
||||
app.device.destroyShaderModule(shaderModule);
|
||||
|
||||
app.device.destroyPipelineLayout(pipelineLayout);
|
||||
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||
bindings.clear();
|
||||
|
||||
destroyBuffer(app.device, inoutBuffer);
|
||||
}
|
||||
42
src/A2Task1Solution/Interleaved.h
Normal file
42
src/A2Task1Solution/Interleaved.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include "A2Task1.h"
|
||||
|
||||
class A2Task1SolutionInterleaved : public A2Task1Solution{
|
||||
public:
|
||||
A2Task1SolutionInterleaved(AppResources &app, uint workGroupSize);
|
||||
|
||||
void prepare(const std::vector<uint> &input) override;
|
||||
void compute() override;
|
||||
uint result() const override;
|
||||
void cleanup() override;
|
||||
|
||||
private:
|
||||
struct PushConstant
|
||||
{
|
||||
uint size;
|
||||
uint stride;
|
||||
};
|
||||
|
||||
AppResources &app;
|
||||
uint workGroupSize;
|
||||
|
||||
const std::vector<uint>* mpInput;
|
||||
|
||||
Buffer inoutBuffer;
|
||||
|
||||
// Descriptor & Pipeline Layout
|
||||
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||
vk::DescriptorSetLayout descriptorSetLayout;
|
||||
vk::PipelineLayout pipelineLayout;
|
||||
|
||||
// Local PPS Pipeline
|
||||
vk::ShaderModule shaderModule;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
// Descriptor Pool
|
||||
vk::DescriptorPool descriptorPool;
|
||||
|
||||
// Per-dispatch data
|
||||
vk::DescriptorSet descriptorSet;
|
||||
};
|
||||
97
src/A2Task1Solution/KernelDecomposition.cpp
Normal file
97
src/A2Task1Solution/KernelDecomposition.cpp
Normal file
@@ -0,0 +1,97 @@
|
||||
#include "KernelDecomposition.h"
|
||||
|
||||
#include "host_timer.h"
|
||||
|
||||
A2Task1SolutionKernelDecomposition::A2Task1SolutionKernelDecomposition(AppResources &app, uint workGroupSize, std::string shaderFileName) :
|
||||
app(app), workGroupSize(workGroupSize), shaderFileName(shaderFileName) {}
|
||||
|
||||
void A2Task1SolutionKernelDecomposition::prepare(const std::vector<uint> &input)
|
||||
{
|
||||
mpInput = &input;
|
||||
|
||||
Cmn::addStorage(bindings, 0);
|
||||
Cmn::addStorage(bindings, 1);
|
||||
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant));
|
||||
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||
|
||||
// Specialization constant for workgroup size
|
||||
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||
{{0U, 0U, sizeof(workGroupSize)}},
|
||||
};
|
||||
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||
CAST(specValues) * sizeof(int), specValues.data());
|
||||
|
||||
Cmn::createShader(app.device, shaderModule, shaderFileName);
|
||||
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, shaderModule);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
createBuffer(app.pDevice, app.device, mpInput->size() * sizeof((*mpInput)[0]),
|
||||
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal, "buffer_" + std::to_string(i), buffers[i].buf, buffers[i].mem);
|
||||
}
|
||||
|
||||
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, buffers[0], input);
|
||||
|
||||
Cmn::createDescriptorPool(app.device, bindings, descriptorPool, 2);
|
||||
for (int i = 0; i < 2; i++)
|
||||
Cmn::allocateDescriptorSet(app.device, descriptorSets[i], descriptorPool, descriptorSetLayout);
|
||||
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[0], 0);
|
||||
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[0], 1);
|
||||
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[1], 0);
|
||||
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[1], 1);
|
||||
}
|
||||
|
||||
void A2Task1SolutionKernelDecomposition::compute()
|
||||
{
|
||||
vk::CommandBufferAllocateInfo allocInfo(
|
||||
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||
|
||||
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||
|
||||
cb.begin(beginInfo);
|
||||
|
||||
// TODO: Implement reduction with kernel decomposition
|
||||
// NOTE: make sure that activeBuffer points to the buffer with the final result in the end
|
||||
// That buffer is read back for the correctness check
|
||||
// (A2Task1SolutionKernelDecomposition::result())
|
||||
// HINT: You can alternate between the two provided descriptor sets to implement ping-pong
|
||||
|
||||
cb.end();
|
||||
|
||||
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||
|
||||
HostTimer timer;
|
||||
|
||||
app.computeQueue.submit({submitInfo});
|
||||
app.device.waitIdle();
|
||||
|
||||
mstime = timer.elapsed() * 1000;
|
||||
|
||||
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||
}
|
||||
|
||||
/**
 * Reads the reduction result back from the device.
 *
 * A one-element host vector is filled from `buffers[activeBuffer]` through
 * fillHostWithStagingBuffer and its single element is returned.
 */
uint A2Task1SolutionKernelDecomposition::result() const
{
    std::vector<uint> readback(1, 0);
    fillHostWithStagingBuffer<uint>(app.pDevice, app.device, app.transferCommandPool, app.transferQueue,
                                    buffers[activeBuffer], readback);
    return readback.front();
}
|
||||
|
||||
void A2Task1SolutionKernelDecomposition::cleanup()
|
||||
{
|
||||
app.device.destroyDescriptorPool(descriptorPool);
|
||||
|
||||
app.device.destroyPipeline(pipeline);
|
||||
app.device.destroyShaderModule(shaderModule);
|
||||
|
||||
app.device.destroyPipelineLayout(pipelineLayout);
|
||||
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||
bindings.clear();
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
destroyBuffer(app.device, buffers[i]);
|
||||
}
|
||||
44
src/A2Task1Solution/KernelDecomposition.h
Normal file
44
src/A2Task1Solution/KernelDecomposition.h
Normal file
@@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
|
||||
#include "A2Task1.h"
|
||||
|
||||
class A2Task1SolutionKernelDecomposition : public A2Task1Solution{
|
||||
public:
|
||||
A2Task1SolutionKernelDecomposition(AppResources &app, uint workGroupSize, std::string shaderFileName);
|
||||
|
||||
void prepare(const std::vector<uint> &input) override;
|
||||
void compute() override;
|
||||
uint result() const override;
|
||||
void cleanup() override;
|
||||
|
||||
private:
|
||||
struct PushConstant
|
||||
{
|
||||
uint size;
|
||||
};
|
||||
|
||||
AppResources &app;
|
||||
uint workGroupSize;
|
||||
std::string shaderFileName;
|
||||
|
||||
const std::vector<uint>* mpInput;
|
||||
|
||||
Buffer buffers[2];
|
||||
|
||||
// Descriptor & Pipeline Layout
|
||||
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||
vk::DescriptorSetLayout descriptorSetLayout;
|
||||
vk::PipelineLayout pipelineLayout;
|
||||
|
||||
// Local PPS Pipeline
|
||||
vk::ShaderModule shaderModule;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
// Descriptor Pool
|
||||
vk::DescriptorPool descriptorPool;
|
||||
|
||||
// Per-dispatch data
|
||||
vk::DescriptorSet descriptorSets[2];
|
||||
|
||||
uint activeBuffer = 0;
|
||||
};
|
||||
90
src/A2Task1Solution/Sequential.cpp
Normal file
90
src/A2Task1Solution/Sequential.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
#include "Sequential.h"
|
||||
|
||||
#include "host_timer.h"
|
||||
|
||||
/** Stores the application resources and the workgroup size; no Vulkan objects are created here. */
A2Task1SolutionSequential::A2Task1SolutionSequential(AppResources &app, uint workGroupSize)
    : app(app)
    , workGroupSize(workGroupSize)
{
}
|
||||
|
||||
void A2Task1SolutionSequential::prepare(const std::vector<uint> &input)
|
||||
{
|
||||
mpInput = &input;
|
||||
|
||||
Cmn::addStorage(bindings, 0);
|
||||
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant));
|
||||
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||
|
||||
// Specialization constant for workgroup size
|
||||
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||
{{0U, 0U, sizeof(workGroupSize)}},
|
||||
};
|
||||
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||
CAST(specValues) * sizeof(int), specValues.data());
|
||||
|
||||
Cmn::createShader(app.device, shaderModule, workingDir +"build/shaders/A2Task1Sequential.comp.spv");
|
||||
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, shaderModule);
|
||||
|
||||
createBuffer(app.pDevice, app.device, mpInput->size() * sizeof((*mpInput)[0]),
|
||||
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal, "inoutBuffer", inoutBuffer.buf, inoutBuffer.mem);
|
||||
|
||||
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffer, input);
|
||||
|
||||
Cmn::createDescriptorPool(app.device, bindings, descriptorPool);
|
||||
Cmn::allocateDescriptorSet(app.device, descriptorSet, descriptorPool, descriptorSetLayout);
|
||||
Cmn::bindBuffers(app.device, inoutBuffer.buf, descriptorSet, 0);
|
||||
}
|
||||
|
||||
void A2Task1SolutionSequential::compute()
|
||||
{
|
||||
vk::CommandBufferAllocateInfo allocInfo(
|
||||
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||
|
||||
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||
|
||||
cb.begin(beginInfo);
|
||||
|
||||
// TODO: Implement reduction with sequential addressing
|
||||
|
||||
cb.end();
|
||||
|
||||
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||
|
||||
HostTimer timer;
|
||||
|
||||
app.computeQueue.submit({submitInfo});
|
||||
app.device.waitIdle();
|
||||
|
||||
mstime = timer.elapsed() * 1000;
|
||||
|
||||
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||
}
|
||||
|
||||
/**
 * Reads the reduction result back from the device.
 *
 * A one-element host vector is filled from `inoutBuffer` through
 * fillHostWithStagingBuffer and its single element is returned.
 */
uint A2Task1SolutionSequential::result() const
{
    std::vector<uint> readback(1, 0);
    fillHostWithStagingBuffer<uint>(app.pDevice, app.device, app.transferCommandPool, app.transferQueue,
                                    inoutBuffer, readback);
    return readback.front();
}
|
||||
|
||||
void A2Task1SolutionSequential::cleanup()
|
||||
{
|
||||
app.device.destroyDescriptorPool(descriptorPool);
|
||||
|
||||
app.device.destroyPipeline(pipeline);
|
||||
app.device.destroyShaderModule(shaderModule);
|
||||
|
||||
app.device.destroyPipelineLayout(pipelineLayout);
|
||||
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||
bindings.clear();
|
||||
|
||||
auto Bclean = [&](Buffer &b){
|
||||
app.device.destroyBuffer(b.buf);
|
||||
app.device.freeMemory(b.mem);
|
||||
};
|
||||
|
||||
Bclean(inoutBuffer);
|
||||
}
|
||||
42
src/A2Task1Solution/Sequential.h
Normal file
42
src/A2Task1Solution/Sequential.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include "A2Task1.h"
|
||||
|
||||
class A2Task1SolutionSequential : public A2Task1Solution{
|
||||
public:
|
||||
A2Task1SolutionSequential(AppResources &app, uint workGroupSize);
|
||||
|
||||
void prepare(const std::vector<uint> &input) override;
|
||||
void compute() override;
|
||||
uint result() const override;
|
||||
void cleanup() override;
|
||||
|
||||
private:
|
||||
struct PushConstant
|
||||
{
|
||||
uint size;
|
||||
uint offset;
|
||||
};
|
||||
|
||||
AppResources &app;
|
||||
uint workGroupSize;
|
||||
|
||||
const std::vector<uint>* mpInput;
|
||||
|
||||
Buffer inoutBuffer;
|
||||
|
||||
// Descriptor & Pipeline Layout
|
||||
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||
vk::DescriptorSetLayout descriptorSetLayout;
|
||||
vk::PipelineLayout pipelineLayout;
|
||||
|
||||
// Local PPS Pipeline
|
||||
vk::ShaderModule shaderModule;
|
||||
vk::Pipeline pipeline;
|
||||
|
||||
// Descriptor Pool
|
||||
vk::DescriptorPool descriptorPool;
|
||||
|
||||
// Per-dispatch data
|
||||
vk::DescriptorSet descriptorSet;
|
||||
};
|
||||
42
src/A2Task2.cpp
Normal file
42
src/A2Task2.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
#include "A2Task2.h"

#include <utility>
|
||||
|
||||
/**
 * Builds a test problem of `problemSize` elements with the repeating
 * pattern 0, 1, ..., 96 and computes the reference prefix sum for it.
 *
 * @param problemSize  Number of input elements to generate.
 */
A2Task2::A2Task2(uint problemSize) : input(problemSize, 0) {
    // The index has the same unsigned type as problemSize: a signed `int`
    // index would overflow (and compare incorrectly) for sizes above INT_MAX.
    for (uint i = 0; i < problemSize; i++)
        input[i] = i % 97;
    computeReference();
}
|
||||
|
||||
/**
 * Builds a test problem from caller-provided data and computes the
 * reference prefix sum for it.
 *
 * @param input  Values to scan; taken by value and moved into the member so
 *               that the data is not copied a second time.
 */
A2Task2::A2Task2(std::vector<uint> input) : input(std::move(input)) {
    computeReference();
}
|
||||
|
||||
void A2Task2::computeReference() {
|
||||
reference.reserve(input.size());
|
||||
uint acc = 0;
|
||||
for (auto i = 0; i < input.size(); i++) {
|
||||
acc += input[i];
|
||||
reference.push_back(acc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs `solution` on the stored input and compares its output with the
 * CPU reference element by element.
 *
 * The solution is prepared and computed here; cleanup() is not called by
 * this function.
 *
 * @param solution  Scan implementation under test.
 * @return true if the result has the reference's size and every element
 *         matches; false otherwise (the first mismatch is printed to stdout).
 */
bool A2Task2::evaluateSolution(A2Task2Solution& solution) {
    solution.prepare(input);
    solution.compute();
    const auto result = solution.result();

    if (result.size() != reference.size()) {
        // Terminate and flush the line like the other error messages do.
        std::cout << "error: result and reference vector size don't match!" << std::endl;
        return false;
    }

    for (uint i = 0; i < reference.size(); i++) {
        if (result[i] != reference[i]) {
            std::cout << "error: result and reference don't match at index " << i << "!" << std::endl;
            std::cout << "\tresult: " << result[i] << std::endl;
            std::cout << "\treference: " << reference[i] << std::endl;
            return false;
        }
    }

    return true;
}
|
||||
114
src/A2Task2Solution/KernelDecomposition.cpp
Normal file
114
src/A2Task2Solution/KernelDecomposition.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
#include "KernelDecomposition.h"
|
||||
|
||||
#include "host_timer.h"
|
||||
|
||||
/** Stores the application resources and the workgroup size; no Vulkan objects are created here. */
A2Task2SolutionKernelDecomposition::A2Task2SolutionKernelDecomposition(AppResources& app, uint workGroupSize)
    : app(app)
    , workGroupSize(workGroupSize)
{
}
|
||||
|
||||
void A2Task2SolutionKernelDecomposition::prepare(const std::vector<uint>& input) {
|
||||
workSize = input.size();
|
||||
|
||||
// Descriptor & Pipeline Layout
|
||||
Cmn::addStorage(bindings, 0);
|
||||
Cmn::addStorage(bindings, 1);
|
||||
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushStruct));
|
||||
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||
|
||||
// Specialization constant for workgroup size
|
||||
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||
{{0U, 0U, sizeof(workGroupSize)}},
|
||||
};
|
||||
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||
CAST(specValues) * sizeof(int), specValues.data());
|
||||
|
||||
// Local PPS Pipeline
|
||||
Cmn::createShader(app.device, cShaderLocalPPS, workingDir + "build/shaders/A2Task2KernelDecomposition.comp.spv");
|
||||
Cmn::createPipeline(app.device, pipelineLocalPPS, pipelineLayout, specInfo, cShaderLocalPPS);
|
||||
|
||||
// Local PPS Offset Pipeline
|
||||
Cmn::createShader(app.device, cShaderLocalPPSOffset,
|
||||
workingDir + "build/shaders/A2Task2KernelDecompositionOffset.comp.spv");
|
||||
Cmn::createPipeline(app.device, pipelineLocalPPSOffset, pipelineLayout, specInfo, cShaderLocalPPSOffset);
|
||||
|
||||
// ### create buffers, get their index in the task.buffers[] array ###
|
||||
using BFlag = vk::BufferUsageFlagBits;
|
||||
auto makeDLocalBuffer = [ this ](vk::BufferUsageFlags usage, vk::DeviceSize size, std::string name) -> Buffer {
|
||||
Buffer b;
|
||||
createBuffer(app.pDevice, app.device, size, usage, vk::MemoryPropertyFlagBits::eDeviceLocal, name, b.buf,
|
||||
b.mem);
|
||||
return b;
|
||||
};
|
||||
|
||||
inoutBuffers.push_back(makeDLocalBuffer(BFlag::eTransferDst | BFlag::eTransferSrc | BFlag::eStorageBuffer,
|
||||
input.size() * sizeof(uint32_t), "buffer_inout_0"));
|
||||
|
||||
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffers[0],
|
||||
input);
|
||||
|
||||
// TO DO create additional buffers (by pushing into inoutBuffers) and descriptors (by pushing into descriptorSets)
|
||||
// You need to create an appropriately-sized DescriptorPool first
|
||||
}
|
||||
|
||||
void A2Task2SolutionKernelDecomposition::compute() {
|
||||
vk::CommandBufferAllocateInfo allocInfo(
|
||||
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||
vk::CommandBuffer cb = app.device.allocateCommandBuffers(allocInfo)[0];
|
||||
|
||||
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||
|
||||
cb.begin(beginInfo);
|
||||
|
||||
// TODO: Implement efficient version of scan
|
||||
// Make sure that the local prefix sum works before you start experimenting with large arrays
|
||||
|
||||
cb.end();
|
||||
|
||||
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||
|
||||
HostTimer timer;
|
||||
|
||||
app.computeQueue.submit({submitInfo});
|
||||
app.device.waitIdle();
|
||||
|
||||
mstime = timer.elapsed() * 1000;
|
||||
|
||||
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||
}
|
||||
|
||||
std::vector<uint> A2Task2SolutionKernelDecomposition::result() const {
|
||||
std::vector<uint> result(workSize, 0);
|
||||
fillHostWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffers[0],
|
||||
result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
void A2Task2SolutionKernelDecomposition::cleanup() {
|
||||
|
||||
app.device.destroyDescriptorPool(descriptorPool);
|
||||
|
||||
app.device.destroyPipeline(pipelineLocalPPSOffset);
|
||||
app.device.destroyShaderModule(cShaderLocalPPSOffset);
|
||||
|
||||
app.device.destroyPipeline(pipelineLocalPPS);
|
||||
app.device.destroyShaderModule(cShaderLocalPPS);
|
||||
|
||||
app.device.destroyPipelineLayout(pipelineLayout);
|
||||
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||
bindings.clear();
|
||||
|
||||
auto Bclean = [&](Buffer& b) {
|
||||
app.device.destroyBuffer(b.buf);
|
||||
app.device.freeMemory(b.mem);
|
||||
};
|
||||
|
||||
for (auto inoutBuffer: inoutBuffers) {
|
||||
Bclean(inoutBuffer);
|
||||
}
|
||||
|
||||
inoutBuffers.clear();
|
||||
}
|
||||
55
src/A2Task2Solution/KernelDecomposition.h
Normal file
55
src/A2Task2Solution/KernelDecomposition.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#pragma once

#include <iostream>
#include <cstdlib>
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1

#include <vulkan/vulkan.hpp>
#include <fstream>
#include <string>
#include <vector>
#include "initialization.h"
#include "utils.h"
#include "task_common.h"

#include "A2Task2.h"

/**
 * Scan solution based on kernel decomposition.
 *
 * Two pipelines share one layout: a local prefix-sum pipeline and a
 * "local PPS offset" pipeline. Buffers live in `inoutBuffers`;
 * result() reads back `inoutBuffers[0]`.
 */
struct A2Task2SolutionKernelDecomposition : A2Task2Solution {
public:
    A2Task2SolutionKernelDecomposition(AppResources &app, uint workGroupSize);

    void prepare(const std::vector<uint> &input) override;
    void compute() override;
    std::vector<uint> result() const override;
    void cleanup() override;

private:
    // Push-constant block; its byte size defines the pipeline layout's push-constant range.
    struct PushStruct
    {
        uint32_t size; // presumably the element count of the current pass -- confirm against the shader
    };

    AppResources &app;
    uint workGroupSize;
    std::string localPPSShaderFileName;

    // Number of input elements; set by prepare(), zero before that.
    uint workSize = 0;

    std::vector<Buffer> inoutBuffers;

    // Descriptor & Pipeline Layout
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    vk::DescriptorSetLayout descriptorSetLayout;
    vk::PipelineLayout pipelineLayout;

    // Local PPS Pipeline
    vk::ShaderModule cShaderLocalPPS;
    vk::Pipeline pipelineLocalPPS;

    // Local PPS Offset Pipeline
    vk::ShaderModule cShaderLocalPPSOffset;
    vk::Pipeline pipelineLocalPPSOffset;

    // Descriptor Pool
    vk::DescriptorPool descriptorPool;

    // TODO extend with any additional members you may need
};
|
||||
|
||||
100
src/A2Task2Solution/Naive.cpp
Normal file
100
src/A2Task2Solution/Naive.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
#include "Naive.h"
|
||||
|
||||
#include "host_timer.h"
|
||||
|
||||
/** Stores the application resources and the workgroup size; no Vulkan objects are created here. */
A2Task2SolutioNaive::A2Task2SolutioNaive(AppResources &app, uint workGroupSize)
    : app(app)
    , workGroupSize(workGroupSize)
{
}
|
||||
|
||||
void A2Task2SolutioNaive::prepare(const std::vector<uint> &input) {
|
||||
workSize = input.size();
|
||||
|
||||
// Descriptor & Pipeline Layout
|
||||
Cmn::addStorage(bindings, 0);
|
||||
Cmn::addStorage(bindings, 1);
|
||||
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushStruct));
|
||||
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||
|
||||
// Specialization constant for workgroup size
|
||||
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||
{{0U, 0U, sizeof(workGroupSize)}},
|
||||
};
|
||||
std::array<uint32_t, 2> specValues = {workGroupSize}; //for workgroup sizes
|
||||
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||
CAST(specValues) * sizeof(int), specValues.data());
|
||||
|
||||
// Local PPS Offset Pipeline
|
||||
Cmn::createShader(app.device, cShader, workingDir +"build/shaders/A2Task2Naive.comp.spv");
|
||||
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, cShader);
|
||||
|
||||
// ### create buffers, get their index in the task.buffers[] array ###
|
||||
using BFlag = vk::BufferUsageFlagBits;
|
||||
for (int i = 0; i < 2; i++)
|
||||
createBuffer(app.pDevice, app.device, input.size() * sizeof(uint32_t), BFlag::eTransferDst | BFlag::eTransferSrc | BFlag::eStorageBuffer, vk::MemoryPropertyFlagBits::eDeviceLocal, "buffer_" + std::to_string(i), buffers[i]);
|
||||
|
||||
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, buffers[0], input);
|
||||
|
||||
Cmn::createDescriptorPool(app.device, bindings, descriptorPool, 2);
|
||||
|
||||
for (uint i = 0; i < 2; i++)
|
||||
Cmn::allocateDescriptorSet(app.device, descriptorSets[i], descriptorPool, descriptorSetLayout);
|
||||
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[0], 0);
|
||||
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[0], 1);
|
||||
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[1], 0);
|
||||
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[1], 1);
|
||||
|
||||
activeBuffer = 0;
|
||||
}
|
||||
|
||||
void A2Task2SolutioNaive::compute() {
|
||||
vk::CommandBufferAllocateInfo allocInfo(
|
||||
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||
|
||||
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||
|
||||
cb.begin(beginInfo);
|
||||
|
||||
cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
|
||||
|
||||
// TODO: Implement naive scan
|
||||
// NOTE: make sure that activeBuffer points to the buffer with the final result in the end
|
||||
// That buffer is read back for the correctness check
|
||||
// (A2Task2SolutionNaive::result())
|
||||
// HINT: You can alternate between the two provided descriptor sets to implement ping-pong
|
||||
|
||||
cb.end();
|
||||
|
||||
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||
|
||||
HostTimer timer;
|
||||
|
||||
app.computeQueue.submit({submitInfo});
|
||||
app.device.waitIdle();
|
||||
|
||||
mstime = timer.elapsed() * 1000;
|
||||
|
||||
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||
}
|
||||
|
||||
std::vector<uint> A2Task2SolutioNaive::result() const {
|
||||
std::vector<uint> result(workSize, 0);
|
||||
fillHostWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, buffers[activeBuffer], result);
|
||||
return result;
|
||||
}
|
||||
|
||||
void A2Task2SolutioNaive::cleanup() {
|
||||
app.device.destroyDescriptorPool(descriptorPool);
|
||||
|
||||
app.device.destroyPipeline(pipeline);
|
||||
app.device.destroyShaderModule(cShader);
|
||||
|
||||
app.device.destroyPipelineLayout(pipelineLayout);
|
||||
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||
bindings.clear();
|
||||
|
||||
for (auto buffer : buffers)
|
||||
destroyBuffer(app.device, buffer);
|
||||
}
|
||||
53
src/A2Task2Solution/Naive.h
Normal file
53
src/A2Task2Solution/Naive.h
Normal file
@@ -0,0 +1,53 @@
|
||||
#pragma once

#include <iostream>
#include <cstdlib>
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1

#include <vulkan/vulkan.hpp>
#include <fstream>
#include <vector>
#include "initialization.h"
#include "utils.h"
#include "task_common.h"

#include "A2Task2.h"

/**
 * Naive scan solution.
 *
 * Two buffers and two descriptor sets are kept so that input and output can
 * be swapped between dispatches (ping-pong); `activeBuffer` selects the
 * buffer that result() reads back.
 */
struct A2Task2SolutioNaive : A2Task2Solution {
public:
    A2Task2SolutioNaive(AppResources &app, uint workGroupSize);

    void prepare(const std::vector<uint> &input) override;
    void compute() override;
    std::vector<uint> result() const override;
    void cleanup() override;

private:
    // Push-constant block; its byte size defines the pipeline layout's push-constant range.
    struct PushStruct
    {
        uint size;   // presumably the element count -- confirm against the shader
        uint offset; // presumably the stride of the current scan pass -- confirm against the shader
    };

    AppResources &app;
    uint workGroupSize;

    // Number of input elements; set by prepare(), zero before that.
    uint workSize = 0;

    // Ping-pong storage buffers.
    Buffer buffers[2];

    // Descriptor & Pipeline Layout
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    vk::DescriptorSetLayout descriptorSetLayout;
    vk::PipelineLayout pipelineLayout;

    vk::ShaderModule cShader;
    vk::Pipeline pipeline;

    // Descriptor Pool
    vk::DescriptorPool descriptorPool;

    // Descriptors: set 0 binds buffers (0, 1), set 1 binds them in swapped order
    vk::DescriptorSet descriptorSets[2];

    // Index into `buffers` of the buffer that holds the final result.
    uint activeBuffer = 0;
};
|
||||
|
||||
15
src/host_timer.cpp
Normal file
15
src/host_timer.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
#include "host_timer.h"
|
||||
|
||||
/** Starts timing immediately: construction records the current time via reset(). */
HostTimer::HostTimer()
{
    this->reset();
}
|
||||
|
||||
void HostTimer::reset() {
|
||||
start = clock::now();
|
||||
}
|
||||
|
||||
double HostTimer::elapsed() const {
|
||||
auto end = clock::now();
|
||||
std::chrono::duration<double> duration = end - start;
|
||||
return duration.count();
|
||||
}
|
||||
518
src/initialization.cpp
Normal file
518
src/initialization.cpp
Normal file
@@ -0,0 +1,518 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
|
||||
#define VK_ENABLE_BETA_EXTENSIONS
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
// Here you create the instance and physical / logical device and maybe compute/transfer queues
|
||||
// Also check if device is suitable etc
|
||||
|
||||
// Identifies a physical device by a pair of numeric IDs.
// NOTE(review): presumably the record stored in the "device_selection_cache"
// file used by selectPhysicalDevice -- confirm against that function.
struct DeviceSelectionCache {
    uint32_t vendorID; // vendor identifier
    uint32_t deviceID; // device identifier
};
|
||||
|
||||
#ifdef NDEBUG
|
||||
const bool enableValidationLayers = false;
|
||||
#else
|
||||
const bool enableValidationLayers = true;
|
||||
#endif
|
||||
|
||||
const std::vector<const char*> validationLayers = {
|
||||
#ifndef NDEBUG
|
||||
"VK_LAYER_KHRONOS_validation"
|
||||
#endif
|
||||
};
|
||||
const std::vector<const char*> instanceExtensions = {
|
||||
#ifndef NDEBUG
|
||||
VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
|
||||
#endif
|
||||
};
|
||||
|
||||
const std::vector<const char*> extensionNames = {
|
||||
#ifndef NDEBUG
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
void AppResources::destroy()
|
||||
{
|
||||
this->device.destroyQueryPool(this->queryPool);
|
||||
//this->device.freeCommandBuffers(this->computeCommandPool, 1U, &this->computeCommandBuffer);
|
||||
//this->device.freeCommandBuffers(this->transferCommandPool, 1U, &this->transferCommandBuffer);
|
||||
this->device.destroyCommandPool(this->computeCommandPool);
|
||||
//this->device.destroyCommandPool(this->transferCommandPool);
|
||||
|
||||
this->device.destroy();
|
||||
|
||||
#ifndef NDEBUG
|
||||
this->instance.destroyDebugUtilsMessengerEXT(this->dbgUtilsMgr);
|
||||
#endif
|
||||
this->instance.destroy();
|
||||
}
|
||||
/**
 * Initializes all application-level Vulkan resources in `app`: instance,
 * physical device and its properties, logical device, queue, command pool
 * and a timestamp query pool with two queries.
 *
 * A single queue and command pool are used for both compute and transfer:
 * the transfer members are set to the compute ones.
 */
void initApp(AppResources& app)
{
    createInstance(app.instance, app.dbgUtilsMgr, "Assignment1, Task 1", "Idkwhattowrite");
    selectPhysicalDevice(app.instance, app.pDevice);

    // Query general and subgroup properties in one call.
    auto propertyChain =
        app.pDevice.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceSubgroupProperties>();
    app.pDeviceProperties = propertyChain.get<vk::PhysicalDeviceProperties2>();
    app.pDeviceSubgroupProperties = propertyChain.get<vk::PhysicalDeviceSubgroupProperties>();

    const auto [computeFamily, transferFamily] = getComputeAndTransferQueues(app.pDevice);
    app.cQ = computeFamily;
    app.tQ = transferFamily;
    createLogicalDevice(app.instance, app.pDevice, app.device);

    // The compute queue doubles as transfer queue, overriding the family found above.
    app.device.getQueue(app.cQ, 0U, &app.computeQueue);
    app.tQ = app.cQ;
    app.transferQueue = app.computeQueue;

    // Likewise, one command pool serves both purposes.
    createCommandPool(app.device, app.computeCommandPool, app.cQ);
    app.transferCommandPool = app.computeCommandPool;

    createTimestampQueryPool(app.device, app.queryPool, 2);
}
|
||||
|
||||
|
||||
//This is the function in which errors will go through to be displayed.
|
||||
|
||||
VKAPI_ATTR VkBool32 VKAPI_CALL
|
||||
debugUtilsMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
|
||||
VkDebugUtilsMessageTypeFlagsEXT messageTypes,
|
||||
VkDebugUtilsMessengerCallbackDataEXT const* pCallbackData,
|
||||
void* /*pUserData*/)
|
||||
{
|
||||
if (enableValidationLayers)
|
||||
{
|
||||
if (pCallbackData->messageIdNumber == 648835635)
|
||||
{
|
||||
// UNASSIGNED-khronos-Validation-debug-build-warning-message
|
||||
return VK_FALSE;
|
||||
}
|
||||
if (pCallbackData->messageIdNumber == 767975156)
|
||||
{
|
||||
// UNASSIGNED-BestPractices-vkCreateInstance-specialuse-extension
|
||||
return VK_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
std::cerr << vk::to_string(static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(messageSeverity)) << ": "
|
||||
<< vk::to_string(static_cast<vk::DebugUtilsMessageTypeFlagsEXT>(messageTypes)) << ":\n";
|
||||
std::cerr << "\t"
|
||||
<< "messageIDName = <" << pCallbackData->pMessageIdName << ">\n";
|
||||
std::cerr << "\t"
|
||||
<< "messageIdNumber = " << pCallbackData->messageIdNumber << "\n";
|
||||
std::cerr << "\t"
|
||||
<< "message = <" << pCallbackData->pMessage << ">\n";
|
||||
if (0 < pCallbackData->queueLabelCount)
|
||||
{
|
||||
std::cerr << "\t"
|
||||
<< "Queue Labels:\n";
|
||||
for (uint8_t i = 0; i < pCallbackData->queueLabelCount; i++)
|
||||
{
|
||||
std::cerr << "\t\t"
|
||||
<< "labelName = <" << pCallbackData->pQueueLabels[i].pLabelName << ">\n";
|
||||
}
|
||||
}
|
||||
if (0 < pCallbackData->cmdBufLabelCount)
|
||||
{
|
||||
std::cerr << "\t"
|
||||
<< "CommandBuffer Labels:\n";
|
||||
for (uint8_t i = 0; i < pCallbackData->cmdBufLabelCount; i++)
|
||||
{
|
||||
std::cerr << "\t\t"
|
||||
<< "labelName = <" << pCallbackData->pCmdBufLabels[i].pLabelName << ">\n";
|
||||
}
|
||||
}
|
||||
if (0 < pCallbackData->objectCount)
|
||||
{
|
||||
std::cerr << "\t"
|
||||
<< "Objects:\n";
|
||||
for (uint8_t i = 0; i < pCallbackData->objectCount; i++)
|
||||
{
|
||||
std::cerr << "\t\t"
|
||||
<< "Object " << i << "\n";
|
||||
std::cerr << "\t\t\t"
|
||||
<< "objectType = "
|
||||
<< vk::to_string(static_cast<vk::ObjectType>(pCallbackData->pObjects[i].objectType)) << "\n";
|
||||
std::cerr << "\t\t\t"
|
||||
<< "objectHandle = " << pCallbackData->pObjects[i].objectHandle << "\n";
|
||||
if (pCallbackData->pObjects[i].pObjectName)
|
||||
{
|
||||
std::cerr << "\t\t\t"
|
||||
<< "objectName = <" << pCallbackData->pObjects[i].pObjectName << ">\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
return VK_TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
This function fills the structure with flags indicating
|
||||
which error messages should go through
|
||||
*/
|
||||
vk::DebugUtilsMessengerCreateInfoEXT makeDebugUtilsMessengerCreateInfoEXT()
|
||||
{
|
||||
|
||||
using SEVERITY = vk::DebugUtilsMessageSeverityFlagBitsEXT; // for readability
|
||||
using MESSAGE = vk::DebugUtilsMessageTypeFlagBitsEXT;
|
||||
return { {},
|
||||
SEVERITY::eWarning | SEVERITY::eError,
|
||||
MESSAGE::eGeneral | MESSAGE::ePerformance | MESSAGE::eValidation,
|
||||
&debugUtilsMessengerCallback };
|
||||
}
|
||||
|
||||
/*
|
||||
The dynamic loader allows us to access many extensions
|
||||
Required before creating instance for loading the extension VK_EXT_DEBUG_UTILS_EXTENSION_NAME
|
||||
*/
|
||||
void initDynamicLoader()
|
||||
{
|
||||
#if VK_HEADER_VERSION >= 301
|
||||
using VulkanDynamicLoader = vk::detail::DynamicLoader;
|
||||
#else
|
||||
using VulkanDynamicLoader = vk::DynamicLoader;
|
||||
#endif
|
||||
static VulkanDynamicLoader dl;
|
||||
static PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates the Vulkan instance and, when validation is enabled, the debug
 * messenger.
 *
 * @param instance             Receives the created instance.
 * @param debugUtilsMessenger  Receives the messenger; untouched when
 *                             validation is disabled.
 * @param appName              Application name for vk::ApplicationInfo.
 * @param engineName           Engine name for vk::ApplicationInfo.
 */
void createInstance(vk::Instance& instance, vk::DebugUtilsMessengerEXT& debugUtilsMessenger,
                    std::string appName, std::string engineName)
{
    initDynamicLoader();

    vk::ApplicationInfo appInfo(appName.c_str(), 1, engineName.c_str(), 1, VK_API_VERSION_1_2);

    // flags, pAppInfo, layerCount, layerNames, extCount, extNames
    const auto layerCount = static_cast<uint32_t>(validationLayers.size());
    const auto extensionCount = static_cast<uint32_t>(instanceExtensions.size());
    vk::InstanceCreateInfo createInfo({}, &appInfo,
                                      layerCount, validationLayers.data(),
                                      extensionCount, instanceExtensions.data());

    // Chaining the messenger info into the instance create-info (type-safe
    // pNext via StructureChain) lets errors during instance creation be reported.
    vk::DebugUtilsMessengerCreateInfoEXT messengerInfo = makeDebugUtilsMessengerCreateInfoEXT();
    vk::StructureChain<vk::InstanceCreateInfo, vk::DebugUtilsMessengerCreateInfoEXT> chain =
        { createInfo, messengerInfo };

    // Without validation the messenger info is removed from the chain again.
    if (!enableValidationLayers)
    {
        chain.unlink<vk::DebugUtilsMessengerCreateInfoEXT>();
    }

    instance = vk::createInstance(chain.get<vk::InstanceCreateInfo>());

    // Update the dispatcher so instance-level extension functions become available.
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance);

    if (enableValidationLayers)
    {
        debugUtilsMessenger = instance.createDebugUtilsMessengerEXT(messengerInfo);
    }
}
|
||||
|
||||
|
||||
std::tuple<uint32_t, uint32_t> getComputeAndTransferQueues(vk::PhysicalDevice& pDevice)
|
||||
{
|
||||
uint32_t tq = -1;
|
||||
std::optional<uint32_t> otq;
|
||||
uint32_t cq = -1;
|
||||
std::optional<uint32_t> ocq;
|
||||
|
||||
using Chain = vk::StructureChain<vk::QueueFamilyProperties2, vk::QueueFamilyCheckpointPropertiesNV>;
|
||||
using QFB = vk::QueueFlagBits;
|
||||
#if VK_HEADER_VERSION >= 301
|
||||
using VulkanDispatchLoaderDynamic = vk::detail::DispatchLoaderDynamic;
|
||||
#else
|
||||
using VulkanDispatchLoaderDynamic = vk::DispatchLoaderDynamic;
|
||||
#endif
|
||||
auto queueFamilyProperties2 = pDevice.getQueueFamilyProperties2<Chain, std::allocator<Chain>, VulkanDispatchLoaderDynamic>();
|
||||
|
||||
for (uint32_t j = 0; j < queueFamilyProperties2.size(); j++)
|
||||
{
|
||||
vk::QueueFamilyProperties const& properties =
|
||||
queueFamilyProperties2[static_cast<size_t>(j)].get<vk::QueueFamilyProperties2>().queueFamilyProperties;
|
||||
|
||||
if (properties.queueFlags & QFB::eCompute)
|
||||
{
|
||||
if (!(properties.queueFlags & QFB::eGraphics ||
|
||||
properties.queueFlags & QFB::eProtected))
|
||||
ocq = j; // When a queue supports only compute and not graphics we want to use that
|
||||
cq = j;
|
||||
}
|
||||
|
||||
if (properties.queueFlags & QFB::eTransfer)
|
||||
{
|
||||
if (!(properties.queueFlags & QFB::eCompute ||
|
||||
properties.queueFlags & QFB::eGraphics ||
|
||||
properties.queueFlags & QFB::eProtected))
|
||||
otq = j; // When a queue supports only transfer, we want to use this one
|
||||
tq = j;
|
||||
}
|
||||
}
|
||||
|
||||
if (otq.has_value())
|
||||
tq = otq.value();
|
||||
if (ocq.has_value())
|
||||
cq = ocq.value();
|
||||
return std::tuple<uint32_t, uint32_t>(cq, tq);
|
||||
}
|
||||
// Chooses the physical device to run on and stores it in pDevice.
//
// If a readable "device_selection_cache" file exists in the working directory and
// names a device (vendorID + deviceID) that is still present, that device is reused.
// Otherwise the available devices are listed, the user is asked for an index on
// standard input, and the choice is written back to the cache file.
//
// Throws std::runtime_error when no physical device is available or when standard
// input ends before a valid index was entered.
void selectPhysicalDevice(vk::Instance& instance, vk::PhysicalDevice& pDevice)
{
    std::vector<vk::PhysicalDevice> physDs = instance.enumeratePhysicalDevices();
    if (physDs.empty()) {
        // Without this guard the prompt below could never be satisfied.
        throw std::runtime_error("no Vulkan physical devices available");
    }

    const static char* cache_name = "device_selection_cache";
    const static char* recreation_message = "To select a new device, delete the file \"device_selection_cache\" in your working directory before executing the framework.";

    std::ifstream ifile(cache_name, std::ios::binary);
    if (ifile.is_open()) {
        DeviceSelectionCache cache{};
        // Only trust the cache when the whole record could be read.
        const bool cacheReadable = static_cast<bool>(ifile.read(reinterpret_cast<char*>(&cache), sizeof(cache)));
        ifile.close();
        if (cacheReadable) {
            for (const auto& physD : physDs) {
                auto props = physD.getProperties2().properties;
                if (props.vendorID == cache.vendorID && props.deviceID == cache.deviceID) {
                    std::cout << "Selecting previously selected device: \"" << props.deviceName.data() << "\"" << std::endl;
                    std::cout << recreation_message << std::endl;
                    pDevice = physD;
                    return;
                }
            }
        }
        std::cout << "Previously selected device was not found." << std::endl;
    }
    else {
        std::cout << "No previous device selection found." << std::endl;
    }

    std::cout << "Select one of the available devices:" << std::endl;

    for (size_t idx = 0; idx < physDs.size(); idx++) {
        auto props = physDs[idx].getProperties2().properties;
        std::cout << idx << ")\t" << props.deviceName.data() << std::endl;
    }

    uint32_t i = 0;
    while (true) {
        std::cout << "Enter device number: ";
        if (std::cin >> i) {
            if (i < physDs.size()) break;
            continue; // numeric but out of range: ask again
        }
        if (std::cin.eof() || std::cin.bad()) {
            throw std::runtime_error("standard input closed before a device was selected");
        }
        // Not a usable number: reset the stream and drop the rest of the line so the
        // rejected text is not parsed again (and not silently treated as index 0).
        std::cin.clear();
        std::string rejected;
        std::getline(std::cin, rejected);
    }

    auto props = physDs[i].getProperties2().properties;
    DeviceSelectionCache cache{};
    cache.vendorID = props.vendorID;
    cache.deviceID = props.deviceID;

    std::ofstream ofile(cache_name, std::ios::out | std::ios::binary);
    ofile.write(reinterpret_cast<const char*>(&cache), sizeof(cache));
    ofile.close();
    const bool cacheWritten = !ofile.fail();

    std::cout << "Selected device: \"" << props.deviceName.data() << "\"" << std::endl;
    if (cacheWritten) {
        std::cout << "This device will be automatically selected in the future." << std::endl
                  << recreation_message << std::endl;
    }
    else {
        // Caching is best effort; the selection itself is still valid.
        std::cout << "Warning: could not write \"" << cache_name << "\"; the device will have to be selected again next time." << std::endl;
    }

    pDevice = physDs[i];
}
|
||||
|
||||
// The logical device holds the queues and will be used in almost every call from now on
|
||||
|
||||
// Creates the logical device with a single queue from the compute queue family.
//
// instance - not used by this function.
// pDevice  - physical device the logical device is created on.
// device   - receives the created logical device.
//
// VK_KHR_portability_subset is enabled in addition to the configured extensionNames
// whenever the physical device advertises it.
// Throws std::runtime_error when the device has no compute-capable queue family.
void createLogicalDevice(vk::Instance& instance, vk::PhysicalDevice& pDevice, vk::Device& device)
{
    // Only the compute family is used; no separate transfer queue is requested.
    const uint32_t cQ = std::get<0>(getComputeAndTransferQueues(pDevice));
    if (cQ == static_cast<uint32_t>(-1)) {
        // getComputeAndTransferQueues reports "not found" as uint32_t(-1).
        throw std::runtime_error("selected device has no compute-capable queue family");
    }

    // flags, queueFamily, queueCount, queuePriority
    float prio = 1.f;
    std::vector<vk::DeviceQueueCreateInfo> queuesInfo;
    queuesInfo.push_back(vk::DeviceQueueCreateInfo({}, cQ, 1U, &prio));

    std::vector extensionNames_(extensionNames);

    for (const auto& ext : pDevice.enumerateDeviceExtensionProperties()) {
        if (strcmp(ext.extensionName.data(), VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME) == 0) {
            extensionNames_.push_back(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
            break;
        }
    }

    // {}, queueCreateInfoCount, pQueueCreateInfos, enabledLayerCount, ppEnabledLayerNames,
    // enabledExtensionCount, ppEnabledExtensionNames
    vk::DeviceCreateInfo dci({}, CAST(queuesInfo), queuesInfo.data(),
                             CAST(validationLayers), validationLayers.data(),
                             CAST(extensionNames_), extensionNames_.data());

    device = pDevice.createDevice(dci);
    // Reload the dispatcher so device-level entry points are resolved for this device.
    VULKAN_HPP_DEFAULT_DISPATCHER.init(device);

    setObjectName(device, device, "This is my lovely device !");
}
|
||||
// Creates a command pool (no creation flags) for the queue family `queueIndex`
// and stores the handle in `commandPool`.
void createCommandPool(vk::Device& device, vk::CommandPool& commandPool, uint32_t queueIndex)
{
    vk::CommandPoolCreateInfo poolInfo;
    poolInfo.flags = vk::CommandPoolCreateFlags();
    poolInfo.queueFamilyIndex = queueIndex;

    commandPool = device.createCommandPool(poolInfo);
}
|
||||
|
||||
// Destroys the debug messenger (if one was created) and then the instance.
//
// createInstance only creates the messenger when enableValidationLayers is true, so
// the same condition is used here instead of a separate NDEBUG check; the handle is
// additionally tested so a messenger that was never created is not passed on.
void destroyInstance(vk::Instance& instance, vk::DebugUtilsMessengerEXT& debugUtilsMessenger)
{
    if (enableValidationLayers && debugUtilsMessenger)
    {
        instance.destroyDebugUtilsMessengerEXT(debugUtilsMessenger);
        debugUtilsMessenger = nullptr;
    }
    instance.destroy();
}
|
||||
// Destroys the given logical device.
void destroyLogicalDevice(vk::Device& device)
{
    vk::Device& doomed = device;
    doomed.destroy();
}
|
||||
|
||||
// Destroys `commandPool` on `device` and resets the caller's handle to a null handle.
void destroyCommandPool(vk::Device& device, vk::CommandPool& commandPool)
{
    device.destroyCommandPool(commandPool);

    // Leave a default-constructed (null) handle behind.
    commandPool = vk::CommandPool{};
}
|
||||
|
||||
// Prints the properties of every queue family of `pDevice` to std::cout.
//
// diagExt - when true, the NV checkpoint properties chained to each family are
//           printed as well.
void showAvailableQueues(vk::PhysicalDevice& pDevice, bool diagExt)
{
    using Chain = vk::StructureChain<vk::QueueFamilyProperties2, vk::QueueFamilyCheckpointPropertiesNV>;
#if VK_HEADER_VERSION >= 301
    using VulkanDispatchLoaderDynamic = vk::detail::DispatchLoaderDynamic;
#else
    using VulkanDispatchLoaderDynamic = vk::DispatchLoaderDynamic;
#endif
    const auto families = pDevice.getQueueFamilyProperties2<Chain, std::allocator<Chain>, VulkanDispatchLoaderDynamic>();

    size_t familyIndex = 0;
    for (const Chain& family : families)
    {
        const vk::QueueFamilyProperties& props = family.get<vk::QueueFamilyProperties2>().queueFamilyProperties;
        const vk::Extent3D& granularity = props.minImageTransferGranularity;

        std::cout << "\t" << "QueueFamily " << familyIndex << "\n"
                  << "\t\t" << "QueueFamilyProperties:\n"
                  << "\t\t\t" << "queueFlags = " << vk::to_string(props.queueFlags) << "\n"
                  << "\t\t\t" << "queueCount = " << props.queueCount << "\n"
                  << "\t\t\t" << "timestampValidBits = " << props.timestampValidBits << "\n"
                  << "\t\t\t" << "minImageTransferGranularity = " << granularity.width << " x "
                  << granularity.height << " x "
                  << granularity.depth << "\n"
                  << "\n";

        if (diagExt)
        {
            const vk::QueueFamilyCheckpointPropertiesNV& checkpoints = family.get<vk::QueueFamilyCheckpointPropertiesNV>();
            std::cout << "\t\t" << "CheckPointPropertiesNV:\n"
                      << "\t\t\t" << "checkpointExecutionStageMask = "
                      << vk::to_string(checkpoints.checkpointExecutionStageMask) << "\n"
                      << "\n";
        }

        ++familyIndex;
    }
}
|
||||
|
||||
// Creates a query pool holding `queryCount` timestamp queries and stores it in `queryPool`.
void createTimestampQueryPool(vk::Device& device, vk::QueryPool& queryPool, uint32_t queryCount)
{
    vk::QueryPoolCreateInfo poolInfo;
    poolInfo.queryType = vk::QueryType::eTimestamp;
    poolInfo.queryCount = queryCount;

    queryPool = device.createQueryPool(poolInfo);
}
|
||||
|
||||
// Destroys `queryPool` on `device` and resets the caller's handle to a null handle.
void destroyQueryPool(vk::Device& device, vk::QueryPool& queryPool)
{
    device.destroyQueryPool(queryPool);

    // Leave a default-constructed (null) handle behind.
    queryPool = vk::QueryPool{};
}
|
||||
|
||||
// Prints a capability report for `pDevice` to std::cout: general properties,
// supported device extensions, memory types, memory heaps (only when the memory
// budget extension is advertised), device layers and finally the queue families.
void printDeviceCapabilities(vk::PhysicalDevice& pDevice)
{
    std::vector<vk::ExtensionProperties> ext = pDevice.enumerateDeviceExtensionProperties();
    std::vector<vk::LayerProperties> layers = pDevice.enumerateDeviceLayerProperties();
    vk::PhysicalDeviceMemoryProperties memoryProperties = pDevice.getMemoryProperties();
    vk::PhysicalDeviceProperties properties = pDevice.getProperties();

    std::cout << "====================" << std::endl
              << "Device Name: " << properties.deviceName.data() << std::endl
              << "Device ID: " << properties.deviceID << std::endl
              << "Device Type: " << vk::to_string(properties.deviceType) << std::endl
              << "Driver Version: " << properties.driverVersion << std::endl
              << "API Version: " << properties.apiVersion << std::endl
              << "====================" << std::endl
              << std::endl;

    bool budgetExt = false;
    bool diagExt = false;
    std::cout << "This device supports the following extensions (" << ext.size() << "): " << std::endl;
    for (const vk::ExtensionProperties& e : ext)
    {
        const std::string extName(e.extensionName.data());
        std::cout << extName << std::endl;
        if (extName == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME)
            budgetExt = true;
        if (extName == VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME)
            diagExt = true;
    }

    std::cout << "This device supports the following memory types (" << memoryProperties.memoryTypeCount << "): " << std::endl;
    uint32_t c = 0U;
    for (const vk::MemoryType& e : memoryProperties.memoryTypes)
    {
        // memoryTypes is a fixed-size array; only the first memoryTypeCount entries are valid.
        if (c >= memoryProperties.memoryTypeCount)
            break;

        std::cout << e.heapIndex << "\t ";
        std::cout << vk::to_string(e.propertyFlags) << std::endl;
        c++;
    }
    std::cout << "====================" << std::endl
              << std::endl;

    if (budgetExt)
    {
        std::cout << "This device has the following heaps (" << memoryProperties.memoryHeapCount << "): " << std::endl;
        c = 0U;
        for (const vk::MemoryHeap& e : memoryProperties.memoryHeaps)
        {
            // Same as above: stop after the last valid heap entry.
            if (c >= memoryProperties.memoryHeapCount)
                break;

            std::cout << "Size: " << formatSize(e.size) << "\t ";
            std::cout << vk::to_string(e.flags) << std::endl;
            c++;
        }
    }

    std::cout << "====================" << std::endl
              << std::endl
              << "This device has the following layers (" << layers.size() << "): " << std::endl;
    for (const vk::LayerProperties& l : layers)
        std::cout << std::string(l.layerName.data()) << "\t : " << std::string(l.description.data()) << std::endl;
    std::cout << "====================" << std::endl
              << std::endl;

    showAvailableQueues(pDevice, diagExt);
}
|
||||
136
src/main.cpp
Normal file
136
src/main.cpp
Normal file
@@ -0,0 +1,136 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
#include "A2Task1.h"
|
||||
#include "A2Task2.h"
|
||||
#include "A2Task1Solution/Sequential.h"
|
||||
#include "A2Task1Solution/Interleaved.h"
|
||||
#include "A2Task1Solution/KernelDecomposition.h"
|
||||
#include "A2Task2Solution/Naive.h"
|
||||
#include "A2Task2Solution/KernelDecomposition.h"
|
||||
#include "renderdoc.h"
|
||||
|
||||
// Runs every Task 1 solution against the same A2Task1 instance (128 Mi elements)
// and prints pass/fail together with the averaged time and throughput.
void run_A2_task1(AppResources &app){
    size_t size = 128*1024*1024;
    A2Task1 a2Task1(size);
    std::cout<<"====== A2 TASK 1 ======" <<std::endl;

    // Evaluates `solution` up to N times, stopping at the first failing run.
    // Each run contributes solution->mstime / N to the reported time.
    auto evaluateTask1Solution = [&](A2Task1Solution* solution, std::string name, int N=10) {
        std::cout << "[Task1] evaluating " << name << " with size: "<<size<< std::endl;

        bool allPassed = true;
        float averageMs = 0.f;
        for (int run = 0; allPassed && run < N; ++run) {
            allPassed &= a2Task1.evaluateSolution(*solution);
            solution->cleanup();
            averageMs += solution->mstime / N;
        }

        if (!allPassed) {
            std::cout << "TEST FAILED" << std::endl;
            return;
        }

        std::cout << "TEST PASSED. Execution time: " << averageMs << " ms, "
                  << "Throughput: " << size / averageMs / 1000000 << " GE/s" << std::endl;
    };

    A2Task1SolutionInterleaved interleavedSolution(app, 128);
    evaluateTask1Solution(&interleavedSolution, "Interleaved");

    A2Task1SolutionSequential sequentialSolution(app, 128);
    evaluateTask1Solution(&sequentialSolution, "Sequential");

    A2Task1SolutionKernelDecomposition kernelDecompositionSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv");
    evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition");

    A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv");
    evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll");

    A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv");
    evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic");
}
|
||||
// Runs the Task 2 solutions and prints pass/fail together with the averaged time
// and throughput. Two problem instances are used: a large one (128 Mi elements)
// and a small one that is meant to fit into a single workgroup.
void run_A2_task2(AppResources& app){
    size_t size = 128*1024*1024;
    std::cout<<"====== A2 TASK 2 ======" <<std::endl;

    // Size used to test the local kernel decomposition without the extension to
    // arbitrary arrays. Must be a power of two and <= 1024!
    size_t sizeLocal = 128;

    A2Task2 a2Task2(size);
    A2Task2 a2Task2Local(sizeLocal);

    // Evaluates `solution` on `task` up to N times, stopping at the first failing run.
    // Each run contributes solution->mstime / N to the reported time.
    auto evaluateTask2Solution = [&](A2Task2 *task, A2Task2Solution* solution, std::string name, int N) {
        std::cout << "[Task2] evaluating " << name << " with size: "<< task->size() << std::endl;

        bool allPassed = true;
        float averageMs = 0.f;
        for (int run = 0; allPassed && run < N; ++run) {
            allPassed &= task->evaluateSolution(*solution);
            solution->cleanup();
            averageMs += solution->mstime / N;
        }

        if (!allPassed) {
            std::cout << "TEST FAILED" << std::endl;
            return;
        }

        std::cout << "Execution time: " << averageMs << " ms, "
                  << "Throughput: " << task->size() / averageMs / 1000000 << " GE/s" << std::endl;
        std::cout << "TEST PASSED" << std::endl;
    };

    A2Task2SolutioNaive naiveSolution(app, 128);
    evaluateTask2Solution(&a2Task2, &naiveSolution, "Naive",5);

    A2Task2SolutionKernelDecomposition kernelDecompositionSolutionLocal(app, sizeLocal);
    evaluateTask2Solution(&a2Task2Local, &kernelDecompositionSolutionLocal, "Kernel Decomposition that fits in one workgroup (normal if 'slow')",5);

    A2Task2SolutionKernelDecomposition kernelDecompositionSolution(app, 128);
    evaluateTask2Solution(&a2Task2, &kernelDecompositionSolution, "Kernel Decomposition",5);
}
|
||||
int main()
|
||||
{
|
||||
try
|
||||
{
|
||||
AppResources app;
|
||||
|
||||
initApp(app);
|
||||
|
||||
renderdoc::initialize();
|
||||
renderdoc::startCapture();
|
||||
|
||||
run_A2_task1(app);
|
||||
|
||||
run_A2_task2(app);
|
||||
|
||||
renderdoc::endCapture();
|
||||
|
||||
app.destroy();
|
||||
}
|
||||
catch (vk::SystemError &err)
|
||||
{
|
||||
std::cout << "vk::SystemError: " << err.what() << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
catch (std::exception &err)
|
||||
{
|
||||
std::cout << "std::exception: " << err.what() << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cout << "unknown error\n";
|
||||
exit(-1);
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
50
src/renderdoc.cpp
Normal file
50
src/renderdoc.cpp
Normal file
@@ -0,0 +1,50 @@
|
||||
#include "renderdoc.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#ifdef ENABLE_RENDERDOC
|
||||
#include "renderdoc_app.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#elif __linux__
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
static RENDERDOC_API_1_1_2 *rdoc_api = nullptr;
|
||||
#endif
|
||||
|
||||
// Thin wrapper around the RenderDoc in-application API. All functions compile to
// no-ops unless ENABLE_RENDERDOC is defined, and do nothing at runtime when the
// RenderDoc library is not already loaded into the process.
namespace renderdoc {
    // Looks up RENDERDOC_GetAPI in an already-loaded RenderDoc module and, if found,
    // requests the 1.1.2 API table. On failure the API pointer is left null so that
    // startCapture()/endCapture() are skipped.
    void initialize() {
#ifdef ENABLE_RENDERDOC
        pRENDERDOC_GetAPI RENDERDOC_GetAPI = nullptr;

#ifdef _WIN32
        // GetModuleHandleA only succeeds when renderdoc.dll is already loaded.
        if(HMODULE mod = GetModuleHandleA("renderdoc.dll"))
            RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress(mod, "RENDERDOC_GetAPI");
#elif defined(__linux__)
        // RTLD_NOLOAD: only obtain a handle if librenderdoc.so is already loaded.
        if(void *mod = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD))
            RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym(mod, "RENDERDOC_GetAPI");
#endif

        if (RENDERDOC_GetAPI != nullptr) {
            int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, (void **)&rdoc_api);
            assert(ret == 1);
            // In builds where assert is compiled out, still make sure a failed request
            // cannot leave a pointer behind that the capture functions would use.
            if (ret != 1)
                rdoc_api = nullptr;
        }
#endif
    }

    // Starts a frame capture if the RenderDoc API was obtained by initialize().
    void startCapture() {
#ifdef ENABLE_RENDERDOC
        if (rdoc_api)
            rdoc_api->StartFrameCapture(nullptr, nullptr);
#endif
    }

    // Ends the frame capture if the RenderDoc API was obtained by initialize().
    void endCapture() {
#ifdef ENABLE_RENDERDOC
        if (rdoc_api)
            rdoc_api->EndFrameCapture(nullptr, nullptr);
#endif
    }
}
|
||||
116
src/task_common.cpp
Normal file
116
src/task_common.cpp
Normal file
@@ -0,0 +1,116 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "task_common.h"
|
||||
#include "initialization.h"
|
||||
#include "utils.h"
|
||||
|
||||
namespace Cmn {
|
||||
//We have a binding vector ready to become a descriptorSetLayout
|
||||
void createDescriptorSetLayout(vk::Device& device,
|
||||
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
|
||||
vk::DescriptorSetLayout& descLayout) {
|
||||
vk::DescriptorSetLayoutCreateInfo layoutInfo(
|
||||
{},
|
||||
CAST(bindings), // Number of binding infos
|
||||
bindings.data() // Array of binding infos
|
||||
);
|
||||
descLayout = device.createDescriptorSetLayout(layoutInfo);
|
||||
}
|
||||
|
||||
void addStorage(std::vector<vk::DescriptorSetLayoutBinding>& bindings, uint32_t binding) {
|
||||
//Bindings needed for DescriptorSetLayout
|
||||
//The DescriptorType eStorageBuffer is used in our case as storage buffer for compute shader
|
||||
//The ID binding(argument) is needed in the shader
|
||||
//DescriptorCount is set to 1U
|
||||
bindings.push_back(vk::DescriptorSetLayoutBinding(
|
||||
binding, // The binding number of this entry
|
||||
vk::DescriptorType::eStorageBuffer, // Type of resource descriptors used for this binding
|
||||
1U, // Number of descriptors contained in the binding
|
||||
vk::ShaderStageFlagBits::eCompute) // All defined shader stages can access the resource
|
||||
);
|
||||
}
|
||||
|
||||
void allocateDescriptorSet(vk::Device& device, vk::DescriptorSet& descSet, vk::DescriptorPool& descPool,
|
||||
vk::DescriptorSetLayout& descLayout) {
|
||||
// You can technically allocate multiple layouts at once, we don't need that (so we put 1)
|
||||
vk::DescriptorSetAllocateInfo descAllocInfo(descPool, 1U, &descLayout);
|
||||
// Therefore the vector is length one, we want to take its (only) element
|
||||
descSet = device.allocateDescriptorSets(descAllocInfo)[0];
|
||||
}
|
||||
|
||||
|
||||
//Binding our DescriptorSet to Buffer
|
||||
//VK_WHOLE_SIZE is specified to bind the entire Buffer
|
||||
//DescriptorType eStorageBuffer in our case should be coherant with DescriptorSetLayout
|
||||
//WriteDescriptorSets(creates array) and updateDescriptorSets can be used only once
|
||||
void bindBuffers(vk::Device& device, vk::Buffer& b, vk::DescriptorSet& set, uint32_t binding) {
|
||||
// Buffer info and data offset info
|
||||
vk::DescriptorBufferInfo descInfo(
|
||||
b, // Buffer to get data from
|
||||
0ULL, // Position of start of data
|
||||
VK_WHOLE_SIZE // Size of data
|
||||
);
|
||||
|
||||
// Binding index in the shader V
|
||||
vk::WriteDescriptorSet write(set, binding, 0U, 1U,
|
||||
vk::DescriptorType::eStorageBuffer, nullptr, &descInfo);
|
||||
device.updateDescriptorSets(1U, &write, 0U, nullptr);
|
||||
}
|
||||
|
||||
void createPipeline(vk::Device& device, vk::Pipeline& pipeline,
|
||||
vk::PipelineLayout& pipLayout, vk::SpecializationInfo& specInfo,
|
||||
vk::ShaderModule& sModule) {
|
||||
vk::PipelineShaderStageCreateInfo stageInfo(vk::PipelineShaderStageCreateFlags(),
|
||||
vk::ShaderStageFlagBits::eCompute, sModule,
|
||||
"main", &specInfo);
|
||||
|
||||
vk::ComputePipelineCreateInfo computeInfo(vk::PipelineCreateFlags(), stageInfo, pipLayout);
|
||||
|
||||
// This is a workaround: ideally there should not be a ".value"
|
||||
// This should be fixed in later releases of the SDK
|
||||
pipeline = device.createComputePipeline(nullptr, computeInfo, nullptr).value;
|
||||
}
|
||||
|
||||
//Number of DescriptorSets is one by default
|
||||
void createDescriptorPool(vk::Device& device,
|
||||
std::vector<vk::DescriptorSetLayoutBinding>& bindings, vk::DescriptorPool& descPool,
|
||||
uint32_t numDescriptorSets) {
|
||||
vk::DescriptorPoolSize descriptorPoolSize = vk::DescriptorPoolSize(
|
||||
vk::DescriptorType::eStorageBuffer, bindings.size() * numDescriptorSets);
|
||||
vk::DescriptorPoolCreateInfo descriptorPoolCI = vk::DescriptorPoolCreateInfo(
|
||||
vk::DescriptorPoolCreateFlags(), numDescriptorSets, 1U, &descriptorPoolSize);
|
||||
|
||||
descPool = device.createDescriptorPool(descriptorPoolCI);
|
||||
}
|
||||
|
||||
|
||||
void createShader(vk::Device& device, vk::ShaderModule& shaderModule, const std::string& filename) {
|
||||
std::vector<char> cshader = readFile(filename);
|
||||
// Shader Module creation information
|
||||
vk::ShaderModuleCreateInfo smi(
|
||||
{},
|
||||
static_cast<uint32_t>(cshader.size()), // Size of code
|
||||
reinterpret_cast<const uint32_t *>(cshader.data())); // Pointer to code (of uint32_t pointer type)
|
||||
shaderModule = device.createShaderModule(smi);
|
||||
}
|
||||
}
|
||||
|
||||
// Destroys the Vulkan objects owned by this task in a fixed order:
// pipeline, pipeline layout, descriptor pool, descriptor set layout, shader module.
// Descriptor sets are not destroyed individually; they belong to the descriptor pool.
void TaskResources::destroy(vk::Device& device) {
    device.destroyPipeline(pipeline);
    device.destroyPipelineLayout(pipelineLayout);
    device.destroyDescriptorPool(descriptorPool);
    device.destroyDescriptorSetLayout(descriptorSetLayout);
    device.destroyShaderModule(cShader);

    std::cout << std::endl;
    std::cout << "destroyed everything successfully in task" << std::endl;
}
|
||||
109
src/utils.cpp
Normal file
109
src/utils.cpp
Normal file
@@ -0,0 +1,109 @@
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
// Reads the whole file `filename` in binary mode and returns its bytes.
// Throws std::runtime_error when the file cannot be opened, its size cannot be
// determined, or not all bytes could be read.
std::vector<char> readFile(const std::string& filename) {
    // Open positioned at the end so tellg() yields the file size.
    std::ifstream file(filename, std::ios::ate | std::ios::binary);

    if (!file.is_open()) {
        std::string error = "failed to open file: " + filename;
        throw std::runtime_error(error);
    }

    // tellg() reports failure as -1; casting that to size_t would request a huge buffer.
    const std::streamoff endPos = file.tellg();
    if (endPos < 0) {
        throw std::runtime_error("failed to determine size of file: " + filename);
    }

    std::vector<char> buffer(static_cast<size_t>(endPos));
    file.seekg(0);
    file.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
    if (!file) {
        throw std::runtime_error("failed to read file: " + filename);
    }
    file.close();
    return buffer;
}
|
||||
|
||||
// Formats a byte count as a human-readable string using binary units:
// "<n> B" below 1024, then "KB", "MB" and finally "GB" (single-precision division).
std::string formatSize(uint64_t size) {
    const uint64_t kib = 1024;
    const uint64_t mib = kib * 1024;
    const uint64_t gib = mib * 1024;

    std::ostringstream text;
    if (size >= gib) {
        text << static_cast<float>(size) / (1024.0f * 1024.0f * 1024.0f) << " GB";
    } else if (size >= mib) {
        text << static_cast<float>(size) / (1024.0f * 1024.0f) << " MB";
    } else if (size >= kib) {
        text << static_cast<float>(size) / 1024.f << " KB";
    } else {
        text << size << " B";
    }
    return text.str();
}
|
||||
|
||||
uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties, vk::PhysicalDevice& pdevice) {
|
||||
vk::PhysicalDeviceMemoryProperties memProperties = pdevice.getMemoryProperties();
|
||||
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
|
||||
if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
throw std::runtime_error("failed to find suitable memory type!");
|
||||
}
|
||||
|
||||
// Creates a buffer of `size` bytes with the given usage, names it `name` via
// setObjectName, allocates device memory with the requested `properties` and binds
// the memory to the buffer at offset 0.
//
// buffer / bufferMemory receive the created handles. If any step after buffer
// creation throws, everything created so far is released, the handles are reset to
// null and the exception is rethrown.
void createBuffer(vk::PhysicalDevice& pDevice, vk::Device& device,
    const vk::DeviceSize& size, vk::BufferUsageFlags usage,
    vk::MemoryPropertyFlags properties, std::string name, vk::Buffer& buffer,
    vk::DeviceMemory& bufferMemory) {
    vk::BufferCreateInfo inBufferInfo({}, size, usage);
    buffer = device.createBuffer(inBufferInfo);

    try {
        setObjectName(device, buffer, name);

        vk::MemoryRequirements memReq = device.getBufferMemoryRequirements(buffer);
        vk::MemoryAllocateInfo allocInfo(memReq.size,
            findMemoryType(memReq.memoryTypeBits, properties, pDevice));

        bufferMemory = device.allocateMemory(allocInfo);
        try {
            device.bindBufferMemory(buffer, bufferMemory, 0U);
        } catch (...) {
            // Binding failed: do not leak the allocation.
            device.freeMemory(bufferMemory);
            bufferMemory = nullptr;
            throw;
        }
    } catch (...) {
        // Do not leak the buffer when naming, allocation or binding failed.
        device.destroyBuffer(buffer);
        buffer = nullptr;
        throw;
    }
}
|
||||
|
||||
// Convenience overload: creates the buffer and its memory directly into the
// `buf` / `mem` members of `buffer` by forwarding to the handle-based overload.
void createBuffer(vk::PhysicalDevice& pDevice, vk::Device& device,
    const vk::DeviceSize& size, vk::BufferUsageFlags usage,
    vk::MemoryPropertyFlags properties, std::string name, Buffer& buffer) {
    vk::Buffer& handle = buffer.buf;
    vk::DeviceMemory& memory = buffer.mem;
    createBuffer(pDevice, device, size, usage, properties, name, handle, memory);
}
|
||||
|
||||
// Destroys the buffer and frees its memory, then resets both handles to null
// (as destroyCommandPool and destroyQueryPool do for their handles) so a stale
// handle cannot be reused or destroyed twice.
void destroyBuffer(vk::Device& device, Buffer& buffer) {
    device.destroyBuffer(buffer.buf);
    device.freeMemory(buffer.mem);
    buffer.buf = nullptr;
    buffer.mem = nullptr;
}
|
||||
|
||||
// Copies the first `byteSize` bytes of `srcBuffer` to the start of `dstBuffer`
// using a one-time command buffer that is submitted to `q`; returns after
// endSingleTimeCommands has completed.
void copyBuffer(vk::Device& device, vk::Queue& q, vk::CommandPool& commandPool,
    const vk::Buffer& srcBuffer, vk::Buffer& dstBuffer, vk::DeviceSize byteSize) {
    vk::CommandBuffer commandBuffer = beginSingleTimeCommands(device, commandPool);

    // srcOffset, dstOffset, size
    vk::BufferCopy copyRegion(0ULL, 0ULL, byteSize);
    commandBuffer.copyBuffer(srcBuffer, dstBuffer, 1, &copyRegion);

    endSingleTimeCommands(device, q, commandPool, commandBuffer);
}
|
||||
|
||||
// Allocates one primary command buffer from `commandPool`, begins recording with
// the one-time-submit usage flag and returns it.
vk::CommandBuffer beginSingleTimeCommands(vk::Device& device, vk::CommandPool& commandPool) {
    vk::CommandBufferAllocateInfo allocation;
    allocation.commandPool = commandPool;
    allocation.level = vk::CommandBufferLevel::ePrimary;
    allocation.commandBufferCount = 1;

    // Exactly one buffer was requested, so the first element is the only one.
    std::vector<vk::CommandBuffer> allocated = device.allocateCommandBuffers(allocation);
    vk::CommandBuffer recording = allocated[0];

    recording.begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit));
    return recording;
}
|
||||
|
||||
// Ends recording of `commandBuffer`, submits it to `q` without a fence, waits until
// the queue is idle and then frees the command buffer back to `commandPool`.
void endSingleTimeCommands(vk::Device& device, vk::Queue& q,
    vk::CommandPool& commandPool, vk::CommandBuffer& commandBuffer) {
    commandBuffer.end();

    // No wait or signal semaphores; just this one command buffer.
    vk::SubmitInfo submission;
    submission.commandBufferCount = 1U;
    submission.pCommandBuffers = &commandBuffer;

    q.submit({submission}, nullptr);
    q.waitIdle();

    device.freeCommandBuffers(commandPool, 1, &commandBuffer);
}
|
||||
Reference in New Issue
Block a user