init commit
This commit is contained in:
45
.gitignore
vendored
Normal file
45
.gitignore
vendored
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Compiled Object files
|
||||||
|
**/.DS_Store
|
||||||
|
*.slo
|
||||||
|
*.lo
|
||||||
|
*.o
|
||||||
|
*.obj
|
||||||
|
|
||||||
|
# Precompiled Headers
|
||||||
|
*.gch
|
||||||
|
*.pch
|
||||||
|
|
||||||
|
# Compiled Dynamic libraries
|
||||||
|
*.so
|
||||||
|
*.dylib
|
||||||
|
*.dll
|
||||||
|
|
||||||
|
# Fortran module files
|
||||||
|
*.mod
|
||||||
|
*.smod
|
||||||
|
|
||||||
|
# Compiled Static libraries
|
||||||
|
*.lai
|
||||||
|
*.la
|
||||||
|
*.lib
|
||||||
|
|
||||||
|
# Executables
|
||||||
|
*.exe
|
||||||
|
*.out
|
||||||
|
*.app
|
||||||
|
|
||||||
|
**/cmake-build-debug
|
||||||
|
**/CMakeCache.txt
|
||||||
|
**/cmake_install.cmake
|
||||||
|
**/install_manifest.txt
|
||||||
|
**/CMakeFiles/
|
||||||
|
**/CTestTestfile.cmake
|
||||||
|
**/*.cbp
|
||||||
|
**/CMakeScripts
|
||||||
|
**/compile_commands.json
|
||||||
|
|
||||||
|
include/divisible/*
|
||||||
|
|
||||||
|
build/
|
||||||
|
.cache/
|
||||||
|
.vscode/
|
||||||
77
CMakeLists.txt
Normal file
77
CMakeLists.txt
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.16)
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
|
project(Assignment2)
|
||||||
|
|
||||||
|
# add_shader(<target> <shader>)
#
# Compiles the GLSL file <shader> (a path relative to the current source
# directory) to SPIR-V with glslc and adds the generated file to <target> as a
# private source, so building <target> also rebuilds the shader when it changes.
# The output is written to ${CMAKE_CURRENT_SOURCE_DIR}/build/<shader>.spv.
function(add_shader TARGET SHADER)
    find_program(GLSLC glslc)
    # Stop at configure time with a readable message instead of generating a
    # build rule that would try to execute "GLSLC-NOTFOUND".
    if (NOT GLSLC)
        message(FATAL_ERROR "glslc was not found; install the Vulkan SDK or add glslc to PATH")
    endif ()

    set(current-shader-path ${CMAKE_CURRENT_SOURCE_DIR}/${SHADER})
    set(current-output-path ${CMAKE_CURRENT_SOURCE_DIR}/build/${SHADER}.spv)

    # Create the output directory (including any sub-directory contained in
    # <shader>) up front.
    file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/build)
    get_filename_component(current-output-dir ${current-output-path} DIRECTORY)
    file(MAKE_DIRECTORY ${current-output-dir})

    add_custom_command(
        OUTPUT ${current-output-path}
        COMMAND ${GLSLC} --target-env=vulkan1.2 -o ${current-output-path} ${current-shader-path}
        DEPENDS ${current-shader-path}
        IMPLICIT_DEPENDS CXX ${current-shader-path}
        VERBATIM)

    # Make sure our build depends on this output.
    set_source_files_properties(${current-output-path} PROPERTIES GENERATED TRUE)
    target_sources(${TARGET} PRIVATE ${current-output-path})
endfunction(add_shader)
|
||||||
|
|
||||||
|
find_package(Vulkan REQUIRED)
|
||||||
|
|
||||||
|
# Locate RenderDoc: prefer the RENDERDOC_PATH environment variable, otherwise
# fall back to the default Windows install directory when it exists.
# RENDERDOC_PATH stays unset when nothing is found.
if (DEFINED ENV{RENDERDOC_PATH})
    # "$ENV{...}" reads the environment variable; without the leading "$" the
    # literal text "ENV{RENDERDOC_PATH}" would be stored instead of its value.
    set(RENDERDOC_PATH "$ENV{RENDERDOC_PATH}")
elseif (WIN32)
    if (EXISTS "C:\\Program Files\\RenderDoc")
        set(RENDERDOC_PATH "C:\\Program Files\\RenderDoc")
    endif ()
else ()
    #LINUX PATH HERE
endif ()
|
||||||
|
|
||||||
|
set(SOURCE_FILE
|
||||||
|
src/main.cpp
|
||||||
|
src/task_common.cpp
|
||||||
|
src/host_timer.cpp
|
||||||
|
src/initialization.cpp
|
||||||
|
src/renderdoc.cpp
|
||||||
|
src/utils.cpp
|
||||||
|
src/A2Task1.cpp
|
||||||
|
src/A2Task2.cpp
|
||||||
|
src/A2Task1Solution/Interleaved.cpp
|
||||||
|
src/A2Task1Solution/KernelDecomposition.cpp
|
||||||
|
src/A2Task1Solution/Sequential.cpp
|
||||||
|
src/A2Task2Solution/KernelDecomposition.cpp
|
||||||
|
src/A2Task2Solution/Naive.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_compile_definitions(WORKING_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
|
||||||
|
add_executable(${PROJECT_NAME} ${SOURCE_FILE})
|
||||||
|
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task1Interleaved.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task1KernelDecomposition.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task1KernelDecompositionAtomic.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task1KernelDecompositionUnroll.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task1Sequential.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task2KernelDecomposition.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task2KernelDecompositionOffset.comp)
|
||||||
|
add_shader(${PROJECT_NAME} shaders/A2Task2Naive.comp)
|
||||||
|
|
||||||
|
target_include_directories(${PROJECT_NAME} PUBLIC ./include)
|
||||||
|
target_include_directories(${PROJECT_NAME} PRIVATE ${Vulkan_INCLUDE_DIRS})
|
||||||
|
target_link_libraries(${PROJECT_NAME} Vulkan::Vulkan)
|
||||||
|
target_compile_definitions(${PROJECT_NAME} PRIVATE)
|
||||||
|
if (RENDERDOC_PATH)
|
||||||
|
target_include_directories(${PROJECT_NAME} PRIVATE /usr/local/renderdoc_1.33/include)
|
||||||
|
target_include_directories(${PROJECT_NAME} PRIVATE ${RENDERDOC_PATH})
|
||||||
|
target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_RENDERDOC)
|
||||||
|
endif ()
|
||||||
37
include/A2Task1.h
Normal file
37
include/A2Task1.h
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "helper.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
#include "task_common.h"
|
||||||
|
|
||||||
|
/// Abstract interface implemented by every Task 1 solution variant.
/// A2Task1::evaluateSolution() calls prepare(), compute() and result() in
/// that order.
class A2Task1Solution {
public:
    /// Virtual so that deleting a solution through a base pointer is well defined.
    virtual ~A2Task1Solution() = default;

    /// Time in milliseconds measured by compute(); zero until a run has happened.
    float mstime = 0.0f;

    /// Sets up all resources needed to process `input`.
    virtual void prepare(const std::vector<uint> &input) = 0;
    /// Runs the computation.
    virtual void compute() = 0;
    /// Returns the single value produced by the computation.
    virtual uint result() const = 0;
    /// Releases everything created by prepare().
    virtual void cleanup() = 0;
};
|
||||||
|
|
||||||
|
class A2Task1 {
|
||||||
|
public:
|
||||||
|
A2Task1(uint problemSize);
|
||||||
|
A2Task1(std::vector<uint> input);
|
||||||
|
|
||||||
|
bool evaluateSolution(A2Task1Solution& solution);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void computeReference();
|
||||||
|
|
||||||
|
std::vector<uint> input;
|
||||||
|
uint reference;
|
||||||
|
};
|
||||||
40
include/A2Task2.h
Normal file
40
include/A2Task2.h
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "helper.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
#include "task_common.h"
|
||||||
|
|
||||||
|
/// Abstract interface implemented by every Task 2 solution variant.
class A2Task2Solution {
public:
    /// Virtual so that deleting a solution through a base pointer is well defined.
    virtual ~A2Task2Solution() = default;

    /// Time in milliseconds reported by the solution; zero until a run has happened.
    float mstime = 0.0f;

    /// Sets up all resources needed to process `input`.
    virtual void prepare(const std::vector<uint> &input) = 0;
    /// Runs the computation.
    virtual void compute() = 0;
    /// Returns the array produced by the computation.
    virtual std::vector<uint> result() const = 0;
    /// Releases everything created by prepare().
    virtual void cleanup() = 0;
};
|
||||||
|
|
||||||
|
class A2Task2 {
|
||||||
|
public:
|
||||||
|
A2Task2(uint problemSize);
|
||||||
|
A2Task2(std::vector<uint> input);
|
||||||
|
|
||||||
|
bool evaluateSolution(A2Task2Solution& solution);
|
||||||
|
size_t size() const {
|
||||||
|
return input.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void computeReference();
|
||||||
|
|
||||||
|
std::vector<uint> input;
|
||||||
|
std::vector<uint> reference;
|
||||||
|
};
|
||||||
8
include/helper.h
Normal file
8
include/helper.h
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Directory prefix (always ending in '/') used to build paths to project
// files. The build system may inject WORKING_DIR; without it the current
// directory is used.
#ifdef WORKING_DIR
inline std::string workingDir = std::string(WORKING_DIR) + '/';
#else
inline std::string workingDir = "./";
#endif
|
||||||
15
include/host_timer.h
Normal file
15
include/host_timer.h
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
/// Host-side timer built on std::chrono::high_resolution_clock.
class HostTimer {
public:
    HostTimer();

    /// Restarts the measurement.
    void reset();
    /// Time elapsed since the stored start point (unit defined by the implementation file).
    double elapsed() const;

private:
    using clock = std::chrono::high_resolution_clock;

    // Reference point that elapsed() is measured against.
    clock::time_point start;
};
|
||||||
47
include/initialization.h
Normal file
47
include/initialization.h
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#ifndef INITIALIZATION
|
||||||
|
#define INITIALIZATION
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
struct AppResources
|
||||||
|
{
|
||||||
|
vk::Instance instance;
|
||||||
|
vk::DebugUtilsMessengerEXT dbgUtilsMgr;
|
||||||
|
vk::PhysicalDevice pDevice;
|
||||||
|
vk::PhysicalDeviceProperties2 pDeviceProperties;
|
||||||
|
vk::PhysicalDeviceSubgroupProperties pDeviceSubgroupProperties;
|
||||||
|
|
||||||
|
vk::Device device;
|
||||||
|
vk::Queue computeQueue, transferQueue;
|
||||||
|
uint32_t cQ, tQ;
|
||||||
|
vk::CommandPool computeCommandPool, transferCommandPool;
|
||||||
|
vk::QueryPool queryPool;
|
||||||
|
|
||||||
|
void destroy();
|
||||||
|
};
|
||||||
|
|
||||||
|
VKAPI_ATTR VkBool32 VKAPI_CALL
|
||||||
|
debugUtilsMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
|
||||||
|
VkDebugUtilsMessageTypeFlagsEXT messageTypes,
|
||||||
|
VkDebugUtilsMessengerCallbackDataEXT const *pCallbackData,
|
||||||
|
void * /*pUserData*/);
|
||||||
|
vk::DebugUtilsMessengerCreateInfoEXT makeDebugUtilsMessengerCreateInfoEXT();
|
||||||
|
|
||||||
|
void selectPhysicalDevice(vk::Instance &instance, vk::PhysicalDevice &pDevice);
|
||||||
|
void createInstance(vk::Instance &instance, vk::DebugUtilsMessengerEXT &debugUtilsMessenger,
|
||||||
|
std::string appName, std::string engineName);
|
||||||
|
void createLogicalDevice(vk::Instance &instance, vk::PhysicalDevice &pDevice, vk::Device &device);
|
||||||
|
std::tuple<uint32_t, uint32_t> getComputeAndTransferQueues(vk::PhysicalDevice &pDevice);
|
||||||
|
void createCommandPool(vk::Device &device, vk::CommandPool &commandPool, uint32_t queueIndex);
|
||||||
|
void destroyInstance(vk::Instance &instance, vk::DebugUtilsMessengerEXT &debugUtilsMessenger);
|
||||||
|
void destroyLogicalDevice(vk::Device &device);
|
||||||
|
void destroyCommandPool(vk::Device &device, vk::CommandPool &commandPool);
|
||||||
|
|
||||||
|
void createTimestampQueryPool(vk::Device &device, vk::QueryPool &queryPool, uint32_t queryCount);
|
||||||
|
void destroyQueryPool(vk::Device &device, vk::QueryPool &queryPool);
|
||||||
|
|
||||||
|
|
||||||
|
void printDeviceCapabilities(vk::PhysicalDevice &pDevice);
|
||||||
|
|
||||||
|
void initApp(AppResources &app);
|
||||||
|
#endif
|
||||||
7
include/renderdoc.h
Normal file
7
include/renderdoc.h
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
/// Entry points of the RenderDoc capture integration.
namespace renderdoc {

/// One-time setup of the integration.
void initialize();

/// Begins a capture.
void startCapture();

/// Ends the capture begun by startCapture().
void endCapture();

} // namespace renderdoc
|
||||||
47
include/task_common.h
Normal file
47
include/task_common.h
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
#ifndef EX_TEMPLATE
|
||||||
|
#define EX_TEMPLATE
|
||||||
|
|
||||||
|
namespace Cmn {
|
||||||
|
void createDescriptorSetLayout(vk::Device &device,
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding> &bindings, vk::DescriptorSetLayout &descLayout);
|
||||||
|
void addStorage(std::vector<vk::DescriptorSetLayoutBinding> &bindings, uint32_t binding);
|
||||||
|
|
||||||
|
void allocateDescriptorSet(vk::Device &device, vk::DescriptorSet &descSet, vk::DescriptorPool &descPool,
|
||||||
|
vk::DescriptorSetLayout &descLayout);
|
||||||
|
void bindBuffers(vk::Device &device, vk::Buffer &b, vk::DescriptorSet &set, uint32_t binding);
|
||||||
|
|
||||||
|
void createDescriptorPool(vk::Device &device,
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding> &bindings, vk::DescriptorPool &descPool, uint32_t numDescriptors = 1);
|
||||||
|
void createPipeline(vk::Device &device, vk::Pipeline &pipeline,
|
||||||
|
vk::PipelineLayout &pipLayout, vk::SpecializationInfo &specInfo, vk::ShaderModule &sModule);
|
||||||
|
void createShader(vk::Device &device, vk::ShaderModule &shaderModule, const std::string &filename);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TaskResources
|
||||||
|
{
|
||||||
|
//std::vector<Buffer> buffers; move this to user code
|
||||||
|
vk::ShaderModule cShader;
|
||||||
|
|
||||||
|
vk::DescriptorSetLayout descriptorSetLayout;
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||||
|
vk::DescriptorSet descriptorSet;
|
||||||
|
vk::DescriptorPool descriptorPool;
|
||||||
|
|
||||||
|
vk::Pipeline pipeline;
|
||||||
|
vk::PipelineLayout pipelineLayout;
|
||||||
|
|
||||||
|
void destroy(vk::Device &device);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
113
include/utils.h
Normal file
113
include/utils.h
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
#ifndef UTILS
|
||||||
|
#define UTILS
|
||||||
|
#include <vector>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
|
||||||
|
// Element count of container `a` as uint32_t (the count type Vulkan structs use).
// The argument is parenthesized so expressions such as CAST(*ptr) or
// CAST(cond ? x : y) expand correctly.
#define CAST(a) static_cast<uint32_t>((a).size())
|
||||||
|
struct Buffer
|
||||||
|
{
|
||||||
|
vk::Buffer buf;
|
||||||
|
vk::DeviceMemory mem;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef uint32_t uint;
|
||||||
|
|
||||||
|
/// Integer division of x by y, rounded up for the non-negative operands this
/// is used with. The result is converted to the type of `x`.
template <typename T, typename V>
T ceilDiv(T x, V y) {
    const auto quotient = x / y;
    const bool hasRemainder = (x % y) != 0;
    // One extra chunk is needed whenever the division is not exact.
    return quotient + hasRemainder;
}
|
||||||
|
|
||||||
|
std::vector<char> readFile(const std::string &filename);
|
||||||
|
std::string formatSize(uint64_t size);
|
||||||
|
uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties, vk::PhysicalDevice &pdevice);
|
||||||
|
void createBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||||
|
const vk::DeviceSize &size, vk::BufferUsageFlags usage,
|
||||||
|
vk::MemoryPropertyFlags properties, std::string name, vk::Buffer &buffer, vk::DeviceMemory &bufferMemory);
|
||||||
|
void createBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||||
|
const vk::DeviceSize &size, vk::BufferUsageFlags usage,
|
||||||
|
vk::MemoryPropertyFlags properties, std::string name, Buffer &buffer);
|
||||||
|
void destroyBuffer(vk::Device &device, Buffer &buffer);
|
||||||
|
void copyBuffer(vk::Device &device, vk::Queue &q, vk::CommandPool &commandPool,
|
||||||
|
const vk::Buffer &srcBuffer, vk::Buffer &dstBuffer, vk::DeviceSize byteSize);
|
||||||
|
|
||||||
|
vk::CommandBuffer beginSingleTimeCommands(vk::Device &device, vk::CommandPool &commandPool);
|
||||||
|
void endSingleTimeCommands(vk::Device &device, vk::Queue &q,
|
||||||
|
vk::CommandPool &commandPool, vk::CommandBuffer &commandBuffer);
|
||||||
|
|
||||||
|
Buffer addHostCoherentBuffer(vk::PhysicalDevice &pDevice, vk::Device &device, vk::DeviceSize size, std::string name);
|
||||||
|
Buffer addDeviceOnlyBuffer(vk::PhysicalDevice &pDevice, vk::Device &device, vk::DeviceSize size, std::string name);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void fillDeviceBuffer(vk::Device &device, vk::DeviceMemory &mem, const std::vector<T> &input)
|
||||||
|
{
|
||||||
|
void *data = device.mapMemory(mem, 0, input.size() * sizeof(T), vk::MemoryMapFlags());
|
||||||
|
memcpy(data, input.data(), static_cast<size_t>(input.size() * sizeof(T)));
|
||||||
|
device.unmapMemory(mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void fillHostBuffer(vk::Device &device, vk::DeviceMemory &mem, std::vector<T> &output)
|
||||||
|
{
|
||||||
|
// copy memory from mem to output
|
||||||
|
void *data = device.mapMemory(mem, 0, output.size() * sizeof(T), vk::MemoryMapFlags());
|
||||||
|
memcpy(output.data(), data, static_cast<size_t>(output.size() * sizeof(T)));
|
||||||
|
device.unmapMemory(mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void fillDeviceWithStagingBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||||
|
vk::CommandPool &commandPool, vk::Queue &q,
|
||||||
|
Buffer &b, const std::vector<T> &data)
|
||||||
|
{
|
||||||
|
// Buffer b requires the eTransferSrc bit
|
||||||
|
// data (host) -> staging (device) -> Buffer b (device)
|
||||||
|
vk::Buffer staging;
|
||||||
|
vk::DeviceMemory mem;
|
||||||
|
vk::DeviceSize byteSize = data.size() * sizeof(T);
|
||||||
|
|
||||||
|
createBuffer(pDevice, device, byteSize, vk::BufferUsageFlagBits::eTransferSrc,
|
||||||
|
vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible, "staging",
|
||||||
|
staging, mem);
|
||||||
|
// V host -> staging V
|
||||||
|
fillDeviceBuffer<T>(device, mem, data);
|
||||||
|
// V staging -> buffer V
|
||||||
|
copyBuffer(device, q, commandPool, staging, b.buf, byteSize);
|
||||||
|
device.destroyBuffer(staging);
|
||||||
|
device.freeMemory(mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void fillHostWithStagingBuffer(vk::PhysicalDevice &pDevice, vk::Device &device,
|
||||||
|
vk::CommandPool &commandPool, vk::Queue &q,
|
||||||
|
const Buffer &b, std::vector<T> &data)
|
||||||
|
{
|
||||||
|
// Buffer b requires the eTransferDst bit
|
||||||
|
// Buffer b (device) -> staging (device) -> data (host)
|
||||||
|
vk::Buffer staging;
|
||||||
|
vk::DeviceMemory mem;
|
||||||
|
vk::DeviceSize byteSize = data.size() * sizeof(T);
|
||||||
|
|
||||||
|
createBuffer(pDevice, device, byteSize, vk::BufferUsageFlagBits::eTransferDst,
|
||||||
|
vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible, "staging",
|
||||||
|
staging, mem);
|
||||||
|
// V buffer -> staging V
|
||||||
|
copyBuffer(device, q, commandPool, b.buf, staging, byteSize);
|
||||||
|
// V staging -> host V
|
||||||
|
fillHostBuffer<T>(device, mem, data);
|
||||||
|
|
||||||
|
device.destroyBuffer(staging);
|
||||||
|
device.freeMemory(mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void setObjectName(vk::Device &device, T handle, std::string name)
|
||||||
|
{
|
||||||
|
#ifndef NDEBUG
|
||||||
|
vk::DebugUtilsObjectNameInfoEXT infoEXT(handle.objectType, uint64_t(static_cast<typename T::CType>(handle)), name.c_str());
|
||||||
|
device.setDebugUtilsObjectNameEXT(infoEXT);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
21
shaders/A2Task1Interleaved.comp
Normal file
21
shaders/A2Task1Interleaved.comp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
uint stride;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inoutBufer {uint v[];};
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
24
shaders/A2Task1KernelDecomposition.comp
Normal file
24
shaders/A2Task1KernelDecomposition.comp
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
uint offset;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||||
|
layout(binding = 1) buffer outBuffer { uint g_v[]; };
|
||||||
|
|
||||||
|
// TODO: Shared variables
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
24
shaders/A2Task1KernelDecompositionAtomic.comp
Normal file
24
shaders/A2Task1KernelDecompositionAtomic.comp
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
uint offset;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||||
|
layout(binding = 1) buffer outBuffer { uint g_v[]; };
|
||||||
|
|
||||||
|
// TODO: Shared variables
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
24
shaders/A2Task1KernelDecompositionUnroll.comp
Normal file
24
shaders/A2Task1KernelDecompositionUnroll.comp
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
uint offset;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||||
|
layout(binding = 1) buffer outBuffer { uint g_v[]; };
|
||||||
|
|
||||||
|
// TODO: Shared variables
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
21
shaders/A2Task1Sequential.comp
Normal file
21
shaders/A2Task1Sequential.comp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
uint offset;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inoutBufer { uint v[]; };
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
52
shaders/A2Task2KernelDecomposition.comp
Normal file
52
shaders/A2Task2KernelDecomposition.comp
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Why did we not have conflicts in the Reduction?
|
||||||
|
// Because of the sequential addressing (here we use interleaved => we have conflicts).
|
||||||
|
// TODO: tailor to your architecture (these parameters work for virtually all NVIDIA GPUs)
|
||||||
|
#define NUM_BANKS 32
|
||||||
|
#define NUM_BANKS_LOG 5
|
||||||
|
#define SIMD_GROUP_SIZE 32
|
||||||
|
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inoutBufer {uint array[];};
|
||||||
|
layout(binding = 1) buffer offsetBufer {uint higherLevelArray[];};
|
||||||
|
|
||||||
|
// TODO: Shared variables
|
||||||
|
|
||||||
|
// Bank conflicts
|
||||||
|
#define AVOID_BANK_CONFLICTS
|
||||||
|
#ifdef AVOID_BANK_CONFLICTS
|
||||||
|
// TODO: define your conflict-free macro here
|
||||||
|
#else
|
||||||
|
#define OFFSET(A) (A)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
|
||||||
|
// Cache first half of elements in the local memory
|
||||||
|
// Cache second half of elements
|
||||||
|
|
||||||
|
// Perform up-sweep
|
||||||
|
|
||||||
|
// Unroll the last steps when arrived at warp size
|
||||||
|
// Set the last element to 0
|
||||||
|
|
||||||
|
|
||||||
|
// Perform down-sweep
|
||||||
|
}
|
||||||
25
shaders/A2Task2KernelDecompositionOffset.comp
Normal file
25
shaders/A2Task2KernelDecompositionOffset.comp
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
layout (constant_id = 1) const uint SAMPLE_MULTIPLIER = 1;
|
||||||
|
|
||||||
|
// Push constant
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inoutBufer { uint v[]; };
|
||||||
|
layout(binding = 1) buffer offsetBufer { uint g_v[]; };
|
||||||
|
|
||||||
|
// TODO: Shared variables
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
23
shaders/A2Task2Naive.comp
Normal file
23
shaders/A2Task2Naive.comp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#version 450
|
||||||
|
|
||||||
|
/* built in:
|
||||||
|
in uvec3 gl_NumWorkGroups;
|
||||||
|
in uvec3 gl_WorkGroupID;
|
||||||
|
in uvec3 gl_LocalInvocationID;
|
||||||
|
in uvec3 gl_GlobalInvocationID;
|
||||||
|
in uint gl_LocalInvocationIndex;
|
||||||
|
*/
|
||||||
|
|
||||||
|
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
layout(push_constant) uniform PushStruct {
|
||||||
|
uint size;
|
||||||
|
uint offset;
|
||||||
|
} p;
|
||||||
|
|
||||||
|
layout(binding = 0) buffer inBuffer { uint v[]; };
|
||||||
|
layout(binding = 1) buffer outBufer { uint g_v[]; };
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// TODO: Kernel implementation
|
||||||
|
}
|
||||||
41
src/A2Task1.cpp
Normal file
41
src/A2Task1.cpp
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
#include "A2Task1.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
#include "task_common.h"
|
||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
|
||||||
|
/// Builds an input of `problemSize` elements where element i holds i % 97,
/// then computes the reference value on the host.
A2Task1::A2Task1(uint problemSize) : input(problemSize, 0) {
    // The index has the same unsigned type as problemSize: a signed `int`
    // counter would be compared against an unsigned bound and would overflow
    // for sizes above INT_MAX.
    for (uint i = 0; i < problemSize; i++)
        input[i] = i % 97;
    computeReference();
}

/// Uses the caller-supplied values as input and computes the reference value.
A2Task1::A2Task1(std::vector<uint> input) : input(input) {
    computeReference();
}
|
||||||
|
|
||||||
|
/// Runs `solution` on the stored input (prepare, compute, result) and compares
/// its result with the reference value. Prints a message and returns false on
/// a mismatch; returns true otherwise.
bool A2Task1::evaluateSolution(A2Task1Solution& solution) {
    solution.prepare(input);
    solution.compute();

    const auto computed = solution.result();
    const bool matches = (reference == computed);
    if (!matches)
        std::cout << "error: expected " << reference << ", but got " << computed << std::endl;
    return matches;
}
|
||||||
|
|
||||||
|
void A2Task1::computeReference() {
|
||||||
|
reference = 0;
|
||||||
|
for (auto e : input)
|
||||||
|
reference += e;
|
||||||
|
}
|
||||||
85
src/A2Task1Solution/Interleaved.cpp
Normal file
85
src/A2Task1Solution/Interleaved.cpp
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
#include "Interleaved.h"
|
||||||
|
|
||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
/// Stores the application resources and the workgroup size; all Vulkan
/// objects are created later in prepare().
A2Task1SolutionInterleaved::A2Task1SolutionInterleaved(AppResources &app, uint workGroupSize)
    : app(app),
      workGroupSize(workGroupSize)
{
}
|
||||||
|
|
||||||
|
void A2Task1SolutionInterleaved::prepare(const std::vector<uint> &input)
|
||||||
|
{
|
||||||
|
mpInput = &input;
|
||||||
|
|
||||||
|
Cmn::addStorage(bindings, 0);
|
||||||
|
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||||
|
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant));
|
||||||
|
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||||
|
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||||
|
|
||||||
|
// Specialization constant for workgroup size
|
||||||
|
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||||
|
{{0U, 0U, sizeof(workGroupSize)}},
|
||||||
|
};
|
||||||
|
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||||
|
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||||
|
CAST(specValues) * sizeof(int), specValues.data());
|
||||||
|
|
||||||
|
Cmn::createShader(app.device, shaderModule, workingDir +"build/shaders/A2Task1Interleaved.comp.spv");
|
||||||
|
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, shaderModule);
|
||||||
|
|
||||||
|
createBuffer(app.pDevice, app.device, mpInput->size() * sizeof((*mpInput)[0]),
|
||||||
|
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer,
|
||||||
|
vk::MemoryPropertyFlagBits::eDeviceLocal, "inoutBuffer", inoutBuffer);
|
||||||
|
|
||||||
|
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffer, input);
|
||||||
|
|
||||||
|
Cmn::createDescriptorPool(app.device, bindings, descriptorPool);
|
||||||
|
Cmn::allocateDescriptorSet(app.device, descriptorSet, descriptorPool, descriptorSetLayout);
|
||||||
|
Cmn::bindBuffers(app.device, inoutBuffer.buf, descriptorSet, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void A2Task1SolutionInterleaved::compute()
|
||||||
|
{
|
||||||
|
vk::CommandBufferAllocateInfo allocInfo(
|
||||||
|
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||||
|
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||||
|
|
||||||
|
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||||
|
|
||||||
|
cb.begin(beginInfo);
|
||||||
|
|
||||||
|
// TODO: Implement reduction with interleaved addressing
|
||||||
|
|
||||||
|
cb.end();
|
||||||
|
|
||||||
|
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||||
|
|
||||||
|
HostTimer timer;
|
||||||
|
|
||||||
|
app.computeQueue.submit({submitInfo});
|
||||||
|
app.device.waitIdle();
|
||||||
|
|
||||||
|
mstime = timer.elapsed() * 1000;
|
||||||
|
|
||||||
|
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads back the first element of inoutBuffer through a staging buffer and
/// returns it.
uint A2Task1SolutionInterleaved::result() const
{
    // A one-element vector makes the readback copy exactly one uint.
    std::vector<uint> firstElement(1, 0);
    fillHostWithStagingBuffer<uint>(app.pDevice, app.device, app.transferCommandPool, app.transferQueue,
                                    inoutBuffer, firstElement);
    return firstElement[0];
}
|
||||||
|
|
||||||
|
void A2Task1SolutionInterleaved::cleanup()
|
||||||
|
{
|
||||||
|
app.device.destroyDescriptorPool(descriptorPool);
|
||||||
|
|
||||||
|
app.device.destroyPipeline(pipeline);
|
||||||
|
app.device.destroyShaderModule(shaderModule);
|
||||||
|
|
||||||
|
app.device.destroyPipelineLayout(pipelineLayout);
|
||||||
|
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||||
|
bindings.clear();
|
||||||
|
|
||||||
|
destroyBuffer(app.device, inoutBuffer);
|
||||||
|
}
|
||||||
42
src/A2Task1Solution/Interleaved.h
Normal file
42
src/A2Task1Solution/Interleaved.h
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "A2Task1.h"
|
||||||
|
|
||||||
|
class A2Task1SolutionInterleaved : public A2Task1Solution{
|
||||||
|
public:
|
||||||
|
A2Task1SolutionInterleaved(AppResources &app, uint workGroupSize);
|
||||||
|
|
||||||
|
void prepare(const std::vector<uint> &input) override;
|
||||||
|
void compute() override;
|
||||||
|
uint result() const override;
|
||||||
|
void cleanup() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct PushConstant
|
||||||
|
{
|
||||||
|
uint size;
|
||||||
|
uint stride;
|
||||||
|
};
|
||||||
|
|
||||||
|
AppResources &app;
|
||||||
|
uint workGroupSize;
|
||||||
|
|
||||||
|
const std::vector<uint>* mpInput;
|
||||||
|
|
||||||
|
Buffer inoutBuffer;
|
||||||
|
|
||||||
|
// Descriptor & Pipeline Layout
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||||
|
vk::DescriptorSetLayout descriptorSetLayout;
|
||||||
|
vk::PipelineLayout pipelineLayout;
|
||||||
|
|
||||||
|
// Local PPS Pipeline
|
||||||
|
vk::ShaderModule shaderModule;
|
||||||
|
vk::Pipeline pipeline;
|
||||||
|
|
||||||
|
// Descriptor Pool
|
||||||
|
vk::DescriptorPool descriptorPool;
|
||||||
|
|
||||||
|
// Per-dispatch data
|
||||||
|
vk::DescriptorSet descriptorSet;
|
||||||
|
};
|
||||||
97
src/A2Task1Solution/KernelDecomposition.cpp
Normal file
97
src/A2Task1Solution/KernelDecomposition.cpp
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
#include "KernelDecomposition.h"
|
||||||
|
|
||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
/// Stores the application resources, the work-group size and the shader file
/// name. No Vulkan objects are created here; that happens in prepare().
A2Task1SolutionKernelDecomposition::A2Task1SolutionKernelDecomposition(
    AppResources &app,
    uint workGroupSize,
    std::string shaderFileName)
    : app(app),
      workGroupSize(workGroupSize),
      shaderFileName(shaderFileName)
{
}
|
||||||
|
|
||||||
|
void A2Task1SolutionKernelDecomposition::prepare(const std::vector<uint> &input)
|
||||||
|
{
|
||||||
|
mpInput = &input;
|
||||||
|
|
||||||
|
Cmn::addStorage(bindings, 0);
|
||||||
|
Cmn::addStorage(bindings, 1);
|
||||||
|
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||||
|
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant));
|
||||||
|
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||||
|
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||||
|
|
||||||
|
// Specialization constant for workgroup size
|
||||||
|
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||||
|
{{0U, 0U, sizeof(workGroupSize)}},
|
||||||
|
};
|
||||||
|
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||||
|
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||||
|
CAST(specValues) * sizeof(int), specValues.data());
|
||||||
|
|
||||||
|
Cmn::createShader(app.device, shaderModule, shaderFileName);
|
||||||
|
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, shaderModule);
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
createBuffer(app.pDevice, app.device, mpInput->size() * sizeof((*mpInput)[0]),
|
||||||
|
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer,
|
||||||
|
vk::MemoryPropertyFlagBits::eDeviceLocal, "buffer_" + std::to_string(i), buffers[i].buf, buffers[i].mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, buffers[0], input);
|
||||||
|
|
||||||
|
Cmn::createDescriptorPool(app.device, bindings, descriptorPool, 2);
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
Cmn::allocateDescriptorSet(app.device, descriptorSets[i], descriptorPool, descriptorSetLayout);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[0], 0);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[0], 1);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[1], 0);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[1], 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void A2Task1SolutionKernelDecomposition::compute()
|
||||||
|
{
|
||||||
|
vk::CommandBufferAllocateInfo allocInfo(
|
||||||
|
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||||
|
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||||
|
|
||||||
|
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||||
|
|
||||||
|
cb.begin(beginInfo);
|
||||||
|
|
||||||
|
// TODO: Implement reduction with kernel decomposition
|
||||||
|
// NOTE: make sure that activeBuffer points to the buffer with the final result in the end
|
||||||
|
// That buffer is read back for the correctness check
|
||||||
|
// (A2Task1SolutionKernelDecomposition::result())
|
||||||
|
// HINT: You can alternate between the two provided descriptor sets to implement ping-pong
|
||||||
|
|
||||||
|
cb.end();
|
||||||
|
|
||||||
|
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||||
|
|
||||||
|
HostTimer timer;
|
||||||
|
|
||||||
|
app.computeQueue.submit({submitInfo});
|
||||||
|
app.device.waitIdle();
|
||||||
|
|
||||||
|
mstime = timer.elapsed() * 1000;
|
||||||
|
|
||||||
|
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads buffers[activeBuffer] back into a one-element host vector through
/// fillHostWithStagingBuffer and returns that element.
uint A2Task1SolutionKernelDecomposition::result() const
{
    std::vector<uint> readback(1, 0);
    fillHostWithStagingBuffer<uint>(app.pDevice, app.device, app.transferCommandPool,
                                    app.transferQueue, buffers[activeBuffer], readback);
    return readback.front();
}
|
||||||
|
|
||||||
|
void A2Task1SolutionKernelDecomposition::cleanup()
|
||||||
|
{
|
||||||
|
app.device.destroyDescriptorPool(descriptorPool);
|
||||||
|
|
||||||
|
app.device.destroyPipeline(pipeline);
|
||||||
|
app.device.destroyShaderModule(shaderModule);
|
||||||
|
|
||||||
|
app.device.destroyPipelineLayout(pipelineLayout);
|
||||||
|
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||||
|
bindings.clear();
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
destroyBuffer(app.device, buffers[i]);
|
||||||
|
}
|
||||||
44
src/A2Task1Solution/KernelDecomposition.h
Normal file
44
src/A2Task1Solution/KernelDecomposition.h
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "A2Task1.h"
|
||||||
|
|
||||||
|
class A2Task1SolutionKernelDecomposition : public A2Task1Solution{
|
||||||
|
public:
|
||||||
|
A2Task1SolutionKernelDecomposition(AppResources &app, uint workGroupSize, std::string shaderFileName);
|
||||||
|
|
||||||
|
void prepare(const std::vector<uint> &input) override;
|
||||||
|
void compute() override;
|
||||||
|
uint result() const override;
|
||||||
|
void cleanup() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct PushConstant
|
||||||
|
{
|
||||||
|
uint size;
|
||||||
|
};
|
||||||
|
|
||||||
|
AppResources &app;
|
||||||
|
uint workGroupSize;
|
||||||
|
std::string shaderFileName;
|
||||||
|
|
||||||
|
const std::vector<uint>* mpInput;
|
||||||
|
|
||||||
|
Buffer buffers[2];
|
||||||
|
|
||||||
|
// Descriptor & Pipeline Layout
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||||
|
vk::DescriptorSetLayout descriptorSetLayout;
|
||||||
|
vk::PipelineLayout pipelineLayout;
|
||||||
|
|
||||||
|
// Local PPS Pipeline
|
||||||
|
vk::ShaderModule shaderModule;
|
||||||
|
vk::Pipeline pipeline;
|
||||||
|
|
||||||
|
// Descriptor Pool
|
||||||
|
vk::DescriptorPool descriptorPool;
|
||||||
|
|
||||||
|
// Per-dispatch data
|
||||||
|
vk::DescriptorSet descriptorSets[2];
|
||||||
|
|
||||||
|
uint activeBuffer = 0;
|
||||||
|
};
|
||||||
90
src/A2Task1Solution/Sequential.cpp
Normal file
90
src/A2Task1Solution/Sequential.cpp
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
#include "Sequential.h"
|
||||||
|
|
||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
/// Stores the application resources and the work-group size. No Vulkan
/// objects are created here; that happens in prepare().
A2Task1SolutionSequential::A2Task1SolutionSequential(
    AppResources &app,
    uint workGroupSize)
    : app(app),
      workGroupSize(workGroupSize)
{
}
|
||||||
|
|
||||||
|
void A2Task1SolutionSequential::prepare(const std::vector<uint> &input)
|
||||||
|
{
|
||||||
|
mpInput = &input;
|
||||||
|
|
||||||
|
Cmn::addStorage(bindings, 0);
|
||||||
|
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||||
|
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushConstant));
|
||||||
|
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||||
|
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||||
|
|
||||||
|
// Specialization constant for workgroup size
|
||||||
|
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||||
|
{{0U, 0U, sizeof(workGroupSize)}},
|
||||||
|
};
|
||||||
|
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||||
|
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||||
|
CAST(specValues) * sizeof(int), specValues.data());
|
||||||
|
|
||||||
|
Cmn::createShader(app.device, shaderModule, workingDir +"build/shaders/A2Task1Sequential.comp.spv");
|
||||||
|
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, shaderModule);
|
||||||
|
|
||||||
|
createBuffer(app.pDevice, app.device, mpInput->size() * sizeof((*mpInput)[0]),
|
||||||
|
vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer,
|
||||||
|
vk::MemoryPropertyFlagBits::eDeviceLocal, "inoutBuffer", inoutBuffer.buf, inoutBuffer.mem);
|
||||||
|
|
||||||
|
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffer, input);
|
||||||
|
|
||||||
|
Cmn::createDescriptorPool(app.device, bindings, descriptorPool);
|
||||||
|
Cmn::allocateDescriptorSet(app.device, descriptorSet, descriptorPool, descriptorSetLayout);
|
||||||
|
Cmn::bindBuffers(app.device, inoutBuffer.buf, descriptorSet, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void A2Task1SolutionSequential::compute()
|
||||||
|
{
|
||||||
|
vk::CommandBufferAllocateInfo allocInfo(
|
||||||
|
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||||
|
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||||
|
|
||||||
|
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||||
|
|
||||||
|
cb.begin(beginInfo);
|
||||||
|
|
||||||
|
// TODO: Implement reduction with sequential addressing
|
||||||
|
|
||||||
|
cb.end();
|
||||||
|
|
||||||
|
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||||
|
|
||||||
|
HostTimer timer;
|
||||||
|
|
||||||
|
app.computeQueue.submit({submitInfo});
|
||||||
|
app.device.waitIdle();
|
||||||
|
|
||||||
|
mstime = timer.elapsed() * 1000;
|
||||||
|
|
||||||
|
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads `inoutBuffer` back into a one-element host vector through
/// fillHostWithStagingBuffer and returns that element.
uint A2Task1SolutionSequential::result() const
{
    std::vector<uint> readback(1, 0);
    fillHostWithStagingBuffer<uint>(app.pDevice, app.device, app.transferCommandPool,
                                    app.transferQueue, inoutBuffer, readback);
    return readback.front();
}
|
||||||
|
|
||||||
|
void A2Task1SolutionSequential::cleanup()
|
||||||
|
{
|
||||||
|
app.device.destroyDescriptorPool(descriptorPool);
|
||||||
|
|
||||||
|
app.device.destroyPipeline(pipeline);
|
||||||
|
app.device.destroyShaderModule(shaderModule);
|
||||||
|
|
||||||
|
app.device.destroyPipelineLayout(pipelineLayout);
|
||||||
|
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||||
|
bindings.clear();
|
||||||
|
|
||||||
|
auto Bclean = [&](Buffer &b){
|
||||||
|
app.device.destroyBuffer(b.buf);
|
||||||
|
app.device.freeMemory(b.mem);
|
||||||
|
};
|
||||||
|
|
||||||
|
Bclean(inoutBuffer);
|
||||||
|
}
|
||||||
42
src/A2Task1Solution/Sequential.h
Normal file
42
src/A2Task1Solution/Sequential.h
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "A2Task1.h"
|
||||||
|
|
||||||
|
class A2Task1SolutionSequential : public A2Task1Solution{
|
||||||
|
public:
|
||||||
|
A2Task1SolutionSequential(AppResources &app, uint workGroupSize);
|
||||||
|
|
||||||
|
void prepare(const std::vector<uint> &input) override;
|
||||||
|
void compute() override;
|
||||||
|
uint result() const override;
|
||||||
|
void cleanup() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct PushConstant
|
||||||
|
{
|
||||||
|
uint size;
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
AppResources &app;
|
||||||
|
uint workGroupSize;
|
||||||
|
|
||||||
|
const std::vector<uint>* mpInput;
|
||||||
|
|
||||||
|
Buffer inoutBuffer;
|
||||||
|
|
||||||
|
// Descriptor & Pipeline Layout
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||||
|
vk::DescriptorSetLayout descriptorSetLayout;
|
||||||
|
vk::PipelineLayout pipelineLayout;
|
||||||
|
|
||||||
|
// Local PPS Pipeline
|
||||||
|
vk::ShaderModule shaderModule;
|
||||||
|
vk::Pipeline pipeline;
|
||||||
|
|
||||||
|
// Descriptor Pool
|
||||||
|
vk::DescriptorPool descriptorPool;
|
||||||
|
|
||||||
|
// Per-dispatch data
|
||||||
|
vk::DescriptorSet descriptorSet;
|
||||||
|
};
|
||||||
42
src/A2Task2.cpp
Normal file
42
src/A2Task2.cpp
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
#include "A2Task2.h"

#include <utility>
|
||||||
|
|
||||||
|
/// Builds a deterministic test input of `problemSize` elements where element
/// i holds i % 97, then computes the reference prefix sums.
A2Task2::A2Task2(uint problemSize) : input(problemSize, 0) {
    // The index has the same unsigned type as the bound: a signed `int`
    // counter would overflow for sizes above INT_MAX and triggers
    // signed/unsigned comparison warnings.
    for (uint i = 0; i < problemSize; i++)
        input[i] = i % 97;
    computeReference();
}
|
||||||
|
|
||||||
|
/// Takes ownership of a caller-provided input vector and computes the
/// reference prefix sums for it.
A2Task2::A2Task2(std::vector<uint> input) : input(std::move(input)) {
    // The by-value parameter is already a private copy; moving it into the
    // member avoids copying the whole vector a second time.
    computeReference();
}
|
||||||
|
|
||||||
|
void A2Task2::computeReference() {
|
||||||
|
reference.reserve(input.size());
|
||||||
|
uint acc = 0;
|
||||||
|
for (auto i = 0; i < input.size(); i++) {
|
||||||
|
acc += input[i];
|
||||||
|
reference.push_back(acc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs `solution` on the stored input (prepare, then compute) and compares
/// its result with the reference prefix sums.
///
/// @param solution  Implementation under test.
/// @return true when the result has the same length as the reference and all
///         elements match; false otherwise. The first mismatch is reported
///         on stdout.
bool A2Task2::evaluateSolution(A2Task2Solution& solution) {
    solution.prepare(input);
    solution.compute();
    const auto result = solution.result();

    if (result.size() != reference.size()) {
        // End the line like the other diagnostics, so later output does not
        // run on directly after this message.
        std::cout << "error: result and reference vector size don't match!" << std::endl;
        return false;
    }

    for (uint i = 0; i < reference.size(); i++) {
        if (result[i] != reference[i]) {
            std::cout << "error: result and reference don't match at index " << i << "!" << std::endl;
            std::cout << "\tresult: " << result[i] << std::endl;
            std::cout << "\treference: " << reference[i] << std::endl;
            return false;
        }
    }

    return true;
}
|
||||||
114
src/A2Task2Solution/KernelDecomposition.cpp
Normal file
114
src/A2Task2Solution/KernelDecomposition.cpp
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
#include "KernelDecomposition.h"
|
||||||
|
|
||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
/// Stores the application resources and the work-group size. No Vulkan
/// objects are created here; that happens in prepare().
A2Task2SolutionKernelDecomposition::A2Task2SolutionKernelDecomposition(
    AppResources& app,
    uint workGroupSize)
    : app(app),
      workGroupSize(workGroupSize)
{
}
|
||||||
|
|
||||||
|
void A2Task2SolutionKernelDecomposition::prepare(const std::vector<uint>& input) {
|
||||||
|
workSize = input.size();
|
||||||
|
|
||||||
|
// Descriptor & Pipeline Layout
|
||||||
|
Cmn::addStorage(bindings, 0);
|
||||||
|
Cmn::addStorage(bindings, 1);
|
||||||
|
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||||
|
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushStruct));
|
||||||
|
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||||
|
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||||
|
|
||||||
|
// Specialization constant for workgroup size
|
||||||
|
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||||
|
{{0U, 0U, sizeof(workGroupSize)}},
|
||||||
|
};
|
||||||
|
std::array<uint32_t, 1> specValues = {workGroupSize}; //for workgroup sizes
|
||||||
|
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||||
|
CAST(specValues) * sizeof(int), specValues.data());
|
||||||
|
|
||||||
|
// Local PPS Pipeline
|
||||||
|
Cmn::createShader(app.device, cShaderLocalPPS, workingDir + "build/shaders/A2Task2KernelDecomposition.comp.spv");
|
||||||
|
Cmn::createPipeline(app.device, pipelineLocalPPS, pipelineLayout, specInfo, cShaderLocalPPS);
|
||||||
|
|
||||||
|
// Local PPS Offset Pipeline
|
||||||
|
Cmn::createShader(app.device, cShaderLocalPPSOffset,
|
||||||
|
workingDir + "build/shaders/A2Task2KernelDecompositionOffset.comp.spv");
|
||||||
|
Cmn::createPipeline(app.device, pipelineLocalPPSOffset, pipelineLayout, specInfo, cShaderLocalPPSOffset);
|
||||||
|
|
||||||
|
// ### create buffers, get their index in the task.buffers[] array ###
|
||||||
|
using BFlag = vk::BufferUsageFlagBits;
|
||||||
|
auto makeDLocalBuffer = [ this ](vk::BufferUsageFlags usage, vk::DeviceSize size, std::string name) -> Buffer {
|
||||||
|
Buffer b;
|
||||||
|
createBuffer(app.pDevice, app.device, size, usage, vk::MemoryPropertyFlagBits::eDeviceLocal, name, b.buf,
|
||||||
|
b.mem);
|
||||||
|
return b;
|
||||||
|
};
|
||||||
|
|
||||||
|
inoutBuffers.push_back(makeDLocalBuffer(BFlag::eTransferDst | BFlag::eTransferSrc | BFlag::eStorageBuffer,
|
||||||
|
input.size() * sizeof(uint32_t), "buffer_inout_0"));
|
||||||
|
|
||||||
|
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffers[0],
|
||||||
|
input);
|
||||||
|
|
||||||
|
// TO DO create additional buffers (by pushing into inoutBuffers) and descriptors (by pushing into descriptorSets)
|
||||||
|
// You need to create an appropriately-sized DescriptorPool first
|
||||||
|
}
|
||||||
|
|
||||||
|
void A2Task2SolutionKernelDecomposition::compute() {
|
||||||
|
vk::CommandBufferAllocateInfo allocInfo(
|
||||||
|
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||||
|
vk::CommandBuffer cb = app.device.allocateCommandBuffers(allocInfo)[0];
|
||||||
|
|
||||||
|
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||||
|
|
||||||
|
cb.begin(beginInfo);
|
||||||
|
|
||||||
|
// TODO: Implement efficient version of scan
|
||||||
|
// Make sure that the local prefix sum works before you start experimenting with large arrays
|
||||||
|
|
||||||
|
cb.end();
|
||||||
|
|
||||||
|
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||||
|
|
||||||
|
HostTimer timer;
|
||||||
|
|
||||||
|
app.computeQueue.submit({submitInfo});
|
||||||
|
app.device.waitIdle();
|
||||||
|
|
||||||
|
mstime = timer.elapsed() * 1000;
|
||||||
|
|
||||||
|
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<uint> A2Task2SolutionKernelDecomposition::result() const {
|
||||||
|
std::vector<uint> result(workSize, 0);
|
||||||
|
fillHostWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, inoutBuffers[0],
|
||||||
|
result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void A2Task2SolutionKernelDecomposition::cleanup() {
|
||||||
|
|
||||||
|
app.device.destroyDescriptorPool(descriptorPool);
|
||||||
|
|
||||||
|
app.device.destroyPipeline(pipelineLocalPPSOffset);
|
||||||
|
app.device.destroyShaderModule(cShaderLocalPPSOffset);
|
||||||
|
|
||||||
|
app.device.destroyPipeline(pipelineLocalPPS);
|
||||||
|
app.device.destroyShaderModule(cShaderLocalPPS);
|
||||||
|
|
||||||
|
app.device.destroyPipelineLayout(pipelineLayout);
|
||||||
|
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||||
|
bindings.clear();
|
||||||
|
|
||||||
|
auto Bclean = [&](Buffer& b) {
|
||||||
|
app.device.destroyBuffer(b.buf);
|
||||||
|
app.device.freeMemory(b.mem);
|
||||||
|
};
|
||||||
|
|
||||||
|
for (auto inoutBuffer: inoutBuffers) {
|
||||||
|
Bclean(inoutBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
inoutBuffers.clear();
|
||||||
|
}
|
||||||
55
src/A2Task2Solution/KernelDecomposition.h
Normal file
55
src/A2Task2Solution/KernelDecomposition.h
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// Include guard: without it, including this header twice in one translation
// unit redefines the struct. The Task 1 headers already use #pragma once.
#pragma once

#include <iostream>
#include <cstdlib>
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1

#include <vulkan/vulkan.hpp>
#include <fstream>
#include <vector>
#include "initialization.h"
#include "utils.h"
#include "task_common.h"

#include "A2Task2.h"

/// Task 2 solution skeleton: prefix sum (scan) using kernel decomposition,
/// built from a local prefix-sum pipeline and an offset pipeline.
struct A2Task2SolutionKernelDecomposition : A2Task2Solution {
public:
    A2Task2SolutionKernelDecomposition(AppResources &app, uint workGroupSize);

    void prepare(const std::vector<uint> &input) override;
    void compute() override;
    std::vector<uint> result() const override;
    void cleanup() override;

private:
    // Its size defines the push-constant range declared in prepare().
    struct PushStruct
    {
        uint32_t size;
    };

    AppResources &app;
    uint workGroupSize;
    // NOTE(review): not referenced by the visible implementation file.
    std::string localPPSShaderFileName;

    // Number of input elements; set in prepare(). Zero until then instead of
    // an indeterminate value.
    uint workSize = 0;

    // inoutBuffers[0] receives the input and is read back by result().
    std::vector<Buffer> inoutBuffers;

    // Descriptor & Pipeline Layout
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    vk::DescriptorSetLayout descriptorSetLayout;
    vk::PipelineLayout pipelineLayout;

    // Local PPS Pipeline
    vk::ShaderModule cShaderLocalPPS;
    vk::Pipeline pipelineLocalPPS;

    // Local PPS Offset Pipeline
    vk::ShaderModule cShaderLocalPPSOffset;
    vk::Pipeline pipelineLocalPPSOffset;

    // Descriptor Pool
    vk::DescriptorPool descriptorPool;

    // TODO extend with any additional members you may need
};
|
||||||
|
|
||||||
100
src/A2Task2Solution/Naive.cpp
Normal file
100
src/A2Task2Solution/Naive.cpp
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
#include "Naive.h"
|
||||||
|
|
||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
/// Stores the application resources and the work-group size. No Vulkan
/// objects are created here; that happens in prepare().
A2Task2SolutioNaive::A2Task2SolutioNaive(AppResources &app, uint workGroupSize)
    : app(app),
      workGroupSize(workGroupSize)
{
}
|
||||||
|
|
||||||
|
void A2Task2SolutioNaive::prepare(const std::vector<uint> &input) {
|
||||||
|
workSize = input.size();
|
||||||
|
|
||||||
|
// Descriptor & Pipeline Layout
|
||||||
|
Cmn::addStorage(bindings, 0);
|
||||||
|
Cmn::addStorage(bindings, 1);
|
||||||
|
Cmn::createDescriptorSetLayout(app.device, bindings, descriptorSetLayout);
|
||||||
|
vk::PushConstantRange pcr(vk::ShaderStageFlagBits::eCompute, 0, sizeof(PushStruct));
|
||||||
|
vk::PipelineLayoutCreateInfo pipInfo(vk::PipelineLayoutCreateFlags(), 1U, &descriptorSetLayout, 1U, &pcr);
|
||||||
|
pipelineLayout = app.device.createPipelineLayout(pipInfo);
|
||||||
|
|
||||||
|
// Specialization constant for workgroup size
|
||||||
|
std::array<vk::SpecializationMapEntry, 1> specEntries = std::array<vk::SpecializationMapEntry, 1>{
|
||||||
|
{{0U, 0U, sizeof(workGroupSize)}},
|
||||||
|
};
|
||||||
|
std::array<uint32_t, 2> specValues = {workGroupSize}; //for workgroup sizes
|
||||||
|
vk::SpecializationInfo specInfo = vk::SpecializationInfo(CAST(specEntries), specEntries.data(),
|
||||||
|
CAST(specValues) * sizeof(int), specValues.data());
|
||||||
|
|
||||||
|
// Local PPS Offset Pipeline
|
||||||
|
Cmn::createShader(app.device, cShader, workingDir +"build/shaders/A2Task2Naive.comp.spv");
|
||||||
|
Cmn::createPipeline(app.device, pipeline, pipelineLayout, specInfo, cShader);
|
||||||
|
|
||||||
|
// ### create buffers, get their index in the task.buffers[] array ###
|
||||||
|
using BFlag = vk::BufferUsageFlagBits;
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
createBuffer(app.pDevice, app.device, input.size() * sizeof(uint32_t), BFlag::eTransferDst | BFlag::eTransferSrc | BFlag::eStorageBuffer, vk::MemoryPropertyFlagBits::eDeviceLocal, "buffer_" + std::to_string(i), buffers[i]);
|
||||||
|
|
||||||
|
fillDeviceWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, buffers[0], input);
|
||||||
|
|
||||||
|
Cmn::createDescriptorPool(app.device, bindings, descriptorPool, 2);
|
||||||
|
|
||||||
|
for (uint i = 0; i < 2; i++)
|
||||||
|
Cmn::allocateDescriptorSet(app.device, descriptorSets[i], descriptorPool, descriptorSetLayout);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[0], 0);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[0], 1);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[1].buf, descriptorSets[1], 0);
|
||||||
|
Cmn::bindBuffers(app.device, buffers[0].buf, descriptorSets[1], 1);
|
||||||
|
|
||||||
|
activeBuffer = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void A2Task2SolutioNaive::compute() {
|
||||||
|
vk::CommandBufferAllocateInfo allocInfo(
|
||||||
|
app.computeCommandPool, vk::CommandBufferLevel::ePrimary, 1U);
|
||||||
|
vk::CommandBuffer cb = app.device.allocateCommandBuffers( allocInfo )[0];
|
||||||
|
|
||||||
|
vk::CommandBufferBeginInfo beginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
|
||||||
|
|
||||||
|
cb.begin(beginInfo);
|
||||||
|
|
||||||
|
cb.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
|
||||||
|
|
||||||
|
// TODO: Implement naive scan
|
||||||
|
// NOTE: make sure that activeBuffer points to the buffer with the final result in the end
|
||||||
|
// That buffer is read back for the correctness check
|
||||||
|
// (A2Task2SolutionNaive::result())
|
||||||
|
// HINT: You can alternate between the two provided descriptor sets to implement ping-pong
|
||||||
|
|
||||||
|
cb.end();
|
||||||
|
|
||||||
|
vk::SubmitInfo submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cb);
|
||||||
|
|
||||||
|
HostTimer timer;
|
||||||
|
|
||||||
|
app.computeQueue.submit({submitInfo});
|
||||||
|
app.device.waitIdle();
|
||||||
|
|
||||||
|
mstime = timer.elapsed() * 1000;
|
||||||
|
|
||||||
|
app.device.freeCommandBuffers(app.computeCommandPool, 1U, &cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<uint> A2Task2SolutioNaive::result() const {
|
||||||
|
std::vector<uint> result(workSize, 0);
|
||||||
|
fillHostWithStagingBuffer(app.pDevice, app.device, app.transferCommandPool, app.transferQueue, buffers[activeBuffer], result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void A2Task2SolutioNaive::cleanup() {
|
||||||
|
app.device.destroyDescriptorPool(descriptorPool);
|
||||||
|
|
||||||
|
app.device.destroyPipeline(pipeline);
|
||||||
|
app.device.destroyShaderModule(cShader);
|
||||||
|
|
||||||
|
app.device.destroyPipelineLayout(pipelineLayout);
|
||||||
|
app.device.destroyDescriptorSetLayout(descriptorSetLayout);
|
||||||
|
bindings.clear();
|
||||||
|
|
||||||
|
for (auto buffer : buffers)
|
||||||
|
destroyBuffer(app.device, buffer);
|
||||||
|
}
|
||||||
53
src/A2Task2Solution/Naive.h
Normal file
53
src/A2Task2Solution/Naive.h
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// Include guard: without it, including this header twice in one translation
// unit redefines the struct. The Task 1 headers already use #pragma once.
#pragma once

#include <iostream>
#include <cstdlib>
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1

#include <vulkan/vulkan.hpp>
#include <fstream>
#include <vector>
#include "initialization.h"
#include "utils.h"
#include "task_common.h"

#include "A2Task2.h"

/// Task 2 solution skeleton: naive prefix sum (scan) using two ping-pong
/// buffers.
struct A2Task2SolutioNaive : A2Task2Solution {
public:
    A2Task2SolutioNaive(AppResources &app, uint workGroupSize);

    void prepare(const std::vector<uint> &input) override;
    void compute() override;
    std::vector<uint> result() const override;
    void cleanup() override;

private:
    // Its size defines the push-constant range declared in prepare().
    struct PushStruct
    {
        uint size;
        uint offset;
    };

    AppResources &app;
    uint workGroupSize;

    // Number of input elements; set in prepare(). Zero until then instead of
    // an indeterminate value.
    uint workSize = 0;

    // Ping-pong buffers; prepare() uploads the input into buffers[0].
    Buffer buffers[2];

    // Descriptor & Pipeline Layout
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    vk::DescriptorSetLayout descriptorSetLayout;
    vk::PipelineLayout pipelineLayout;

    vk::ShaderModule cShader;
    vk::Pipeline pipeline;

    // Descriptor Pool
    vk::DescriptorPool descriptorPool;

    // Descriptors: set 0 binds (buffers[0], buffers[1]),
    // set 1 binds (buffers[1], buffers[0]).
    vk::DescriptorSet descriptorSets[2];

    // Index of the buffer that result() reads back.
    uint activeBuffer = 0;
};
|
||||||
|
|
||||||
15
src/host_timer.cpp
Normal file
15
src/host_timer.cpp
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#include "host_timer.h"
|
||||||
|
|
||||||
|
/// Starts timing immediately: construction records the current time through
/// reset().
HostTimer::HostTimer() { reset(); }
|
||||||
|
|
||||||
|
/// Restarts the measurement by storing the current clock reading in `start`.
void HostTimer::reset()
{
    start = clock::now();
}
|
||||||
|
|
||||||
|
double HostTimer::elapsed() const {
|
||||||
|
auto end = clock::now();
|
||||||
|
std::chrono::duration<double> duration = end - start;
|
||||||
|
return duration.count();
|
||||||
|
}
|
||||||
518
src/initialization.cpp
Normal file
518
src/initialization.cpp
Normal file
@@ -0,0 +1,518 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cstring>
|
||||||
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
#define VK_ENABLE_BETA_EXTENSIONS
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||||
|
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
// This file creates the Vulkan instance, selects the physical device, creates
// the logical device and retrieves the compute/transfer queues.
// Device suitability checks also belong here.
|
||||||
|
|
||||||
|
/// Vendor/device ID pair identifying a physical device.
/// Both fields default to 0 so a default-constructed cache never holds
/// indeterminate values; aggregate initialisation `{vendor, device}` still
/// works.
struct DeviceSelectionCache {
    uint32_t vendorID = 0;
    uint32_t deviceID = 0;
};
|
||||||
|
|
||||||
|
#ifdef NDEBUG
|
||||||
|
const bool enableValidationLayers = false;
|
||||||
|
#else
|
||||||
|
const bool enableValidationLayers = true;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const std::vector<const char*> validationLayers = {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
"VK_LAYER_KHRONOS_validation"
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
const std::vector<const char*> instanceExtensions = {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
VK_EXT_DEBUG_UTILS_EXTENSION_NAME,
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<const char*> extensionNames = {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
void AppResources::destroy()
|
||||||
|
{
|
||||||
|
this->device.destroyQueryPool(this->queryPool);
|
||||||
|
//this->device.freeCommandBuffers(this->computeCommandPool, 1U, &this->computeCommandBuffer);
|
||||||
|
//this->device.freeCommandBuffers(this->transferCommandPool, 1U, &this->transferCommandBuffer);
|
||||||
|
this->device.destroyCommandPool(this->computeCommandPool);
|
||||||
|
//this->device.destroyCommandPool(this->transferCommandPool);
|
||||||
|
|
||||||
|
this->device.destroy();
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
this->instance.destroyDebugUtilsMessengerEXT(this->dbgUtilsMgr);
|
||||||
|
#endif
|
||||||
|
this->instance.destroy();
|
||||||
|
}
|
||||||
|
/// Fills `app` with an instance, a physical and a logical device, one queue
/// used for both compute and transfer work, one command pool shared by both
/// roles, and a two-entry timestamp query pool.
void initApp(AppResources& app)
{
    createInstance(app.instance, app.dbgUtilsMgr, "Assignment1, Task 1", "Idkwhattowrite");

    // Physical device and its (subgroup) properties.
    selectPhysicalDevice(app.instance, app.pDevice);
    auto propertyChain =
        app.pDevice.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceSubgroupProperties>();
    app.pDeviceProperties = propertyChain.get<vk::PhysicalDeviceProperties2>();
    app.pDeviceSubgroupProperties = propertyChain.get<vk::PhysicalDeviceSubgroupProperties>();

    // Queue family indices, then the logical device.
    std::tie(app.cQ, app.tQ) = getComputeAndTransferQueues(app.pDevice);
    createLogicalDevice(app.instance, app.pDevice, app.device);

    // Transfers reuse the compute queue: the transfer family index found
    // above is overwritten with the compute family index.
    app.device.getQueue(app.cQ, 0U, &app.computeQueue);
    app.transferQueue = app.computeQueue;
    app.tQ = app.cQ;

    // One command pool serves both roles as well.
    createCommandPool(app.device, app.computeCommandPool, app.cQ);
    app.transferCommandPool = app.computeCommandPool;

    createTimestampQueryPool(app.device, app.queryPool, 2);
}
|
||||||
|
|
||||||
|
|
||||||
|
//This is the function in which errors will go through to be displayed.
|
||||||
|
|
||||||
|
VKAPI_ATTR VkBool32 VKAPI_CALL
|
||||||
|
debugUtilsMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
|
||||||
|
VkDebugUtilsMessageTypeFlagsEXT messageTypes,
|
||||||
|
VkDebugUtilsMessengerCallbackDataEXT const* pCallbackData,
|
||||||
|
void* /*pUserData*/)
|
||||||
|
{
|
||||||
|
if (enableValidationLayers)
|
||||||
|
{
|
||||||
|
if (pCallbackData->messageIdNumber == 648835635)
|
||||||
|
{
|
||||||
|
// UNASSIGNED-khronos-Validation-debug-build-warning-message
|
||||||
|
return VK_FALSE;
|
||||||
|
}
|
||||||
|
if (pCallbackData->messageIdNumber == 767975156)
|
||||||
|
{
|
||||||
|
// UNASSIGNED-BestPractices-vkCreateInstance-specialuse-extension
|
||||||
|
return VK_FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cerr << vk::to_string(static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(messageSeverity)) << ": "
|
||||||
|
<< vk::to_string(static_cast<vk::DebugUtilsMessageTypeFlagsEXT>(messageTypes)) << ":\n";
|
||||||
|
std::cerr << "\t"
|
||||||
|
<< "messageIDName = <" << pCallbackData->pMessageIdName << ">\n";
|
||||||
|
std::cerr << "\t"
|
||||||
|
<< "messageIdNumber = " << pCallbackData->messageIdNumber << "\n";
|
||||||
|
std::cerr << "\t"
|
||||||
|
<< "message = <" << pCallbackData->pMessage << ">\n";
|
||||||
|
if (0 < pCallbackData->queueLabelCount)
|
||||||
|
{
|
||||||
|
std::cerr << "\t"
|
||||||
|
<< "Queue Labels:\n";
|
||||||
|
for (uint8_t i = 0; i < pCallbackData->queueLabelCount; i++)
|
||||||
|
{
|
||||||
|
std::cerr << "\t\t"
|
||||||
|
<< "labelName = <" << pCallbackData->pQueueLabels[i].pLabelName << ">\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (0 < pCallbackData->cmdBufLabelCount)
|
||||||
|
{
|
||||||
|
std::cerr << "\t"
|
||||||
|
<< "CommandBuffer Labels:\n";
|
||||||
|
for (uint8_t i = 0; i < pCallbackData->cmdBufLabelCount; i++)
|
||||||
|
{
|
||||||
|
std::cerr << "\t\t"
|
||||||
|
<< "labelName = <" << pCallbackData->pCmdBufLabels[i].pLabelName << ">\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (0 < pCallbackData->objectCount)
|
||||||
|
{
|
||||||
|
std::cerr << "\t"
|
||||||
|
<< "Objects:\n";
|
||||||
|
for (uint8_t i = 0; i < pCallbackData->objectCount; i++)
|
||||||
|
{
|
||||||
|
std::cerr << "\t\t"
|
||||||
|
<< "Object " << i << "\n";
|
||||||
|
std::cerr << "\t\t\t"
|
||||||
|
<< "objectType = "
|
||||||
|
<< vk::to_string(static_cast<vk::ObjectType>(pCallbackData->pObjects[i].objectType)) << "\n";
|
||||||
|
std::cerr << "\t\t\t"
|
||||||
|
<< "objectHandle = " << pCallbackData->pObjects[i].objectHandle << "\n";
|
||||||
|
if (pCallbackData->pObjects[i].pObjectName)
|
||||||
|
{
|
||||||
|
std::cerr << "\t\t\t"
|
||||||
|
<< "objectName = <" << pCallbackData->pObjects[i].pObjectName << ">\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return VK_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
This function fills the structure with flags indicating
|
||||||
|
which error messages should go through
|
||||||
|
*/
|
||||||
|
vk::DebugUtilsMessengerCreateInfoEXT makeDebugUtilsMessengerCreateInfoEXT()
|
||||||
|
{
|
||||||
|
|
||||||
|
using SEVERITY = vk::DebugUtilsMessageSeverityFlagBitsEXT; // for readability
|
||||||
|
using MESSAGE = vk::DebugUtilsMessageTypeFlagBitsEXT;
|
||||||
|
return { {},
|
||||||
|
SEVERITY::eWarning | SEVERITY::eError,
|
||||||
|
MESSAGE::eGeneral | MESSAGE::ePerformance | MESSAGE::eValidation,
|
||||||
|
&debugUtilsMessengerCallback };
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
The dynamic loader allows us to access many extensions
|
||||||
|
Required before creating instance for loading the extension VK_EXT_DEBUG_UTILS_EXTENSION_NAME
|
||||||
|
*/
|
||||||
|
void initDynamicLoader()
|
||||||
|
{
|
||||||
|
#if VK_HEADER_VERSION >= 301
|
||||||
|
using VulkanDynamicLoader = vk::detail::DynamicLoader;
|
||||||
|
#else
|
||||||
|
using VulkanDynamicLoader = vk::DynamicLoader;
|
||||||
|
#endif
|
||||||
|
static VulkanDynamicLoader dl;
|
||||||
|
static PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
|
||||||
|
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Creates the Vulkan instance (API version 1.2) and, when validation is
// enabled, the debug messenger.
//   instance            - receives the new instance
//   debugUtilsMessenger - receives the messenger (left untouched when validation is disabled)
//   appName, engineName - strings reported in vk::ApplicationInfo
void createInstance(vk::Instance& instance, vk::DebugUtilsMessengerEXT& debugUtilsMessenger,
                    std::string appName, std::string engineName)
{
    initDynamicLoader();

    vk::ApplicationInfo appInfo(appName.c_str(), 1, engineName.c_str(), 1, VK_API_VERSION_1_2);

    const auto layerCount = static_cast<uint32_t>(validationLayers.size());
    const auto extensionCount = static_cast<uint32_t>(instanceExtensions.size());

    // flags, pAppInfo, layerCount, layerNames, extCount, extNames
    vk::InstanceCreateInfo createInfo({}, &appInfo,
                                      layerCount, validationLayers.data(),
                                      extensionCount, instanceExtensions.data());

    // Chaining the messenger create info into pNext lets validation report
    // problems that occur during instance creation itself. StructureChain
    // fills pNext in a type-safe way.
    vk::DebugUtilsMessengerCreateInfoEXT messengerInfo = makeDebugUtilsMessengerCreateInfoEXT();
    vk::StructureChain<vk::InstanceCreateInfo, vk::DebugUtilsMessengerCreateInfoEXT> chain =
        { createInfo, messengerInfo };

    if (!enableValidationLayers)
    {
        // Release mode: drop the messenger info from the chain.
        chain.unlink<vk::DebugUtilsMessengerCreateInfoEXT>();
    }

    instance = vk::createInstance(chain.get<vk::InstanceCreateInfo>());

    // Re-initialise the dispatcher so instance-level extension functions resolve.
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance);

    if (enableValidationLayers)
    {
        debugUtilsMessenger = instance.createDebugUtilsMessengerEXT(makeDebugUtilsMessengerCreateInfoEXT());
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
std::tuple<uint32_t, uint32_t> getComputeAndTransferQueues(vk::PhysicalDevice& pDevice)
|
||||||
|
{
|
||||||
|
uint32_t tq = -1;
|
||||||
|
std::optional<uint32_t> otq;
|
||||||
|
uint32_t cq = -1;
|
||||||
|
std::optional<uint32_t> ocq;
|
||||||
|
|
||||||
|
using Chain = vk::StructureChain<vk::QueueFamilyProperties2, vk::QueueFamilyCheckpointPropertiesNV>;
|
||||||
|
using QFB = vk::QueueFlagBits;
|
||||||
|
#if VK_HEADER_VERSION >= 301
|
||||||
|
using VulkanDispatchLoaderDynamic = vk::detail::DispatchLoaderDynamic;
|
||||||
|
#else
|
||||||
|
using VulkanDispatchLoaderDynamic = vk::DispatchLoaderDynamic;
|
||||||
|
#endif
|
||||||
|
auto queueFamilyProperties2 = pDevice.getQueueFamilyProperties2<Chain, std::allocator<Chain>, VulkanDispatchLoaderDynamic>();
|
||||||
|
|
||||||
|
for (uint32_t j = 0; j < queueFamilyProperties2.size(); j++)
|
||||||
|
{
|
||||||
|
vk::QueueFamilyProperties const& properties =
|
||||||
|
queueFamilyProperties2[static_cast<size_t>(j)].get<vk::QueueFamilyProperties2>().queueFamilyProperties;
|
||||||
|
|
||||||
|
if (properties.queueFlags & QFB::eCompute)
|
||||||
|
{
|
||||||
|
if (!(properties.queueFlags & QFB::eGraphics ||
|
||||||
|
properties.queueFlags & QFB::eProtected))
|
||||||
|
ocq = j; // When a queue supports only compute and not graphics we want to use that
|
||||||
|
cq = j;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (properties.queueFlags & QFB::eTransfer)
|
||||||
|
{
|
||||||
|
if (!(properties.queueFlags & QFB::eCompute ||
|
||||||
|
properties.queueFlags & QFB::eGraphics ||
|
||||||
|
properties.queueFlags & QFB::eProtected))
|
||||||
|
otq = j; // When a queue supports only transfer, we want to use this one
|
||||||
|
tq = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (otq.has_value())
|
||||||
|
tq = otq.value();
|
||||||
|
if (ocq.has_value())
|
||||||
|
cq = ocq.value();
|
||||||
|
return std::tuple<uint32_t, uint32_t>(cq, tq);
|
||||||
|
}
|
||||||
|
// Chooses the physical device to run on and stores it in `pDevice`.
//
// If the file "device_selection_cache" exists in the working directory and
// names a device (vendorID + deviceID) that is present, that device is chosen
// silently. Otherwise the available devices are listed, the user is prompted
// on stdin for an index, and the choice is written back to the cache file.
//
// Throws std::runtime_error when no physical device is available or when stdin
// is exhausted before a valid index was entered.
void selectPhysicalDevice(vk::Instance& instance, vk::PhysicalDevice& pDevice)
{
    std::vector<vk::PhysicalDevice> physDs = instance.enumeratePhysicalDevices();
    if (physDs.empty())
    {
        // Without this guard the prompt below could never be satisfied.
        throw std::runtime_error("No Vulkan physical device available.");
    }

    const static char* cache_name = "device_selection_cache";
    const static char* recreation_message = "To select a new device, delete the file \"device_selection_cache\" in your working directory before executing the framework.";

    std::ifstream ifile(cache_name, std::ios::binary);
    if (ifile.is_open()) {
        DeviceSelectionCache cache{};
        // A truncated or unreadable cache must not be compared against real
        // devices, so only trust the record when the full read succeeded.
        const bool cacheReadable =
            static_cast<bool>(ifile.read(reinterpret_cast<char*>(&cache), sizeof(cache)));
        ifile.close();

        if (cacheReadable) {
            for (const auto& physD : physDs) {
                auto props = physD.getProperties2().properties;
                if (props.vendorID == cache.vendorID && props.deviceID == cache.deviceID) {
                    std::cout << "Selecting previously selected device: \"" << props.deviceName.data() << "\"" << std::endl;
                    std::cout << recreation_message << std::endl;
                    pDevice = physD;
                    return;
                }
            }
        }
        std::cout << "Previously selected device was not found." << std::endl;
    }
    else {
        std::cout << "No previous device selection found." << std::endl;
    }

    std::cout << "Select one of the available devices:" << std::endl;

    for (size_t idx = 0; idx < physDs.size(); idx++) {
        auto props = physDs[idx].getProperties2().properties;
        std::cout << idx << ")\t" << props.deviceName.data() << std::endl;
    }

    uint32_t i = 0;
    while (true) {
        std::cout << "Enter device number: ";
        if (std::cin >> i) {
            if (i < physDs.size()) break;
            continue; // a number, but out of range: ask again
        }
        if (std::cin.eof() || std::cin.bad()) {
            // No more input will arrive; re-prompting would loop forever.
            throw std::runtime_error("Device selection aborted: no input available.");
        }
        // Non-numeric input: reset the stream and drop the rest of the line.
        std::cin.clear();
        std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
    }

    auto props = physDs[i].getProperties2().properties;
    DeviceSelectionCache cache{};
    cache.vendorID = props.vendorID;
    cache.deviceID = props.deviceID;

    // Persisting the choice is best effort: a failed write only means the
    // user is asked again on the next run.
    std::ofstream ofile(cache_name, std::ios::out | std::ios::binary);
    ofile.write(reinterpret_cast<const char*>(&cache), sizeof(cache));
    ofile.close();
    const bool cacheWritten = !ofile.fail();

    std::cout << "Selected device: \"" << props.deviceName.data() << "\"" << std::endl;
    if (cacheWritten) {
        std::cout << "This device will be automatically selected in the future." << std::endl
                  << recreation_message << std::endl;
    }
    else {
        std::cout << "Warning: could not write \"" << cache_name
                  << "\"; the selection will not be remembered." << std::endl;
    }

    pDevice = physDs[i];
}
|
||||||
|
|
||||||
|
// The logical device holds the queues and will be used in almost every call from now on
|
||||||
|
|
||||||
|
void createLogicalDevice(vk::Instance& instance, vk::PhysicalDevice& pDevice, vk::Device& device)
|
||||||
|
{
|
||||||
|
|
||||||
|
//First get the queues
|
||||||
|
uint32_t cQ, tQ;
|
||||||
|
std::tie(cQ, tQ) = getComputeAndTransferQueues(pDevice);
|
||||||
|
std::vector<vk::DeviceQueueCreateInfo> queuesInfo;
|
||||||
|
// flags, queueFamily, queueCount, queuePriority
|
||||||
|
float prio = 1.f;
|
||||||
|
vk::DeviceQueueCreateInfo computeInfo({}, cQ, 1U, &prio);
|
||||||
|
vk::DeviceQueueCreateInfo transferInfo({}, tQ, 1U, &prio);
|
||||||
|
|
||||||
|
queuesInfo.push_back(computeInfo);
|
||||||
|
//queuesInfo.push_back(transferInfo);
|
||||||
|
// {}, queueCreateInfoCount, pQueueCreateInfos, enabledLayerCount, ppEnabledLayerNames, enabledExtensionCount, ppEnabledExtensionNames, pEnabledFeatures
|
||||||
|
|
||||||
|
std::vector extensionNames_(extensionNames);
|
||||||
|
|
||||||
|
auto deviceExtensionProperties = pDevice.enumerateDeviceExtensionProperties();
|
||||||
|
bool enable_portability_subset = false;;
|
||||||
|
for (auto ext : deviceExtensionProperties) {
|
||||||
|
if (strcmp(ext.extensionName.data(), VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME) == 0) {
|
||||||
|
enable_portability_subset = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enable_portability_subset) {
|
||||||
|
extensionNames_.push_back(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::DeviceCreateInfo dci({}, CAST(queuesInfo), queuesInfo.data(),
|
||||||
|
CAST(validationLayers), validationLayers.data(),
|
||||||
|
CAST(extensionNames_), extensionNames_.data()); // no extension
|
||||||
|
|
||||||
|
device = pDevice.createDevice(dci);
|
||||||
|
VULKAN_HPP_DEFAULT_DISPATCHER.init(device);
|
||||||
|
|
||||||
|
setObjectName(device, device, "This is my lovely device !");
|
||||||
|
}
|
||||||
|
// Creates a command pool with default flags for the given queue family and
// stores it in `commandPool`.
void createCommandPool(vk::Device& device, vk::CommandPool& commandPool, uint32_t queueIndex)
{
    const vk::CommandPoolCreateFlags noFlags{};
    const vk::CommandPoolCreateInfo poolInfo(noFlags, queueIndex);
    commandPool = device.createCommandPool(poolInfo);
}
|
||||||
|
|
||||||
|
// Destroys the debug messenger (debug builds only) and then the instance.
void destroyInstance(vk::Instance& instance, vk::DebugUtilsMessengerEXT& debugUtilsMessenger)
{
#if !defined(NDEBUG)
    // The messenger must be destroyed before the instance that owns it.
    instance.destroyDebugUtilsMessengerEXT(debugUtilsMessenger);
#endif
    instance.destroy();
}
|
||||||
|
// Destroys the logical device.
void destroyLogicalDevice(vk::Device& device)
{
    vk::Device& target = device;
    target.destroy();
}
|
||||||
|
|
||||||
|
// Destroys the command pool and resets the caller's handle to a null handle.
void destroyCommandPool(vk::Device& device, vk::CommandPool& commandPool)
{
    device.destroyCommandPool(commandPool);
    // Leave a default-constructed (null) handle behind instead of a stale one.
    commandPool = vk::CommandPool{};
}
|
||||||
|
|
||||||
|
// Prints every queue family of `pDevice` to std::cout: flags, queue count,
// timestamp bits and minimum image transfer granularity. When `diagExt` is
// true the NV checkpoint execution stage mask is printed as well.
void showAvailableQueues(vk::PhysicalDevice& pDevice, bool diagExt)
{
    using Chain = vk::StructureChain<vk::QueueFamilyProperties2, vk::QueueFamilyCheckpointPropertiesNV>;
#if VK_HEADER_VERSION >= 301
    using VulkanDispatchLoaderDynamic = vk::detail::DispatchLoaderDynamic;
#else
    using VulkanDispatchLoaderDynamic = vk::DispatchLoaderDynamic;
#endif

    auto families = pDevice.getQueueFamilyProperties2<Chain, std::allocator<Chain>, VulkanDispatchLoaderDynamic>();

    size_t familyIndex = 0;
    for (auto& family : families)
    {
        vk::QueueFamilyProperties const& props = family.get<vk::QueueFamilyProperties2>().queueFamilyProperties;
        const auto& granularity = props.minImageTransferGranularity;

        std::cout << "\t" << "QueueFamily " << familyIndex << "\n";
        std::cout << "\t\t" << "QueueFamilyProperties:\n";
        std::cout << "\t\t\t" << "queueFlags                  = " << vk::to_string(props.queueFlags) << "\n";
        std::cout << "\t\t\t" << "queueCount                  = " << props.queueCount << "\n";
        std::cout << "\t\t\t" << "timestampValidBits          = " << props.timestampValidBits << "\n";
        std::cout << "\t\t\t" << "minImageTransferGranularity = " << granularity.width << " x "
                  << granularity.height << " x "
                  << granularity.depth << "\n";
        std::cout << "\n";

        if (diagExt)
        {
            vk::QueueFamilyCheckpointPropertiesNV const& checkpoint =
                family.get<vk::QueueFamilyCheckpointPropertiesNV>();
            std::cout << "\t\t" << "CheckPointPropertiesNV:\n";
            std::cout << "\t\t\t" << "checkpointExecutionStageMask = "
                      << vk::to_string(checkpoint.checkpointExecutionStageMask) << "\n";
            std::cout << "\n";
        }

        ++familyIndex;
    }
}
|
||||||
|
|
||||||
|
// Creates a query pool holding `queryCount` timestamp queries and stores it in
// `queryPool`.
void createTimestampQueryPool(vk::Device& device, vk::QueryPool& queryPool, uint32_t queryCount)
{
    // flags, query type, number of queries
    const vk::QueryPoolCreateInfo poolInfo({}, vk::QueryType::eTimestamp, queryCount);
    queryPool = device.createQueryPool(poolInfo);
}
|
||||||
|
|
||||||
|
// Destroys the query pool and resets the caller's handle to a null handle.
void destroyQueryPool(vk::Device& device, vk::QueryPool& queryPool)
{
    device.destroyQueryPool(queryPool);
    // Leave a default-constructed (null) handle behind instead of a stale one.
    queryPool = vk::QueryPool{};
}
|
||||||
|
|
||||||
|
// Prints a report about `pDevice` to std::cout: identification, supported
// device extensions, memory types, memory heaps (only when
// VK_EXT_memory_budget is advertised), device layers and finally the queue
// families via showAvailableQueues().
void printDeviceCapabilities(vk::PhysicalDevice& pDevice)
{
    std::vector<vk::ExtensionProperties> ext = pDevice.enumerateDeviceExtensionProperties();
    std::vector<vk::LayerProperties> layers = pDevice.enumerateDeviceLayerProperties();
    vk::PhysicalDeviceMemoryProperties memoryProperties = pDevice.getMemoryProperties();
    vk::PhysicalDeviceProperties properties = pDevice.getProperties();

    std::cout << "====================" << std::endl
              << "Device Name    : " << properties.deviceName.data() << std::endl
              << "Device ID      : " << properties.deviceID << std::endl
              << "Device Type    : " << vk::to_string(properties.deviceType) << std::endl
              << "Driver Version : " << properties.driverVersion << std::endl
              << "API Version    : " << properties.apiVersion << std::endl
              << "====================" << std::endl
              << std::endl;

    bool budgetExt = false;
    bool diagExt = false;
    std::cout << "This device supports the following extensions (" << ext.size() << "): " << std::endl;
    for (const vk::ExtensionProperties& e : ext)
    {
        // Build the string once; it is printed and compared twice below.
        const std::string name(e.extensionName.data());
        std::cout << name << std::endl;
        if (name == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME)
            budgetExt = true;
        if (name == VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME)
            diagExt = true;
    }

    // memoryTypes / memoryHeaps are fixed-size arrays; only the first
    // memoryTypeCount / memoryHeapCount entries are valid, so iterate strictly
    // below the count (the previous `c > count` check printed one extra entry).
    std::cout << "This device supports the following memory types (" << memoryProperties.memoryTypeCount << "): " << std::endl;
    for (uint32_t t = 0; t < memoryProperties.memoryTypeCount; ++t)
    {
        const vk::MemoryType& e = memoryProperties.memoryTypes[t];
        std::cout << e.heapIndex << "\t ";
        std::cout << vk::to_string(e.propertyFlags) << std::endl;
    }
    std::cout << "====================" << std::endl
              << std::endl;

    if (budgetExt)
    {
        std::cout << "This device has the following heaps (" << memoryProperties.memoryHeapCount << "): " << std::endl;
        for (uint32_t hIdx = 0; hIdx < memoryProperties.memoryHeapCount; ++hIdx)
        {
            const vk::MemoryHeap& e = memoryProperties.memoryHeaps[hIdx];
            std::cout << "Size: " << formatSize(e.size) << "\t ";
            std::cout << vk::to_string(e.flags) << std::endl;
        }
    }

    std::cout << "====================" << std::endl
              << std::endl
              << "This device has the following layers (" << layers.size() << "): " << std::endl;
    for (const vk::LayerProperties& l : layers)
        std::cout << std::string(l.layerName.data()) << "\t : " << std::string(l.description.data()) << std::endl;
    std::cout << "====================" << std::endl
              << std::endl;

    showAvailableQueues(pDevice, diagExt);
}
|
||||||
136
src/main.cpp
Normal file
136
src/main.cpp
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
#include "A2Task1.h"
|
||||||
|
#include "A2Task2.h"
|
||||||
|
#include "A2Task1Solution/Sequential.h"
|
||||||
|
#include "A2Task1Solution/Interleaved.h"
|
||||||
|
#include "A2Task1Solution/KernelDecomposition.h"
|
||||||
|
#include "A2Task2Solution/Naive.h"
|
||||||
|
#include "A2Task2Solution/KernelDecomposition.h"
|
||||||
|
#include "renderdoc.h"
|
||||||
|
|
||||||
|
// Runs every Task 1 solution variant on 128 Mi elements and prints, for each
// one, whether it passed plus the mean execution time and throughput.
void run_A2_task1(AppResources &app){
    size_t size = 128*1024*1024;
    A2Task1 a2Task1(size);
    std::cout<<"====== A2 TASK 1 ======" <<std::endl;

    // Evaluates `solution` up to N times, stopping at the first failure, and
    // accumulates the mean of the per-run time reported in solution->mstime.
    auto evaluateTask1Solution = [&](A2Task1Solution* solution, std::string name, int N=10) {
        std::cout << "[Task1] evaluating " << name << " with size: "<<size<< std::endl;

        bool pass = true;
        float mstime = 0.f;
        int run = 0;
        while (run < N) {
            pass &= a2Task1.evaluateSolution(*solution);
            solution->cleanup();
            mstime += solution->mstime / N;
            if (!pass) {
                break;
            }
            ++run;
        }

        if (!pass) {
            std::cout << "TEST FAILED" << std::endl;
            return;
        }
        // elements / ms / 1e6 == giga-elements per second
        std::cout << "TEST PASSED. Execution time: " << mstime<< " ms, "
                  << "Throughput: " << size / mstime / 1000000 << " GE/s" << std::endl;
    };

    // Each solution is constructed right before it is evaluated; all of them
    // stay alive until the end of this function.
    A2Task1SolutionInterleaved interleavedSolution(app, 128);
    evaluateTask1Solution(&interleavedSolution, "Interleaved");

    A2Task1SolutionSequential sequentialSolution(app, 128);
    evaluateTask1Solution(&sequentialSolution, "Sequential");

    A2Task1SolutionKernelDecomposition kernelDecompositionSolution(
        app, 128, workingDir +"build/shaders/A2Task1KernelDecomposition.comp.spv");
    evaluateTask1Solution(&kernelDecompositionSolution, "KernelDecomposition");

    A2Task1SolutionKernelDecomposition kernelDecompositionUnrollSolution(
        app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionUnroll.comp.spv");
    evaluateTask1Solution(&kernelDecompositionUnrollSolution, "KernelDecomposition Unroll");

    A2Task1SolutionKernelDecomposition kernelDecompositionAtomicSolution(
        app, 128, workingDir +"build/shaders/A2Task1KernelDecompositionAtomic.comp.spv");
    evaluateTask1Solution(&kernelDecompositionAtomicSolution, "KernelDecomposition Atomic");
}
|
||||||
|
// Runs the Task 2 solution variants and prints pass/fail, mean execution time
// and throughput for each. The full-size task uses 128 Mi elements; a second,
// small task exercises the kernel decomposition on a single workgroup.
void run_A2_task2(AppResources& app){
    size_t size = 128*1024*1024;
    std::cout<<"====== A2 TASK 2 ======" <<std::endl;

    // This is used for testing local kernel decomposition without extension to arbitrary arrays.
    // Must be power of two and <= 1024!
    size_t sizeLocal = 128;

    A2Task2 a2Task2(size);
    A2Task2 a2Task2Local(sizeLocal);

    // Evaluates `solution` against `task` up to N times, stopping at the first
    // failure, and accumulates the mean of solution->mstime.
    auto evaluateTask2Solution = [&](A2Task2 *task, A2Task2Solution* solution, std::string name, int N) {
        std::cout << "[Task2] evaluating " << name << " with size: "<< task->size() << std::endl;

        bool pass = true;
        float mstime = 0.f;
        int run = 0;
        while (run < N) {
            pass &= task->evaluateSolution(*solution);
            solution->cleanup();
            mstime += solution->mstime / N;
            if (!pass) {
                break;
            }
            ++run;
        }

        if (!pass) {
            std::cout << "TEST FAILED" << std::endl;
            return;
        }
        // elements / ms / 1e6 == giga-elements per second
        std::cout << "Execution time: " << mstime<< " ms, "
                  << "Throughput: " << task->size() / mstime / 1000000 << " GE/s" << std::endl;
        std::cout << "TEST PASSED" << std::endl;
    };

    A2Task2SolutioNaive naiveSolution(app, 128);
    evaluateTask2Solution(&a2Task2, &naiveSolution, "Naive",5);

    A2Task2SolutionKernelDecomposition kernelDecompositionSolutionLocal(app, sizeLocal);
    evaluateTask2Solution(&a2Task2Local, &kernelDecompositionSolutionLocal,
                          "Kernel Decomposition that fits in one workgroup (normal if 'slow')",5);

    A2Task2SolutionKernelDecomposition kernelDecompositionSolution(app, 128);
    evaluateTask2Solution(&a2Task2, &kernelDecompositionSolution, "Kernel Decomposition",5);
}
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
AppResources app;
|
||||||
|
|
||||||
|
initApp(app);
|
||||||
|
|
||||||
|
renderdoc::initialize();
|
||||||
|
renderdoc::startCapture();
|
||||||
|
|
||||||
|
run_A2_task1(app);
|
||||||
|
|
||||||
|
run_A2_task2(app);
|
||||||
|
|
||||||
|
renderdoc::endCapture();
|
||||||
|
|
||||||
|
app.destroy();
|
||||||
|
}
|
||||||
|
catch (vk::SystemError &err)
|
||||||
|
{
|
||||||
|
std::cout << "vk::SystemError: " << err.what() << std::endl;
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
catch (std::exception &err)
|
||||||
|
{
|
||||||
|
std::cout << "std::exception: " << err.what() << std::endl;
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
std::cout << "unknown error\n";
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
50
src/renderdoc.cpp
Normal file
50
src/renderdoc.cpp
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
#include "renderdoc.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#ifdef ENABLE_RENDERDOC
|
||||||
|
#include "renderdoc_app.h"
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#elif __linux__
|
||||||
|
#include <dlfcn.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static RENDERDOC_API_1_1_2 *rdoc_api = nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace renderdoc {
|
||||||
|
// Looks up the RenderDoc in-application API if the RenderDoc library is
// already loaded into this process and stores it in `rdoc_api`.
// Compiles to an empty function unless ENABLE_RENDERDOC is defined.
void initialize() {
#ifdef ENABLE_RENDERDOC
    pRENDERDOC_GetAPI getApi = nullptr;

    // Neither branch loads the library: GetModuleHandleA and RTLD_NOLOAD only
    // succeed when it is already present in the process.
#ifdef _WIN32
    if (HMODULE module = GetModuleHandleA("renderdoc.dll")) {
        getApi = (pRENDERDOC_GetAPI)GetProcAddress(module, "RENDERDOC_GetAPI");
    }
#elif __linux__
    if (void *module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD)) {
        getApi = (pRENDERDOC_GetAPI)dlsym(module, "RENDERDOC_GetAPI");
    }
#endif

    if (getApi == nullptr) {
        return;
    }
    int ret = getApi(eRENDERDOC_API_Version_1_1_2, (void **)&rdoc_api);
    assert(ret == 1);
#endif
}
|
||||||
|
|
||||||
|
// Starts a RenderDoc frame capture when the API was obtained by initialize().
// Compiles to an empty function unless ENABLE_RENDERDOC is defined.
void startCapture() {
#ifdef ENABLE_RENDERDOC
    if (rdoc_api != nullptr) {
        rdoc_api->StartFrameCapture(nullptr, nullptr);
    }
#endif
}
|
||||||
|
|
||||||
|
// Ends the RenderDoc frame capture when the API was obtained by initialize().
// Compiles to an empty function unless ENABLE_RENDERDOC is defined.
void endCapture() {
#ifdef ENABLE_RENDERDOC
    if (rdoc_api != nullptr) {
        rdoc_api->EndFrameCapture(nullptr, nullptr);
    }
#endif
}
|
||||||
|
}
|
||||||
116
src/task_common.cpp
Normal file
116
src/task_common.cpp
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include "task_common.h"
|
||||||
|
#include "initialization.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
namespace Cmn {
|
||||||
|
//We have a binding vector ready to become a descriptorSetLayout
|
||||||
|
void createDescriptorSetLayout(vk::Device& device,
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
|
||||||
|
vk::DescriptorSetLayout& descLayout) {
|
||||||
|
vk::DescriptorSetLayoutCreateInfo layoutInfo(
|
||||||
|
{},
|
||||||
|
CAST(bindings), // Number of binding infos
|
||||||
|
bindings.data() // Array of binding infos
|
||||||
|
);
|
||||||
|
descLayout = device.createDescriptorSetLayout(layoutInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends one storage-buffer binding to `bindings`.
//   bindings - list later passed to createDescriptorSetLayout
//   binding  - binding number, as referenced in the shader
// The entry holds a single descriptor and is visible to the compute stage only.
void addStorage(std::vector<vk::DescriptorSetLayoutBinding>& bindings, uint32_t binding) {
    const vk::DescriptorSetLayoutBinding storageBinding(
        binding,                            // binding number of this entry
        vk::DescriptorType::eStorageBuffer, // resource type: storage buffer
        1U,                                 // one descriptor in this binding
        vk::ShaderStageFlagBits::eCompute); // accessible from the compute stage
    bindings.push_back(storageBinding);
}
|
||||||
|
|
||||||
|
// Allocates exactly one descriptor set with layout `descLayout` from
// `descPool` and stores it in `descSet`.
void allocateDescriptorSet(vk::Device& device, vk::DescriptorSet& descSet, vk::DescriptorPool& descPool,
                           vk::DescriptorSetLayout& descLayout) {
    // One layout in, therefore one set out.
    const vk::DescriptorSetAllocateInfo allocInfo(descPool, 1U, &descLayout);
    const auto allocatedSets = device.allocateDescriptorSets(allocInfo);
    descSet = allocatedSets[0];
}
|
||||||
|
|
||||||
|
|
||||||
|
//Binding our DescriptorSet to Buffer
|
||||||
|
//VK_WHOLE_SIZE is specified to bind the entire Buffer
|
||||||
|
//DescriptorType eStorageBuffer in our case should be coherant with DescriptorSetLayout
|
||||||
|
//WriteDescriptorSets(creates array) and updateDescriptorSets can be used only once
|
||||||
|
void bindBuffers(vk::Device& device, vk::Buffer& b, vk::DescriptorSet& set, uint32_t binding) {
|
||||||
|
// Buffer info and data offset info
|
||||||
|
vk::DescriptorBufferInfo descInfo(
|
||||||
|
b, // Buffer to get data from
|
||||||
|
0ULL, // Position of start of data
|
||||||
|
VK_WHOLE_SIZE // Size of data
|
||||||
|
);
|
||||||
|
|
||||||
|
// Binding index in the shader V
|
||||||
|
vk::WriteDescriptorSet write(set, binding, 0U, 1U,
|
||||||
|
vk::DescriptorType::eStorageBuffer, nullptr, &descInfo);
|
||||||
|
device.updateDescriptorSets(1U, &write, 0U, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a compute pipeline from shader module `sModule` (entry point "main")
// with layout `pipLayout` and specialization constants `specInfo`, and stores
// it in `pipeline`.
void createPipeline(vk::Device& device, vk::Pipeline& pipeline,
                    vk::PipelineLayout& pipLayout, vk::SpecializationInfo& specInfo,
                    vk::ShaderModule& sModule) {
    const vk::PipelineShaderStageCreateFlags stageFlags{};
    vk::PipelineShaderStageCreateInfo shaderStage(stageFlags,
                                                  vk::ShaderStageFlagBits::eCompute,
                                                  sModule, "main", &specInfo);

    const vk::PipelineCreateFlags pipelineFlags{};
    vk::ComputePipelineCreateInfo pipelineInfo(pipelineFlags, shaderStage, pipLayout);

    // createComputePipeline returns a result/value pair here; only the value
    // is used and the result code is not inspected.
    auto created = device.createComputePipeline(nullptr, pipelineInfo, nullptr);
    pipeline = created.value;
}
|
||||||
|
|
||||||
|
//Number of DescriptorSets is one by default
|
||||||
|
void createDescriptorPool(vk::Device& device,
|
||||||
|
std::vector<vk::DescriptorSetLayoutBinding>& bindings, vk::DescriptorPool& descPool,
|
||||||
|
uint32_t numDescriptorSets) {
|
||||||
|
vk::DescriptorPoolSize descriptorPoolSize = vk::DescriptorPoolSize(
|
||||||
|
vk::DescriptorType::eStorageBuffer, bindings.size() * numDescriptorSets);
|
||||||
|
vk::DescriptorPoolCreateInfo descriptorPoolCI = vk::DescriptorPoolCreateInfo(
|
||||||
|
vk::DescriptorPoolCreateFlags(), numDescriptorSets, 1U, &descriptorPoolSize);
|
||||||
|
|
||||||
|
descPool = device.createDescriptorPool(descriptorPoolCI);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Loads a compiled shader from disk and wraps it in a shader module.
//
//  device       - logical device that creates the module
//  shaderModule - out: receives the created module
//  filename     - path of the shader binary, read via readFile()
void createShader(vk::Device& device, vk::ShaderModule& shaderModule, const std::string& filename) {
    const std::vector<char> shaderBytes = readFile(filename);

    // Shader module creation information.
    vk::ShaderModuleCreateInfo moduleInfo{};
    // Size of the code in bytes.
    moduleInfo.codeSize = static_cast<uint32_t>(shaderBytes.size());
    // The API wants the code as a uint32_t pointer, the file was read as chars.
    moduleInfo.pCode = reinterpret_cast<const uint32_t*>(shaderBytes.data());

    shaderModule = device.createShaderModule(moduleInfo);
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Releases every Vulkan object owned by this TaskResources instance.
//
// The objects are destroyed in the reverse order of their creation:
// pipeline -> pipeline layout -> descriptor pool -> descriptor set layout -> shader module.
// The descriptor set itself is not destroyed explicitly; it is managed by the
// descriptor pool.
void TaskResources::destroy(vk::Device& device) {
    // The pipeline goes before the layout it was built with.
    device.destroyPipeline(pipeline);
    device.destroyPipelineLayout(pipelineLayout);

    // The pool goes before the descriptor set layout.
    device.destroyDescriptorPool(descriptorPool);
    device.destroyDescriptorSetLayout(descriptorSetLayout);

    device.destroyShaderModule(cShader);

    // Blank line, then the confirmation message.
    std::cout << std::endl;
    std::cout << "destroyed everything successfully in task" << std::endl;
}
|
||||||
109
src/utils.cpp
Normal file
109
src/utils.cpp
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <cstring>
|
||||||
|
#include <sstream>
|
||||||
|
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||||
|
#include <vulkan/vulkan.hpp>
|
||||||
|
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
// Reads a whole file into memory as raw bytes.
//
//  filename - path of the file to read (opened in binary mode)
//
// Returns the file contents; an empty file yields an empty vector.
// Throws std::runtime_error if the file cannot be opened, if its size cannot be
// determined, or if fewer bytes than expected could be read.
std::vector<char> readFile(const std::string& filename) {
    // Open positioned at the end so the read position doubles as the file size.
    std::ifstream file(filename, std::ios::ate | std::ios::binary);

    if (!file.is_open()) {
        std::string error = "failed to open file: " + filename;
        throw std::runtime_error(error);
    }

    // tellg() reports -1 on failure; casting that to size_t would request a huge buffer.
    const std::streamoff endOffset = file.tellg();
    if (endOffset < 0) {
        throw std::runtime_error("failed to determine size of file: " + filename);
    }

    std::vector<char> buffer(static_cast<size_t>(endOffset));
    if (!buffer.empty()) {
        const std::streamsize wanted = static_cast<std::streamsize>(buffer.size());
        file.seekg(0);
        file.read(buffer.data(), wanted);
        // A short read would otherwise hand back a partly zero-filled buffer.
        if (!file || file.gcount() != wanted) {
            throw std::runtime_error("failed to read file: " + filename);
        }
    }
    file.close();

    // uncomment for debug
    //std::cout << "read " << buffer.size() << " bytes of data in file " << filename << std::endl;
    return buffer;
}
|
||||||
|
|
||||||
|
// Renders a byte count as a short human-readable string.
//
//  size - number of bytes
//
// Returns "<n> B" below 1024 bytes, otherwise the value divided by the matching
// power of 1024 followed by " KB", " MB" or " GB". The division is done in single
// precision and printed with the stream's default formatting.
std::string formatSize(uint64_t size) {
    constexpr uint64_t kKilo = 1024ULL;
    constexpr uint64_t kMega = kKilo * 1024ULL;
    constexpr uint64_t kGiga = kMega * 1024ULL;

    const float bytes = static_cast<float>(size);
    std::ostringstream text;

    // Checked from the largest unit downwards.
    if (size >= kGiga) {
        text << bytes / (1024.0f * 1024.0f * 1024.0f) << " GB";
    } else if (size >= kMega) {
        text << bytes / (1024.0f * 1024.0f) << " MB";
    } else if (size >= kKilo) {
        text << bytes / 1024.f << " KB";
    } else {
        // Plain bytes are printed as an integer, not as a float.
        text << size << " B";
    }

    return text.str();
}
|
||||||
|
|
||||||
|
uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties, vk::PhysicalDevice& pdevice) {
|
||||||
|
vk::PhysicalDeviceMemoryProperties memProperties = pdevice.getMemoryProperties();
|
||||||
|
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
|
||||||
|
if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw std::runtime_error("failed to find suitable memory type!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a buffer, allocates memory for it and binds the two together.
//
//  pDevice      - physical device used to look up a matching memory type
//  device       - logical device that creates the buffer and allocates the memory
//  size         - size of the buffer in bytes
//  usage        - usage flags of the buffer
//  properties   - property flags the backing memory must have
//  name         - name attached to the buffer through setObjectName()
//  buffer       - out: receives the created buffer
//  bufferMemory - out: receives the memory allocated for the buffer
//
// findMemoryType() throws if no memory type satisfies the requirements.
void createBuffer(vk::PhysicalDevice& pDevice, vk::Device& device,
                  const vk::DeviceSize& size, vk::BufferUsageFlags usage,
                  vk::MemoryPropertyFlags properties, std::string name, vk::Buffer& buffer,
                  vk::DeviceMemory& bufferMemory) {
    // Step 1: the buffer object itself, tagged with its name.
    vk::BufferCreateInfo bufferInfo{};
    bufferInfo.setSize(size).setUsage(usage);
    buffer = device.createBuffer(bufferInfo);
    setObjectName(device, buffer, name);

    // Step 2: memory sized and typed according to what the buffer reports it needs.
    const vk::MemoryRequirements requirements = device.getBufferMemoryRequirements(buffer);
    const uint32_t memoryTypeIndex = findMemoryType(requirements.memoryTypeBits, properties, pDevice);
    const vk::MemoryAllocateInfo allocation(requirements.size, memoryTypeIndex);
    bufferMemory = device.allocateMemory(allocation);

    // Step 3: bind the memory at offset 0.
    device.bindBufferMemory(buffer, bufferMemory, 0U);
}
|
||||||
|
|
||||||
|
// Convenience overload: creates a buffer and its memory inside a Buffer wrapper.
//
// Forwards to the createBuffer() overload taking separate handles, storing the
// buffer in buffer.buf and its memory in buffer.mem. All other parameters are
// passed through unchanged.
void createBuffer(vk::PhysicalDevice& pDevice, vk::Device& device,
                  const vk::DeviceSize& size, vk::BufferUsageFlags usage,
                  vk::MemoryPropertyFlags properties, std::string name, Buffer& buffer) {
    vk::Buffer& handleOut = buffer.buf;
    vk::DeviceMemory& memoryOut = buffer.mem;
    createBuffer(pDevice, device, size, usage, properties, name, handleOut, memoryOut);
}
|
||||||
|
|
||||||
|
// Destroys a Buffer wrapper's buffer object and frees its memory.
//
//  device - logical device the buffer and memory belong to
//  buffer - wrapper whose buf is destroyed first, then whose mem is freed
void destroyBuffer(vk::Device& device, Buffer& buffer) {
    vk::Buffer& handle = buffer.buf;
    vk::DeviceMemory& memory = buffer.mem;

    // Buffer first, then the memory it was bound to.
    device.destroyBuffer(handle);
    device.freeMemory(memory);
}
|
||||||
|
|
||||||
|
// Copies bytes from one buffer to another using a one-time command buffer.
//
//  device      - logical device used to allocate and free the command buffer
//  q           - queue the copy is submitted to
//  commandPool - pool the temporary command buffer comes from
//  srcBuffer   - buffer to copy from
//  dstBuffer   - buffer to copy to
//  byteSize    - number of bytes to copy
//
// The copy starts at offset 0 in both buffers. The command buffer is obtained from
// beginSingleTimeCommands() and handed to endSingleTimeCommands() afterwards.
void copyBuffer(vk::Device& device, vk::Queue& q, vk::CommandPool& commandPool,
                const vk::Buffer& srcBuffer, vk::Buffer& dstBuffer, vk::DeviceSize byteSize) {
    vk::CommandBuffer commandBuffer = beginSingleTimeCommands(device, commandPool);

    // Source offset 0, destination offset 0, byteSize bytes.
    const vk::BufferCopy copyRegion(0ULL, 0ULL, byteSize);
    commandBuffer.copyBuffer(srcBuffer, dstBuffer, 1, &copyRegion);

    endSingleTimeCommands(device, q, commandPool, commandBuffer);
}
|
||||||
|
|
||||||
|
// Allocates one primary command buffer and puts it into the recording state.
//
//  device      - logical device that allocates the command buffer
//  commandPool - pool to allocate from
//
// Returns the command buffer, already begun with the one-time-submit usage flag.
vk::CommandBuffer beginSingleTimeCommands(vk::Device& device, vk::CommandPool& commandPool) {
    // Ask for exactly one primary command buffer.
    vk::CommandBufferAllocateInfo request{};
    request.setCommandPool(commandPool)
        .setLevel(vk::CommandBufferLevel::ePrimary)
        .setCommandBufferCount(1);

    // The allocation returns a container; take its only element.
    auto allocated = device.allocateCommandBuffers(request);
    vk::CommandBuffer oneShot = allocated.front();

    const vk::CommandBufferBeginInfo recording(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
    oneShot.begin(recording);

    return oneShot;
}
|
||||||
|
|
||||||
|
// Finishes, submits and frees a command buffer.
//
//  device        - logical device used to free the command buffer
//  q             - queue the command buffer is submitted to
//  commandPool   - pool the command buffer is freed back to
//  commandBuffer - command buffer in the recording state
//
// The submission uses no semaphores and no fence; the function waits until the
// queue is idle before the command buffer is freed.
void endSingleTimeCommands(vk::Device& device, vk::Queue& q,
                           vk::CommandPool& commandPool, vk::CommandBuffer& commandBuffer) {
    commandBuffer.end();

    // Only the command buffer is set; wait semaphores and stage masks stay empty.
    vk::SubmitInfo submission{};
    submission.setWaitSemaphoreCount(0U)
        .setPWaitSemaphores(nullptr)
        .setPWaitDstStageMask(nullptr)
        .setCommandBufferCount(1U)
        .setPCommandBuffers(&commandBuffer);

    q.submit({submission}, nullptr);
    // Block until the queue has finished, so freeing the buffer below is safe.
    q.waitIdle();

    device.freeCommandBuffers(commandPool, 1, &commandBuffer);
}
|
||||||
Reference in New Issue
Block a user