diff --git a/gpu/vulkan/.gitignore b/gpu/vulkan/.gitignore
index 63ea14fd..0497403f 100644
--- a/gpu/vulkan/.gitignore
+++ b/gpu/vulkan/.gitignore
@@ -1,3 +1,4 @@
 list-dev
-helloworld
 shader.spv
+simple
+simple-shader.spv
diff --git a/gpu/vulkan/Makefile b/gpu/vulkan/Makefile
index d52776ce..14a8b0b7 100644
--- a/gpu/vulkan/Makefile
+++ b/gpu/vulkan/Makefile
@@ -1,12 +1,15 @@
 #CFLAGS = -std=c++17 -O2
 LDFLAGS = -lvulkan -ldl -lpthread -lX11 -lXrandr -lXi
+CXXFLAGS += -I/usr/include/glm
 
 VULKAN_SDK_PATH = /home/danbev/work/ai/vulkan/1.3.283.0/x86_64
 
 DXC = $(VULKAN_SDK_PATH)/bin/dxc
 
-helloworld: src/helloworld.cpp
-	g++ $(CFLAGS) -o $@ $< $(LDFLAGS)
+simple: src/simple.cpp
+	g++ $(CXXFLAGS) -o $@ $< $(LDFLAGS)
+simple-shader: src/simple.glsl
+	glslc -fshader-stage=compute $< -o $@.spv
 
 shader: src/shader.frag
 	#glslc -fshader-stage=compute --target-env=vulkan1.3 -O $< -o $@.spv
diff --git a/gpu/vulkan/src/helloworld.cpp b/gpu/vulkan/src/helloworld.cpp
deleted file mode 100644
index c3ea131f..00000000
--- a/gpu/vulkan/src/helloworld.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <vulkan/vulkan.h>
-#include <iostream>
-#include <stdexcept>
-#include <vector>
-
-int main() {
-    // Application info
-    VkApplicationInfo appInfo{};
-    appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
-    appInfo.pApplicationName = "Vulkan App";
-    appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
-    appInfo.pEngineName = "No Engine";
-    appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
-    appInfo.apiVersion = VK_API_VERSION_1_0;
-
-    // Instance creation info
-    VkInstanceCreateInfo createInfo{};
-    createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-    createInfo.pApplicationInfo = &appInfo;
-
-    // Specify extensions
-    std::vector<const char*> extensions = {
-        VK_KHR_SURFACE_EXTENSION_NAME
-    };
-    createInfo.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
-    createInfo.ppEnabledExtensionNames = extensions.data();
-
-    // No validation layers for this simple example
-    createInfo.enabledLayerCount = 0;
-
-    // Create the Vulkan instance
-    VkInstance instance;
-    VkResult result = vkCreateInstance(&createInfo, nullptr, &instance);
-
-    if (result != VK_SUCCESS) {
-        throw std::runtime_error("Failed to create Vulkan instance!");
-    }
-
-    std::cout << "Vulkan instance created successfully." << std::endl;
-
-    // Use the Vulkan instance...
-
-    // Clean up
-    vkDestroyInstance(instance, nullptr);
-
-    return 0;
-}
diff --git a/gpu/vulkan/src/simple.cpp b/gpu/vulkan/src/simple.cpp
new file mode 100644
index 00000000..bcd2acac
--- /dev/null
+++ b/gpu/vulkan/src/simple.cpp
@@ -0,0 +1,475 @@
+#include <vulkan/vulkan.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#define GLM_FORCE_RADIANS
+#define GLM_FORCE_DEPTH_ZERO_TO_ONE
+//#include <glm/gtc/matrix_transform.hpp> // needed only for the commented-out glm::rotate/glm::scale inputs in main()
+#include <glm/glm.hpp>
+
+// Reads the entire file `filename` into a byte buffer.
+// Throws std::runtime_error if the file cannot be opened, sized, or read.
+std::vector<char> readFile(const std::string& filename) {
+    // Open positioned at the end so tellg() yields the file size.
+    std::ifstream file(filename, std::ios::ate | std::ios::binary);
+    if (!file.is_open()) {
+        throw std::runtime_error("failed to open file!");
+    }
+    const std::streamoff fileSize = file.tellg();
+    if (fileSize < 0) {
+        throw std::runtime_error("failed to determine file size!");
+    }
+    std::vector<char> buffer(static_cast<size_t>(fileSize));
+    file.seekg(0);
+    file.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
+    if (!file) {
+        throw std::runtime_error("failed to read file!");
+    }
+    return buffer;
+}
+
+// Creates a Vulkan 1.0 instance without extensions or layers.
+// Throws std::runtime_error if instance creation fails.
+VkInstance createInstance() {
+    VkApplicationInfo appInfo{};
+    appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+    appInfo.pApplicationName = "Simple Vulkan Compute App";
+    appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
+    appInfo.pEngineName = "No Engine";
+    appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
+    appInfo.apiVersion = VK_API_VERSION_1_0;
+
+    VkInstanceCreateInfo createInfo{};
+    createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    createInfo.pApplicationInfo = &appInfo;
+
+    VkInstance instance;
+    if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create instance!");
+    }
+    return instance;
+}
+
+// Returns the first physical device reported by the instance.
+// Throws std::runtime_error if no device is available.
+VkPhysicalDevice pickPhysicalDevice(VkInstance instance) {
+    uint32_t deviceCount = 0;
+    vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);
+    if (deviceCount == 0) {
+        throw std::runtime_error("failed to find GPUs with Vulkan support!");
+    }
+    std::vector<VkPhysicalDevice> devices(deviceCount);
+    vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data());
+    return devices[0]; // Just pick the first device for simplicity
+}
+
+// Returns the index of the first queue family that supports compute.
+// Throws std::runtime_error if the device has none.
+uint32_t findComputeQueueFamily(VkPhysicalDevice device) {
+    uint32_t queueFamilyCount = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, nullptr);
+    std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, queueFamilies.data());
+
+    for (uint32_t i = 0; i < queueFamilyCount; i++) {
+        if (queueFamilies[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
+            return i;
+        }
+    }
+
+    throw std::runtime_error("failed to find a compute queue family!");
+}
+
+// Creates a logical device exposing a single queue from `computeQueueFamily`.
+// Throws std::runtime_error if device creation fails.
+VkDevice createLogicalDevice(VkPhysicalDevice physicalDevice, uint32_t computeQueueFamily) {
+    VkDeviceQueueCreateInfo queueCreateInfo{};
+    queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queueCreateInfo.queueFamilyIndex = computeQueueFamily;
+    queueCreateInfo.queueCount = 1;
+    float queuePriority = 1.0f;
+    queueCreateInfo.pQueuePriorities = &queuePriority;
+
+    VkPhysicalDeviceFeatures deviceFeatures{};
+
+    VkDeviceCreateInfo createInfo{};
+    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    createInfo.pQueueCreateInfos = &queueCreateInfo;
+    createInfo.queueCreateInfoCount = 1;
+    createInfo.pEnabledFeatures = &deviceFeatures;
+
+    VkDevice device;
+    if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create logical device!");
+    }
+    return device;
+}
+
+// Wraps SPIR-V bytes in a shader module.
+// Throws std::runtime_error if `code` is not a whole number of 32-bit words
+// or if module creation fails.
+VkShaderModule createShaderModule(VkDevice device, const std::vector<char>& code) {
+    // Vulkan requires codeSize to be a multiple of 4.
+    if (code.size() % sizeof(uint32_t) != 0) {
+        throw std::runtime_error("shader code size is not a multiple of 4!");
+    }
+
+    VkShaderModuleCreateInfo createInfo{};
+    createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    createInfo.codeSize = code.size();
+    createInfo.pCode = reinterpret_cast<const uint32_t*>(code.data());
+
+    VkShaderModule shaderModule;
+    if (vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create shader module!");
+    }
+    return shaderModule;
+}
+
+// Creates a layout with one storage buffer at binding 0, visible to compute.
+// Throws std::runtime_error on failure.
+VkDescriptorSetLayout createDescriptorSetLayout(VkDevice device) {
+    VkDescriptorSetLayoutBinding layoutBinding{};
+    layoutBinding.binding = 0;
+    layoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    layoutBinding.descriptorCount = 1;
+    layoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+
+    VkDescriptorSetLayoutCreateInfo layoutInfo{};
+    layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    layoutInfo.bindingCount = 1;
+    layoutInfo.pBindings = &layoutBinding;
+
+    VkDescriptorSetLayout descriptorSetLayout;
+    if (vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &descriptorSetLayout) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create descriptor set layout!");
+    }
+    return descriptorSetLayout;
+}
+
+// Creates a pipeline layout that uses `descriptorSetLayout` as set 0.
+// Throws std::runtime_error on failure.
+VkPipelineLayout createPipelineLayout(VkDevice device, VkDescriptorSetLayout descriptorSetLayout) {
+    VkPipelineLayoutCreateInfo pipelineLayoutInfo{};
+    pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+    pipelineLayoutInfo.setLayoutCount = 1;
+    pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout;
+
+    VkPipelineLayout pipelineLayout;
+    if (vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr, &pipelineLayout) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create pipeline layout!");
+    }
+    return pipelineLayout;
+}
+
+// Builds a compute pipeline whose entry point is "main" in `computeShaderModule`.
+// Throws std::runtime_error on failure.
+VkPipeline createComputePipeline(VkDevice device, VkShaderModule computeShaderModule, VkPipelineLayout pipelineLayout) {
+    VkPipelineShaderStageCreateInfo shaderStageInfo{};
+    shaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    shaderStageInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
+    shaderStageInfo.module = computeShaderModule;
+    shaderStageInfo.pName = "main";
+
+    VkComputePipelineCreateInfo pipelineInfo{};
+    pipelineInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+    pipelineInfo.stage = shaderStageInfo;
+    pipelineInfo.layout = pipelineLayout;
+
+    VkPipeline computePipeline;
+    if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &computePipeline) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create compute pipeline!");
+    }
+    return computePipeline;
+}
+
+// Creates a command pool for the given queue family.
+// Throws std::runtime_error on failure.
+VkCommandPool createCommandPool(VkDevice device, uint32_t queueFamilyIndex) {
+    VkCommandPoolCreateInfo poolInfo{};
+    poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    poolInfo.queueFamilyIndex = queueFamilyIndex;
+
+    VkCommandPool commandPool;
+    if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create command pool!");
+    }
+    return commandPool;
+}
+
+// Allocates one primary command buffer from `commandPool`.
+// Throws std::runtime_error on failure.
+VkCommandBuffer createCommandBuffer(VkDevice device, VkCommandPool commandPool) {
+    VkCommandBufferAllocateInfo allocInfo{};
+    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+    allocInfo.commandPool = commandPool;
+    allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    allocInfo.commandBufferCount = 1;
+
+    VkCommandBuffer commandBuffer;
+    if (vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer) != VK_SUCCESS) {
+        throw std::runtime_error("failed to allocate command buffers!");
+    }
+    return commandBuffer;
+}
+
+// Returns the index of the first memory type allowed by `typeFilter` (a bitmask
+// of acceptable type indices) that has every flag in `properties`.
+// Throws std::runtime_error if no such type exists.
+uint32_t findMemoryType(VkPhysicalDevice physicalDevice, uint32_t typeFilter, VkMemoryPropertyFlags properties) {
+    VkPhysicalDeviceMemoryProperties memProperties;
+    vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
+
+    for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
+        // Unsigned literal: shifting a signed 1 into bit 31 would overflow int.
+        if ((typeFilter & (1u << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) {
+            return i;
+        }
+    }
+
+    throw std::runtime_error("failed to find suitable memory type!");
+}
+
+// Creates a descriptor pool sized for one set holding one storage buffer.
+// Throws std::runtime_error on failure.
+VkDescriptorPool createDescriptorPool(VkDevice device) {
+    VkDescriptorPoolSize poolSize{};
+    poolSize.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    poolSize.descriptorCount = 1;
+
+    VkDescriptorPoolCreateInfo poolInfo{};
+    poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+    poolInfo.poolSizeCount = 1;
+    poolInfo.pPoolSizes = &poolSize;
+    poolInfo.maxSets = 1;
+
+    VkDescriptorPool descriptorPool;
+    if (vkCreateDescriptorPool(device, &poolInfo, nullptr, &descriptorPool) != VK_SUCCESS) {
+        throw std::runtime_error("failed to create descriptor pool!");
+    }
+    return descriptorPool;
+}
+
+// Allocates a descriptor set from `descriptorPool` and points binding 0 at the
+// whole of `buffer`. Throws std::runtime_error if allocation fails.
+VkDescriptorSet createDescriptorSet(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSetLayout descriptorSetLayout, VkBuffer buffer) {
+    VkDescriptorSetAllocateInfo allocInfo{};
+    allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    allocInfo.descriptorPool = descriptorPool;
+    allocInfo.descriptorSetCount = 1;
+    allocInfo.pSetLayouts = &descriptorSetLayout;
+
+    VkDescriptorSet descriptorSet;
+    if (vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet) != VK_SUCCESS) {
+        throw std::runtime_error("failed to allocate descriptor set!");
+    }
+
+    VkDescriptorBufferInfo bufferInfo{};
+    bufferInfo.buffer = buffer;
+    bufferInfo.offset = 0;
+    bufferInfo.range = VK_WHOLE_SIZE;
+
+    VkWriteDescriptorSet descriptorWrite{};
+    descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    descriptorWrite.dstSet = descriptorSet;
+    descriptorWrite.dstBinding = 0;
+    descriptorWrite.dstArrayElement = 0;
+    descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    descriptorWrite.descriptorCount = 1;
+    descriptorWrite.pBufferInfo = &bufferInfo;
+
+    vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, nullptr);
+
+    return descriptorSet;
+}
+
+// Multiplies two 4x4 matrices on the GPU with the simple-shader.spv compute
+// shader and prints the result. Returns EXIT_FAILURE if any step throws.
+int main() {
+    try {
+        std::cout << "Creating Vulkan instance..." << std::endl;
+        VkInstance instance = createInstance();
+
+        std::cout << "Picking physical device..." << std::endl;
+        VkPhysicalDevice physicalDevice = pickPhysicalDevice(instance);
+
+        std::cout << "Finding compute queue family..." << std::endl;
+        uint32_t computeQueueFamily = findComputeQueueFamily(physicalDevice);
+
+        std::cout << "Creating logical device..." << std::endl;
+        VkDevice device = createLogicalDevice(physicalDevice, computeQueueFamily);
+
+        std::cout << "Getting compute queue..." << std::endl;
+        VkQueue computeQueue;
+        vkGetDeviceQueue(device, computeQueueFamily, 0, &computeQueue);
+
+        std::cout << "Reading shader file..." << std::endl;
+        auto shaderCode = readFile("simple-shader.spv");
+        if (shaderCode.empty()) {
+            throw std::runtime_error("Shader file is empty or could not be read.");
+        }
+
+        std::cout << "Creating shader module..." << std::endl;
+        VkShaderModule computeShaderModule = createShaderModule(device, shaderCode);
+
+        std::cout << "Creating descriptor set layout..." << std::endl;
+        VkDescriptorSetLayout descriptorSetLayout = createDescriptorSetLayout(device);
+
+        std::cout << "Creating pipeline layout..." << std::endl;
+        VkPipelineLayout pipelineLayout = createPipelineLayout(device, descriptorSetLayout);
+
+        std::cout << "Creating compute pipeline..." << std::endl;
+        VkPipeline computePipeline = createComputePipeline(device, computeShaderModule, pipelineLayout);
+
+        std::cout << "Creating command pool..." << std::endl;
+        VkCommandPool commandPool = createCommandPool(device, computeQueueFamily);
+
+        std::cout << "Creating command buffer..." << std::endl;
+        VkCommandBuffer commandBuffer = createCommandBuffer(device, commandPool);
+
+        std::cout << "Creating buffer..." << std::endl;
+        VkBufferCreateInfo bufferInfo{};
+        bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+        bufferInfo.size = sizeof(glm::mat4) * 3; // Two input matrices and one output
+        bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+        bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+        VkBuffer buffer;
+        if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
+            throw std::runtime_error("failed to create buffer!");
+        }
+
+        std::cout << "Getting memory requirements..." << std::endl;
+        VkMemoryRequirements memRequirements;
+        vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
+
+        std::cout << "Allocating memory..." << std::endl;
+        VkMemoryAllocateInfo allocInfo{};
+        allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+        allocInfo.allocationSize = memRequirements.size;
+        allocInfo.memoryTypeIndex = findMemoryType(physicalDevice, memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+
+        VkDeviceMemory bufferMemory;
+        if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) != VK_SUCCESS) {
+            throw std::runtime_error("failed to allocate buffer memory!");
+        }
+
+        std::cout << "Binding buffer memory..." << std::endl;
+        if (vkBindBufferMemory(device, buffer, bufferMemory, 0) != VK_SUCCESS) {
+            throw std::runtime_error("failed to bind buffer memory!");
+        }
+
+        std::cout << "Filling input matrices..." << std::endl;
+        float matrixA[16] = {
+            1.0f, 2.0f, 3.0f, 4.0f,     // First column
+            5.0f, 6.0f, 7.0f, 8.0f,     // Second column
+            9.0f, 10.0f, 11.0f, 12.0f,  // Third column
+            13.0f, 14.0f, 15.0f, 16.0f  // Fourth column
+        };
+        float matrixB[16] = {
+            2.0f, 0.0f, 0.0f, 0.0f,
+            0.0f, 2.0f, 0.0f, 0.0f,
+            0.0f, 0.0f, 2.0f, 0.0f,
+            0.0f, 0.0f, 0.0f, 2.0f
+        };
+
+        //glm::mat4 matrixA = glm::rotate(glm::mat4(1.0f), glm::radians(45.0f), glm::vec3(0.0f, 0.0f, 1.0f));
+        //glm::mat4 matrixB = glm::scale(glm::mat4(1.0f), glm::vec3(2.0f, 2.0f, 2.0f));
+
+        std::cout << "Copying matrices to GPU memory..." << std::endl;
+        void* data = nullptr;
+        if (vkMapMemory(device, bufferMemory, 0, bufferInfo.size, 0, &data) != VK_SUCCESS) {
+            throw std::runtime_error("failed to map buffer memory!");
+        }
+        std::memcpy(data, matrixA, sizeof(glm::mat4));
+        std::memcpy(static_cast<char*>(data) + sizeof(glm::mat4), matrixB, sizeof(glm::mat4));
+        vkUnmapMemory(device, bufferMemory);
+
+        std::cout << "Creating descriptor pool..." << std::endl;
+        VkDescriptorPool descriptorPool = createDescriptorPool(device);
+
+        std::cout << "Creating descriptor set..." << std::endl;
+        VkDescriptorSet descriptorSet = createDescriptorSet(device, descriptorPool, descriptorSetLayout, buffer);
+
+        std::cout << "Recording command buffer..." << std::endl;
+        VkCommandBufferBeginInfo beginInfo{};
+        beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+
+        if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS) {
+            throw std::runtime_error("failed to begin recording command buffer!");
+        }
+
+        vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline);
+        vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
+        // One 4x4 workgroup (see local_size in simple.glsl) covers every output element.
+        vkCmdDispatch(commandBuffer, 1, 1, 1);
+
+        // Make the shader's writes available to the host read-back below.
+        VkMemoryBarrier hostReadBarrier{};
+        hostReadBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+        hostReadBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+        hostReadBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
+        vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &hostReadBarrier, 0, nullptr, 0, nullptr);
+
+        if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS) {
+            throw std::runtime_error("failed to record command buffer!");
+        }
+
+        std::cout << "Submitting command buffer..." << std::endl;
+        VkSubmitInfo submitInfo{};
+        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+        submitInfo.commandBufferCount = 1;
+        submitInfo.pCommandBuffers = &commandBuffer;
+
+        if (vkQueueSubmit(computeQueue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS) {
+            throw std::runtime_error("failed to submit compute command buffer!");
+        }
+
+        std::cout << "Waiting for queue to become idle..." << std::endl;
+        if (vkQueueWaitIdle(computeQueue) != VK_SUCCESS) {
+            throw std::runtime_error("failed to wait for compute queue!");
+        }
+
+        std::cout << "Reading back the result..." << std::endl;
+        glm::mat4 result;
+        if (vkMapMemory(device, bufferMemory, 2 * sizeof(glm::mat4), sizeof(glm::mat4), 0, &data) != VK_SUCCESS) {
+            throw std::runtime_error("failed to map buffer memory!");
+        }
+        std::memcpy(&result, data, sizeof(glm::mat4));
+        vkUnmapMemory(device, bufferMemory);
+
+        std::cout << "Result matrix:" << std::endl;
+        for (int i = 0; i < 4; i++) {
+            for (int j = 0; j < 4; j++) {
+                std::cout << result[i][j] << " ";
+            }
+            std::cout << std::endl;
+        }
+
+        std::cout << "Cleaning up..." << std::endl;
+        vkDestroyBuffer(device, buffer, nullptr);
+        vkFreeMemory(device, bufferMemory, nullptr);
+        // Destroying the pool also frees the descriptor set allocated from it.
+        vkDestroyDescriptorPool(device, descriptorPool, nullptr);
+        vkDestroyShaderModule(device, computeShaderModule, nullptr);
+        vkDestroyPipeline(device, computePipeline, nullptr);
+        vkDestroyPipelineLayout(device, pipelineLayout, nullptr);
+        vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
+        vkDestroyCommandPool(device, commandPool, nullptr);
+        vkDestroyDevice(device, nullptr);
+        vkDestroyInstance(instance, nullptr);
+
+        std::cout << "Program completed successfully." << std::endl;
+    } catch (const std::exception& e) {
+        std::cerr << "Error: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/gpu/vulkan/src/simple.glsl b/gpu/vulkan/src/simple.glsl
new file mode 100644
index 00000000..563a2adb
--- /dev/null
+++ b/gpu/vulkan/src/simple.glsl
@@ -0,0 +1,22 @@
+#version 450
+
+layout(local_size_x = 4, local_size_y = 4) in;
+
+layout(set = 0, binding = 0) buffer InputOutputBuffer {
+    mat4 matrixA;
+    mat4 matrixB;
+    mat4 result;
+};
+
+void main() {
+    uint col = gl_GlobalInvocationID.x;
+    uint row = gl_GlobalInvocationID.y;
+
+    if(col < 4 && row < 4) {
+        float sum = 0.0;
+        for(int i = 0; i < 4; ++i) {
+            sum += matrixA[row][i] * matrixB[i][col];
+        }
+        result[row][col] = sum;
+    }
+}