From 4eca1bb80f80189eb4a8c49d8fb690485a1957e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Delgado=20Kr=C3=A4mer?= Date: Sat, 24 Aug 2024 20:53:50 +0200 Subject: [PATCH] cgpu/gb: fix use of GbSmallVector Although this does not change anything because we alias SmallVector to std::vector right now, the concept has a non-zero cost: stack memory. --- src/cgpu/impl/Cgpu.cpp | 85 +++++++++++++++++------------- src/cgpu/impl/ShaderReflection.cpp | 2 +- src/cgpu/impl/ShaderReflection.h | 4 +- src/gb/gtl/gb/HandleStore.h | 9 ++-- src/gb/impl/HandleStore.cpp | 6 +++ 5 files changed, 62 insertions(+), 44 deletions(-) diff --git a/src/cgpu/impl/Cgpu.cpp b/src/cgpu/impl/Cgpu.cpp index a90c2eb9..c8c43bac 100644 --- a/src/cgpu/impl/Cgpu.cpp +++ b/src/cgpu/impl/Cgpu.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -97,17 +98,17 @@ namespace gtl struct CgpuIPipeline { - VkPipeline pipeline; - VkPipelineLayout layout; - VkDescriptorPool descriptorPool; - VkDescriptorSet descriptorSet; - VkDescriptorSetLayout descriptorSetLayout; - GbSmallVector descriptorSetLayoutBindings; - VkPipelineBindPoint bindPoint; - VkStridedDeviceAddressRegionKHR sbtRgen; - VkStridedDeviceAddressRegionKHR sbtMiss; - VkStridedDeviceAddressRegionKHR sbtHit; - CgpuIBuffer sbt; + VkPipeline pipeline; + VkPipelineLayout layout; + VkDescriptorPool descriptorPool; + VkDescriptorSet descriptorSet; + VkDescriptorSetLayout descriptorSetLayout; + std::vector descriptorSetLayoutBindings; + VkPipelineBindPoint bindPoint; + VkStridedDeviceAddressRegionKHR sbtRgen; + VkStridedDeviceAddressRegionKHR sbtMiss; + VkStridedDeviceAddressRegionKHR sbtHit; + CgpuIBuffer sbt; }; struct CgpuIShader @@ -435,8 +436,8 @@ namespace gtl return false; } - GbSmallVector enabledLayers; - GbSmallVector enabledExtensions; + GbSmallVector enabledLayers; + GbSmallVector enabledExtensions; bool debugUtilsEnabled = false; #ifndef NDEBUG { @@ -460,7 +461,7 @@ namespace gtl uint32_t extensionCount; vkEnumerateInstanceExtensionProperties(nullptr, &extensionCount, nullptr); - GbSmallVector availableExtensions(extensionCount); + std::vector availableExtensions(extensionCount); vkEnumerateInstanceExtensionProperties(nullptr, &extensionCount, availableExtensions.data()); #ifndef NDEBUG @@ -550,7 +551,7 @@ namespace gtl GB_WARN("more than one device found -- choosing first one"); } - GbSmallVector physicalDevices(physDeviceCount); + GbSmallVector physicalDevices(physDeviceCount); vkEnumeratePhysicalDevices(iinstance->instance, &physDeviceCount, physicalDevices.data()); idevice->physicalDevice = physicalDevices[0]; @@ -606,10 +607,10 @@ namespace gtl uint32_t extensionCount; vkEnumerateDeviceExtensionProperties(idevice->physicalDevice, nullptr, &extensionCount, nullptr); - GbSmallVector extensions(extensionCount); + std::vector extensions(extensionCount); vkEnumerateDeviceExtensionProperties(idevice->physicalDevice, nullptr, &extensionCount, extensions.data()); - GbSmallVector requiredExtensions = { + std::array requiredExtensions = { VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME, // required by VK_KHR_acceleration_structure VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, // required by VK_KHR_acceleration_structure @@ -622,7 +623,7 @@ namespace gtl VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME }; - GbSmallVector enabledExtensions; + GbSmallVector enabledExtensions; for (uint32_t i = 0; i < requiredExtensions.size(); i++) { const char* extension = requiredExtensions[i]; @@ -688,7 +689,7 @@ namespace gtl uint32_t queueFamilyCount; vkGetPhysicalDeviceQueueFamilyProperties(idevice->physicalDevice, &queueFamilyCount, nullptr); - GbSmallVector queueFamilies(queueFamilyCount); + GbSmallVector queueFamilies(queueFamilyCount); vkGetPhysicalDeviceQueueFamilyProperties(idevice->physicalDevice, &queueFamilyCount, queueFamilies.data()); uint32_t queueFamilyIndex = UINT32_MAX; @@ -1555,8 +1556,10 @@ namespace gtl static bool cgpuCreatePipelineDescriptors(CgpuIDevice* idevice, CgpuIPipeline* ipipeline, CgpuIShader* ishader, VkShaderStageFlags stageFlags) { const CgpuShaderReflection* shaderReflection = &ishader->reflection; + size_t bindingCount = shaderReflection->bindings.size(); - for (uint32_t i = 0; i < shaderReflection->bindings.size(); i++) + ipipeline->descriptorSetLayoutBindings.resize(bindingCount); + for (uint32_t i = 0; i < bindingCount; i++) { const CgpuShaderReflectionBinding* binding_reflection = &shaderReflection->bindings[i]; @@ -1568,14 +1571,14 @@ namespace gtl .pImmutableSamplers = nullptr, }; - ipipeline->descriptorSetLayoutBindings.push_back(layoutBinding); + ipipeline->descriptorSetLayoutBindings[i] = layoutBinding; } VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .bindingCount = (uint32_t) ipipeline->descriptorSetLayoutBindings.size(), + .bindingCount = (uint32_t) bindingCount, .pBindings = ipipeline->descriptorSetLayoutBindings.data(), }; @@ -1816,7 +1819,7 @@ namespace gtl uint32_t firstGroup = 0; uint32_t dataSize = handleSize * groupCount; - GbSmallVector handleData(dataSize); + std::vector handleData(dataSize); if (idevice->table.vkGetRayTracingShaderGroupHandlesKHR(idevice->logicalDevice, ipipeline->pipeline, firstGroup, groupCount, handleData.size(), handleData.data()) != VK_SUCCESS) { CGPU_RETURN_ERROR("failed to create sbt handles"); @@ -1886,11 +1889,13 @@ namespace gtl CGPU_RESOLVE_OR_RETURN_SHADER(createInfo.rgenShader, irgenShader); // Set up stages - GbSmallVector stages; + std::vector stages; + stages.reserve(256); + VkShaderStageFlags shaderStageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR; auto pushStage = [&stages](VkShaderStageFlagBits stage, VkShaderModule module) { - VkPipelineShaderStageCreateInfo pipeline_shader_stage_create_info = { + VkPipelineShaderStageCreateInfo stageCreateInfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -1899,7 +1904,7 @@ namespace gtl .pName = "main", .pSpecializationInfo = nullptr, }; - stages.push_back(pipeline_shader_stage_create_info); + stages.push_back(stageCreateInfo); }; // Ray gen @@ -1947,12 +1952,12 @@ namespace gtl } // Set up groups - GbSmallVector groups; - groups.resize(1/*rgen*/ + createInfo.missShaderCount + createInfo.hitGroupCount); + size_t groupCount = 1/*rgen*/ + createInfo.missShaderCount + createInfo.hitGroupCount; + std::vector groups(groupCount); for (uint32_t i = 0; i < groups.size(); i++) { - VkRayTracingShaderGroupCreateInfoKHR sgCreateInfo = { + VkRayTracingShaderGroupCreateInfoKHR groupCreateInfo = { .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .pNext = nullptr, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, @@ -1963,7 +1968,7 @@ namespace gtl .pShaderGroupCaptureReplayHandle = nullptr, }; - groups[i] = sgCreateInfo; + groups[i] = groupCreateInfo; } bool anyNullClosestHitShader = false; @@ -2533,7 +2538,8 @@ namespace gtl CGPU_RESOLVE_OR_RETURN_COMMAND_BUFFER(commandBuffer, icommandBuffer); CGPU_RESOLVE_OR_RETURN_DEVICE(icommandBuffer->device, idevice); - GbSmallVector barriers; + std::vector barriers; + barriers.reserve(64); /* FIXME: this has quadratic complexity */ const CgpuShaderReflection* reflection = &ishader->reflection; @@ -2647,15 +2653,17 @@ namespace gtl CGPU_RESOLVE_OR_RETURN_DEVICE(icommandBuffer->device, idevice); CGPU_RESOLVE_OR_RETURN_PIPELINE(pipeline, ipipeline); - GbSmallVector bufferInfos; - GbSmallVector imageInfos; + GbSmallVector bufferInfos; GbSmallVector asInfos; + std::vector imageInfos; + imageInfos.reserve(128); bufferInfos.reserve(bindings->bufferCount); imageInfos.reserve(bindings->imageCount + bindings->samplerCount); asInfos.reserve(bindings->tlasCount); - GbSmallVector writeDescriptorSets; + std::vector writeDescriptorSets; + writeDescriptorSets.reserve(128); /* FIXME: this has a rather high complexity */ for (uint32_t i = 0; i < ipipeline->descriptorSetLayoutBindings.size(); i++) @@ -3013,7 +3021,8 @@ namespace gtl CGPU_RESOLVE_OR_RETURN_COMMAND_BUFFER(commandBuffer, icommandBuffer); CGPU_RESOLVE_OR_RETURN_DEVICE(icommandBuffer->device, idevice); - GbSmallVector vkMemBarriers; + std::vector vkMemBarriers; + vkMemBarriers.reserve(128); for (uint32_t i = 0; i < barrier->memoryBarrierCount; ++i) { @@ -3030,8 +3039,10 @@ namespace gtl vkMemBarriers.push_back(bVk); } - GbSmallVector vkBufferMemBarriers; - GbSmallVector vkImageMemBarriers; + std::vector vkBufferMemBarriers; + vkBufferMemBarriers.reserve(16); + std::vector vkImageMemBarriers; + vkImageMemBarriers.reserve(128); for (uint32_t i = 0; i < barrier->bufferBarrierCount; ++i) { diff --git a/src/cgpu/impl/ShaderReflection.cpp b/src/cgpu/impl/ShaderReflection.cpp index 2b72890c..4b3df086 100644 --- a/src/cgpu/impl/ShaderReflection.cpp +++ b/src/cgpu/impl/ShaderReflection.cpp @@ -34,7 +34,7 @@ namespace gtl } bool result = false; - GbSmallVector bindings; + std::vector bindings; uint32_t bindingCount; if (spvReflectEnumerateDescriptorBindings(&shaderModule, &bindingCount, nullptr) != SPV_REFLECT_RESULT_SUCCESS) diff --git a/src/cgpu/impl/ShaderReflection.h b/src/cgpu/impl/ShaderReflection.h index fb3c038e..19979791 100644 --- a/src/cgpu/impl/ShaderReflection.h +++ b/src/cgpu/impl/ShaderReflection.h @@ -18,7 +18,7 @@ #pragma once #include -#include +#include namespace gtl { @@ -34,7 +34,7 @@ namespace gtl struct CgpuShaderReflection { uint32_t pushConstantsSize; - GbSmallVector bindings; + std::vector bindings; }; bool cgpuReflectShader(const uint32_t* spv, uint64_t size, CgpuShaderReflection* reflection); diff --git a/src/gb/gtl/gb/HandleStore.h b/src/gb/gtl/gb/HandleStore.h index dd035588..dfdb545b 100644 --- a/src/gb/gtl/gb/HandleStore.h +++ b/src/gb/gtl/gb/HandleStore.h @@ -20,14 +20,15 @@ #include #include #include - -#include "SmallVector.h" +#include namespace gtl { class GbHandleStore { public: + GbHandleStore(); + uint64_t allocateHandle(); bool isHandleValid(uint64_t handle) const; @@ -36,7 +37,7 @@ namespace gtl private: uint32_t m_maxIndex = 0; - GbSmallVector m_versions; - GbSmallVector m_freeList; + std::vector m_versions; + std::vector m_freeList; }; } diff --git a/src/gb/impl/HandleStore.cpp b/src/gb/impl/HandleStore.cpp index 10c6b567..ed5e5b3a 100644 --- a/src/gb/impl/HandleStore.cpp +++ b/src/gb/impl/HandleStore.cpp @@ -21,6 +21,12 @@ namespace gtl { + GbHandleStore::GbHandleStore() + { + m_versions.reserve(1024); + m_freeList.reserve(32); + } + uint64_t GbHandleStore::allocateHandle() { uint32_t index;