Skip to content

Commit

Permalink
cgpu/gb: fix use of GbSmallVector
Browse files Browse the repository at this point in the history
Although this does not change anything because we alias SmallVector to
std::vector right now, the concept has a non-zero cost: stack memory.
  • Loading branch information
pablode committed Aug 24, 2024
1 parent f73d83d commit 4eca1bb
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 44 deletions.
85 changes: 48 additions & 37 deletions src/cgpu/impl/Cgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <array>
#include <memory>

#include <volk.h>
Expand Down Expand Up @@ -97,17 +98,17 @@ namespace gtl

struct CgpuIPipeline
{
VkPipeline pipeline;
VkPipelineLayout layout;
VkDescriptorPool descriptorPool;
VkDescriptorSet descriptorSet;
VkDescriptorSetLayout descriptorSetLayout;
GbSmallVector<VkDescriptorSetLayoutBinding, 128> descriptorSetLayoutBindings;
VkPipelineBindPoint bindPoint;
VkStridedDeviceAddressRegionKHR sbtRgen;
VkStridedDeviceAddressRegionKHR sbtMiss;
VkStridedDeviceAddressRegionKHR sbtHit;
CgpuIBuffer sbt;
VkPipeline pipeline;
VkPipelineLayout layout;
VkDescriptorPool descriptorPool;
VkDescriptorSet descriptorSet;
VkDescriptorSetLayout descriptorSetLayout;
std::vector<VkDescriptorSetLayoutBinding> descriptorSetLayoutBindings;
VkPipelineBindPoint bindPoint;
VkStridedDeviceAddressRegionKHR sbtRgen;
VkStridedDeviceAddressRegionKHR sbtMiss;
VkStridedDeviceAddressRegionKHR sbtHit;
CgpuIBuffer sbt;
};

struct CgpuIShader
Expand Down Expand Up @@ -435,8 +436,8 @@ namespace gtl
return false;
}

GbSmallVector<const char*, 8> enabledLayers;
GbSmallVector<const char*, 8> enabledExtensions;
GbSmallVector<const char*, 4> enabledLayers;
GbSmallVector<const char*, 16> enabledExtensions;
bool debugUtilsEnabled = false;
#ifndef NDEBUG
{
Expand All @@ -460,7 +461,7 @@ namespace gtl
uint32_t extensionCount;
vkEnumerateInstanceExtensionProperties(nullptr, &extensionCount, nullptr);

GbSmallVector<VkExtensionProperties, 512> availableExtensions(extensionCount);
std::vector<VkExtensionProperties> availableExtensions(extensionCount);
vkEnumerateInstanceExtensionProperties(nullptr, &extensionCount, availableExtensions.data());

#ifndef NDEBUG
Expand Down Expand Up @@ -550,7 +551,7 @@ namespace gtl
GB_WARN("more than one device found -- choosing first one");
}

GbSmallVector<VkPhysicalDevice, 8> physicalDevices(physDeviceCount);
GbSmallVector<VkPhysicalDevice, 4> physicalDevices(physDeviceCount);
vkEnumeratePhysicalDevices(iinstance->instance, &physDeviceCount, physicalDevices.data());

idevice->physicalDevice = physicalDevices[0];
Expand Down Expand Up @@ -606,10 +607,10 @@ namespace gtl
uint32_t extensionCount;
vkEnumerateDeviceExtensionProperties(idevice->physicalDevice, nullptr, &extensionCount, nullptr);

GbSmallVector<VkExtensionProperties, 1024> extensions(extensionCount);
std::vector<VkExtensionProperties> extensions(extensionCount);
vkEnumerateDeviceExtensionProperties(idevice->physicalDevice, nullptr, &extensionCount, extensions.data());

GbSmallVector<const char*, 16> requiredExtensions = {
std::array<const char*, 10> requiredExtensions = {
VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,
VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME, // required by VK_KHR_acceleration_structure
VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, // required by VK_KHR_acceleration_structure
Expand All @@ -622,7 +623,7 @@ namespace gtl
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME
};

GbSmallVector<const char*, 32> enabledExtensions;
GbSmallVector<const char*, 8> enabledExtensions;
for (uint32_t i = 0; i < requiredExtensions.size(); i++)
{
const char* extension = requiredExtensions[i];
Expand Down Expand Up @@ -688,7 +689,7 @@ namespace gtl
uint32_t queueFamilyCount;
vkGetPhysicalDeviceQueueFamilyProperties(idevice->physicalDevice, &queueFamilyCount, nullptr);

GbSmallVector<VkQueueFamilyProperties, 32> queueFamilies(queueFamilyCount);
GbSmallVector<VkQueueFamilyProperties, 8> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(idevice->physicalDevice, &queueFamilyCount, queueFamilies.data());

uint32_t queueFamilyIndex = UINT32_MAX;
Expand Down Expand Up @@ -1555,8 +1556,10 @@ namespace gtl
static bool cgpuCreatePipelineDescriptors(CgpuIDevice* idevice, CgpuIPipeline* ipipeline, CgpuIShader* ishader, VkShaderStageFlags stageFlags)
{
const CgpuShaderReflection* shaderReflection = &ishader->reflection;
size_t bindingCount = shaderReflection->bindings.size();

for (uint32_t i = 0; i < shaderReflection->bindings.size(); i++)
ipipeline->descriptorSetLayoutBindings.resize(bindingCount);
for (uint32_t i = 0; i < bindingCount; i++)
{
const CgpuShaderReflectionBinding* binding_reflection = &shaderReflection->bindings[i];

Expand All @@ -1568,14 +1571,14 @@ namespace gtl
.pImmutableSamplers = nullptr,
};

ipipeline->descriptorSetLayoutBindings.push_back(layoutBinding);
ipipeline->descriptorSetLayoutBindings[i] = layoutBinding;
}

VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = (uint32_t) ipipeline->descriptorSetLayoutBindings.size(),
.bindingCount = (uint32_t) bindingCount,
.pBindings = ipipeline->descriptorSetLayoutBindings.data(),
};

Expand Down Expand Up @@ -1816,7 +1819,7 @@ namespace gtl
uint32_t firstGroup = 0;
uint32_t dataSize = handleSize * groupCount;

GbSmallVector<uint8_t, 64> handleData(dataSize);
std::vector<uint8_t> handleData(dataSize);
if (idevice->table.vkGetRayTracingShaderGroupHandlesKHR(idevice->logicalDevice, ipipeline->pipeline, firstGroup, groupCount, handleData.size(), handleData.data()) != VK_SUCCESS)
{
CGPU_RETURN_ERROR("failed to create sbt handles");
Expand Down Expand Up @@ -1886,11 +1889,13 @@ namespace gtl
CGPU_RESOLVE_OR_RETURN_SHADER(createInfo.rgenShader, irgenShader);

// Set up stages
GbSmallVector<VkPipelineShaderStageCreateInfo, 128> stages;
std::vector<VkPipelineShaderStageCreateInfo> stages;
stages.reserve(256);

VkShaderStageFlags shaderStageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR;

auto pushStage = [&stages](VkShaderStageFlagBits stage, VkShaderModule module) {
VkPipelineShaderStageCreateInfo pipeline_shader_stage_create_info = {
VkPipelineShaderStageCreateInfo stageCreateInfo = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
Expand All @@ -1899,7 +1904,7 @@ namespace gtl
.pName = "main",
.pSpecializationInfo = nullptr,
};
stages.push_back(pipeline_shader_stage_create_info);
stages.push_back(stageCreateInfo);
};

// Ray gen
Expand Down Expand Up @@ -1947,12 +1952,12 @@ namespace gtl
}

// Set up groups
GbSmallVector<VkRayTracingShaderGroupCreateInfoKHR, 128> groups;
groups.resize(1/*rgen*/ + createInfo.missShaderCount + createInfo.hitGroupCount);
size_t groupCount = 1/*rgen*/ + createInfo.missShaderCount + createInfo.hitGroupCount;
std::vector<VkRayTracingShaderGroupCreateInfoKHR> groups(groupCount);

for (uint32_t i = 0; i < groups.size(); i++)
{
VkRayTracingShaderGroupCreateInfoKHR sgCreateInfo = {
VkRayTracingShaderGroupCreateInfoKHR groupCreateInfo = {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
.pNext = nullptr,
.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
Expand All @@ -1963,7 +1968,7 @@ namespace gtl
.pShaderGroupCaptureReplayHandle = nullptr,
};

groups[i] = sgCreateInfo;
groups[i] = groupCreateInfo;
}

bool anyNullClosestHitShader = false;
Expand Down Expand Up @@ -2533,7 +2538,8 @@ namespace gtl
CGPU_RESOLVE_OR_RETURN_COMMAND_BUFFER(commandBuffer, icommandBuffer);
CGPU_RESOLVE_OR_RETURN_DEVICE(icommandBuffer->device, idevice);

GbSmallVector<VkImageMemoryBarrier2KHR, 64> barriers;
std::vector<VkImageMemoryBarrier2KHR> barriers;
barriers.reserve(64);

/* FIXME: this has quadratic complexity */
const CgpuShaderReflection* reflection = &ishader->reflection;
Expand Down Expand Up @@ -2647,15 +2653,17 @@ namespace gtl
CGPU_RESOLVE_OR_RETURN_DEVICE(icommandBuffer->device, idevice);
CGPU_RESOLVE_OR_RETURN_PIPELINE(pipeline, ipipeline);

GbSmallVector<VkDescriptorBufferInfo, 64> bufferInfos;
GbSmallVector<VkDescriptorImageInfo, 128> imageInfos;
GbSmallVector<VkDescriptorBufferInfo, 8> bufferInfos;
GbSmallVector<VkWriteDescriptorSetAccelerationStructureKHR, 1> asInfos;
std::vector<VkDescriptorImageInfo> imageInfos;
imageInfos.reserve(128);

bufferInfos.reserve(bindings->bufferCount);
imageInfos.reserve(bindings->imageCount + bindings->samplerCount);
asInfos.reserve(bindings->tlasCount);

GbSmallVector<VkWriteDescriptorSet, 128> writeDescriptorSets;
std::vector<VkWriteDescriptorSet> writeDescriptorSets;
writeDescriptorSets.reserve(128);

/* FIXME: this has a rather high complexity */
for (uint32_t i = 0; i < ipipeline->descriptorSetLayoutBindings.size(); i++)
Expand Down Expand Up @@ -3013,7 +3021,8 @@ namespace gtl
CGPU_RESOLVE_OR_RETURN_COMMAND_BUFFER(commandBuffer, icommandBuffer);
CGPU_RESOLVE_OR_RETURN_DEVICE(icommandBuffer->device, idevice);

GbSmallVector<VkMemoryBarrier2KHR, 128> vkMemBarriers;
std::vector<VkMemoryBarrier2KHR> vkMemBarriers;
vkMemBarriers.reserve(128);

for (uint32_t i = 0; i < barrier->memoryBarrierCount; ++i)
{
Expand All @@ -3030,8 +3039,10 @@ namespace gtl
vkMemBarriers.push_back(bVk);
}

GbSmallVector<VkBufferMemoryBarrier2KHR, 32> vkBufferMemBarriers;
GbSmallVector<VkImageMemoryBarrier2KHR, 128> vkImageMemBarriers;
std::vector<VkBufferMemoryBarrier2KHR> vkBufferMemBarriers;
vkBufferMemBarriers.reserve(16);
std::vector<VkImageMemoryBarrier2KHR> vkImageMemBarriers;
vkImageMemBarriers.reserve(128);

for (uint32_t i = 0; i < barrier->bufferBarrierCount; ++i)
{
Expand Down
2 changes: 1 addition & 1 deletion src/cgpu/impl/ShaderReflection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace gtl
}

bool result = false;
GbSmallVector<SpvReflectDescriptorBinding*, 32> bindings;
std::vector<SpvReflectDescriptorBinding*> bindings;

uint32_t bindingCount;
if (spvReflectEnumerateDescriptorBindings(&shaderModule, &bindingCount, nullptr) != SPV_REFLECT_RESULT_SUCCESS)
Expand Down
4 changes: 2 additions & 2 deletions src/cgpu/impl/ShaderReflection.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#pragma once

#include <stdint.h>
#include <gtl/gb/SmallVector.h>
#include <vector>

namespace gtl
{
Expand All @@ -34,7 +34,7 @@ namespace gtl
struct CgpuShaderReflection
{
uint32_t pushConstantsSize;
GbSmallVector<CgpuShaderReflectionBinding, 32> bindings;
std::vector<CgpuShaderReflectionBinding> bindings;
};

bool cgpuReflectShader(const uint32_t* spv, uint64_t size, CgpuShaderReflection* reflection);
Expand Down
9 changes: 5 additions & 4 deletions src/gb/gtl/gb/HandleStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>

#include "SmallVector.h"
#include <vector>

namespace gtl
{
class GbHandleStore
{
public:
GbHandleStore();

uint64_t allocateHandle();

bool isHandleValid(uint64_t handle) const;
Expand All @@ -36,7 +37,7 @@ namespace gtl

private:
uint32_t m_maxIndex = 0;
GbSmallVector<uint32_t, 1024> m_versions;
GbSmallVector<uint32_t, 64> m_freeList;
std::vector<uint32_t> m_versions;
std::vector<uint32_t> m_freeList;
};
}
6 changes: 6 additions & 0 deletions src/gb/impl/HandleStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@

namespace gtl
{
GbHandleStore::GbHandleStore()
{
m_versions.reserve(1024);
m_freeList.reserve(32);
}

uint64_t GbHandleStore::allocateHandle()
{
uint32_t index;
Expand Down

0 comments on commit 4eca1bb

Please sign in to comment.