Skip to content

Commit

Permalink
Implemented crude memory allocator for Vulkan (#1429)
Browse files Browse the repository at this point in the history
This is enough to stay well below the 4096 allocation limit
  • Loading branch information
jrouwe authored Dec 30, 2024
1 parent c6157d4 commit 636b080
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 43 deletions.
20 changes: 3 additions & 17 deletions TestFramework/Renderer/VK/BufferVK.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,12 @@
class BufferVK
{
public:
/// Free memory associated with a buffer
void Free(VkDevice inDevice)
{
if (mBuffer != VK_NULL_HANDLE)
{
vkDestroyBuffer(inDevice, mBuffer, nullptr);
mBuffer = VK_NULL_HANDLE;
}

if (mMemory != VK_NULL_HANDLE)
{
vkFreeMemory(inDevice, mMemory, nullptr);
mMemory = VK_NULL_HANDLE;
}
}

VkBuffer mBuffer = VK_NULL_HANDLE;
VkDeviceMemory mMemory = VK_NULL_HANDLE;
VkDeviceSize mOffset = 0;
VkDeviceSize mSize = 0;

VkBufferUsageFlags mUsage;
VkMemoryPropertyFlags mProperties;
VkDeviceSize mSize = 0;
VkDeviceSize mAllocatedSize;
};
2 changes: 1 addition & 1 deletion TestFramework/Renderer/VK/ConstantBufferVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ConstantBufferVK::~ConstantBufferVK()
void *ConstantBufferVK::MapInternal()
{
void *data = nullptr;
FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mBuffer.mMemory, 0, mBuffer.mSize, 0, &data));
FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mBuffer.mMemory, mBuffer.mOffset, mBuffer.mSize, 0, &data));
return data;
}

Expand Down
2 changes: 1 addition & 1 deletion TestFramework/Renderer/VK/RenderInstancesVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void RenderInstancesVK::CreateBuffer(int inNumInstances, int inInstanceSize)
void *RenderInstancesVK::Lock()
{
void *data;
FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mInstancesBuffer.mMemory, 0, mInstancesBuffer.mSize, 0, &data));
FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mInstancesBuffer.mMemory, mInstancesBuffer.mOffset, mInstancesBuffer.mSize, 0, &data));
return data;
}

Expand Down
4 changes: 2 additions & 2 deletions TestFramework/Renderer/VK/RenderPrimitiveVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void *RenderPrimitiveVK::LockVertexBuffer()
JPH_ASSERT(!mVertexBufferDeviceLocal);

void *data;
FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mVertexBuffer.mMemory, 0, VkDeviceSize(mNumVtx) * mVtxSize, 0, &data));
FatalErrorIfFailed(vkMapMemory(mRenderer->GetDevice(), mVertexBuffer.mMemory, mVertexBuffer.mOffset, VkDeviceSize(mNumVtx) * mVtxSize, 0, &data));
return data;
}

Expand Down Expand Up @@ -70,7 +70,7 @@ uint32 *RenderPrimitiveVK::LockIndexBuffer()
JPH_ASSERT(!mIndexBufferDeviceLocal);

void *data;
vkMapMemory(mRenderer->GetDevice(), mIndexBuffer.mMemory, 0, VkDeviceSize(mNumIdx) * sizeof(uint32), 0, &data);
vkMapMemory(mRenderer->GetDevice(), mIndexBuffer.mMemory, mIndexBuffer.mOffset, VkDeviceSize(mNumIdx) * sizeof(uint32), 0, &data);
return reinterpret_cast<uint32 *>(data);
}

Expand Down
116 changes: 98 additions & 18 deletions TestFramework/Renderer/VK/RendererVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ RendererVK::~RendererVK()
{
vkDeviceWaitIdle(mDevice);

// Trace allocation stats
Trace("VK: Max allocations: %u, max size: %u MB", mMaxNumAllocations, uint32(mMaxTotalAllocated >> 20));

// Destroy the shadow map
mShadowMap = nullptr;
vkDestroyFramebuffer(mDevice, mShadowFrameBuffer, nullptr);
Expand All @@ -50,16 +53,22 @@ RendererVK::~RendererVK()
cb = nullptr;
for (unique_ptr<ConstantBufferVK> &cb : mPixelShaderConstantBuffer)
cb = nullptr;

// Free all buffers
for (BufferCache &bc : mFreedBuffers)
for (BufferCache::value_type &vt : bc)
for (BufferVK &bvk : vt.second)
bvk.Free(mDevice);
FreeBufferInternal(bvk);
for (BufferCache::value_type &vt : mBufferCache)
for (BufferVK &bvk : vt.second)
bvk.Free(mDevice);
FreeBufferInternal(bvk);

// Free all blocks in the memory cache
for (MemoryCache::value_type &mc : mMemoryCache)
for (Memory &m : mc.second)
if (m.mOffset == 0)
vkFreeMemory(mDevice, m.mMemory, nullptr); // Don't care about memory tracking anymore

for (VkFence fence : mInFlightFences)
vkDestroyFence(mDevice, fence, nullptr);

Expand Down Expand Up @@ -735,8 +744,8 @@ void RendererVK::DestroySwapChain()
if (mDepthImageView != VK_NULL_HANDLE)
{
vkDestroyImageView(mDevice, mDepthImageView, nullptr);
vkDestroyImage(mDevice, mDepthImage, nullptr);
vkFreeMemory(mDevice, mDepthImageMemory, nullptr);

DestroyImage(mDepthImage, mDepthImageMemory);
}

for (VkFramebuffer frame_buffer : mSwapChainFramebuffers)
Expand Down Expand Up @@ -794,7 +803,7 @@ void RendererVK::BeginFrame(const CameraState &inCamera, float inWorldScale)
// Free buffers that weren't used this frame
for (BufferCache::value_type &vt : mBufferCache)
for (BufferVK &bvk : vt.second)
bvk.Free(mDevice);
FreeBufferInternal(bvk);
mBufferCache.clear();

// Recycle the buffers that were freed
Expand Down Expand Up @@ -986,6 +995,32 @@ uint32 RendererVK::FindMemoryType(uint32 inTypeFilter, VkMemoryPropertyFlags inP
FatalError("Failed to find memory type!");
}

/// Allocate inSize bytes of device memory and record the allocation in the usage statistics.
/// @param inSize Number of bytes to allocate
/// @param inMemoryTypeBits Memory type filter, forwarded to FindMemoryType
/// @param inProperties Requested memory properties, forwarded to FindMemoryType
/// @param outMemory Receives the handle of the new allocation
/// A failing vkAllocateMemory is routed through FatalErrorIfFailed.
void RendererVK::AllocateMemory(VkDeviceSize inSize, uint32 inMemoryTypeBits, VkMemoryPropertyFlags inProperties, VkDeviceMemory &outMemory)
{
	// Describe the allocation
	VkMemoryAllocateInfo info = {};
	info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	info.memoryTypeIndex = FindMemoryType(inMemoryTypeBits, inProperties);
	info.allocationSize = inSize;

	// Perform the allocation
	FatalErrorIfFailed(vkAllocateMemory(mDevice, &info, nullptr, &outMemory));

	// Update the live counters
	mTotalAllocated += inSize;
	++mNumAllocations;

	// Update the high water marks
	if (mNumAllocations > mMaxNumAllocations)
		mMaxNumAllocations = mNumAllocations;
	if (mTotalAllocated > mMaxTotalAllocated)
		mMaxTotalAllocated = mTotalAllocated;
}

/// Release a device memory allocation and remove it from the usage statistics.
/// @param inMemory Handle of the allocation to release
/// @param inSize Size in bytes that is subtracted from the running total
void RendererVK::FreeMemory(VkDeviceMemory inMemory, VkDeviceSize inSize)
{
	// Remove the allocation from the live counters
	mTotalAllocated -= inSize;
	--mNumAllocations;

	// Hand the memory back to the device
	vkFreeMemory(mDevice, inMemory, nullptr);
}

void RendererVK::CreateBuffer(VkDeviceSize inSize, VkBufferUsageFlags inUsage, VkMemoryPropertyFlags inProperties, BufferVK &outBuffer)
{
// Check the cache
Expand All @@ -1012,14 +1047,40 @@ void RendererVK::CreateBuffer(VkDeviceSize inSize, VkBufferUsageFlags inUsage, V
VkMemoryRequirements mem_requirements;
vkGetBufferMemoryRequirements(mDevice, outBuffer.mBuffer, &mem_requirements);

VkMemoryAllocateInfo alloc_info = {};
alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
alloc_info.allocationSize = mem_requirements.size;
alloc_info.memoryTypeIndex = FindMemoryType(mem_requirements.memoryTypeBits, inProperties);
if (mem_requirements.size > cMaxAllocSize)
{
// Allocate block directly
AllocateMemory(mem_requirements.size, mem_requirements.memoryTypeBits, inProperties, outBuffer.mMemory);
outBuffer.mAllocatedSize = mem_requirements.size;
outBuffer.mOffset = 0;
}
else
{
// Round allocation to the next power of 2 so that we can use a simple block based allocator
outBuffer.mAllocatedSize = max(VkDeviceSize(GetNextPowerOf2(uint32(mem_requirements.size))), cMinAllocSize);

// Ensure that we have memory available from the right pool
Array<Memory> &mem_array = mMemoryCache[{ outBuffer.mAllocatedSize, outBuffer.mUsage, outBuffer.mProperties }];
if (mem_array.empty())
{
// Allocate a bigger block
VkDeviceMemory device_memory;
AllocateMemory(cBlockSize, mem_requirements.memoryTypeBits, inProperties, device_memory);

FatalErrorIfFailed(vkAllocateMemory(mDevice, &alloc_info, nullptr, &outBuffer.mMemory));
// Divide into sub blocks
for (VkDeviceSize offset = 0; offset < cBlockSize; offset += outBuffer.mAllocatedSize)
mem_array.push_back({ device_memory, offset });
}

vkBindBufferMemory(mDevice, outBuffer.mBuffer, outBuffer.mMemory, 0);
// Claim memory from the pool
Memory &memory = mem_array.back();
outBuffer.mMemory = memory.mMemory;
outBuffer.mOffset = memory.mOffset;
mem_array.pop_back();
}

// Bind the memory to the buffer
vkBindBufferMemory(mDevice, outBuffer.mBuffer, outBuffer.mMemory, outBuffer.mOffset);
}

VkCommandBuffer RendererVK::StartTempCommandBuffer()
Expand Down Expand Up @@ -1073,7 +1134,7 @@ void RendererVK::CreateDeviceLocalBuffer(const void *inData, VkDeviceSize inSize
CreateBuffer(inSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, staging_buffer);

void *data;
vkMapMemory(mDevice, staging_buffer.mMemory, 0, inSize, 0, &data);
vkMapMemory(mDevice, staging_buffer.mMemory, staging_buffer.mOffset, inSize, 0, &data);
memcpy(data, inData, (size_t)inSize);
vkUnmapMemory(mDevice, staging_buffer.mMemory);

Expand All @@ -1093,6 +1154,19 @@ void RendererVK::FreeBuffer(BufferVK &ioBuffer)
}
}

/// Destroy the Vulkan buffer object and release the memory that backs it.
/// Allocations larger than cMaxAllocSize own their device memory and are freed directly,
/// smaller ones return their sub block to the memory cache so it can be reused.
/// @param ioBuffer Buffer to release, its buffer and memory handles are reset to VK_NULL_HANDLE
void RendererVK::FreeBufferInternal(BufferVK &ioBuffer)
{
	// Destroy the buffer
	vkDestroyBuffer(mDevice, ioBuffer.mBuffer, nullptr);
	ioBuffer.mBuffer = VK_NULL_HANDLE;

	// A buffer without memory has nothing to release. Bailing out here avoids reading mAllocatedSize
	// (which has no default value), corrupting the allocation counters or putting a null handle in the pool.
	if (ioBuffer.mMemory == VK_NULL_HANDLE)
		return;

	if (ioBuffer.mAllocatedSize > cMaxAllocSize)
	{
		// Block was allocated directly
		FreeMemory(ioBuffer.mMemory, ioBuffer.mAllocatedSize);
	}
	else
	{
		// Sub block goes back to the pool it was claimed from
		mMemoryCache[{ ioBuffer.mAllocatedSize, ioBuffer.mUsage, ioBuffer.mProperties }].push_back({ ioBuffer.mMemory, ioBuffer.mOffset });
	}
	ioBuffer.mMemory = VK_NULL_HANDLE;
}

unique_ptr<ConstantBufferVK> RendererVK::CreateConstantBuffer(VkDeviceSize inBufferSize)
{
return make_unique<ConstantBufferVK>(this, inBufferSize);
Expand Down Expand Up @@ -1136,15 +1210,21 @@ void RendererVK::CreateImage(uint32 inWidth, uint32 inHeight, VkFormat inFormat,
VkMemoryRequirements mem_requirements;
vkGetImageMemoryRequirements(mDevice, outImage, &mem_requirements);

VkMemoryAllocateInfo alloc_info = {};
alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
alloc_info.allocationSize = mem_requirements.size;
alloc_info.memoryTypeIndex = FindMemoryType(mem_requirements.memoryTypeBits, inProperties);
FatalErrorIfFailed(vkAllocateMemory(mDevice, &alloc_info, nullptr, &outMemory));
AllocateMemory(mem_requirements.size, mem_requirements.memoryTypeBits, inProperties, outMemory);

vkBindImageMemory(mDevice, outImage, outMemory, 0);
}

/// Destroy an image and free the device memory bound to it.
/// @param inImage Image to destroy
/// @param inMemory Device memory that backs the image
void RendererVK::DestroyImage(VkImage inImage, VkDeviceMemory inMemory)
{
	// Query the memory size while the image still exists, CreateImage allocated mem_requirements.size bytes
	// so the same number is needed to keep the allocation statistics balanced
	VkMemoryRequirements requirements;
	vkGetImageMemoryRequirements(mDevice, inImage, &requirements);
	const VkDeviceSize allocated_size = requirements.size;

	// Destroy the image first, then release its memory
	vkDestroyImage(mDevice, inImage, nullptr);
	FreeMemory(inMemory, allocated_size);
}

void RendererVK::UpdateViewPortAndScissorRect(uint32 inWidth, uint32 inHeight)
{
VkCommandBuffer command_buffer = GetCommandBuffer();
Expand Down
29 changes: 28 additions & 1 deletion TestFramework/Renderer/VK/RendererVK.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,21 @@ class RendererVK : public Renderer
VkCommandBuffer GetCommandBuffer() { JPH_ASSERT(mInFrame); return mCommandBuffers[mFrameIndex]; }
VkCommandBuffer StartTempCommandBuffer();
void EndTempCommandBuffer(VkCommandBuffer inCommandBuffer);
void AllocateMemory(VkDeviceSize inSize, uint32 inMemoryTypeBits, VkMemoryPropertyFlags inProperties, VkDeviceMemory &outMemory);
void FreeMemory(VkDeviceMemory inMemory, VkDeviceSize inSize);
void CreateBuffer(VkDeviceSize inSize, VkBufferUsageFlags inUsage, VkMemoryPropertyFlags inProperties, BufferVK &outBuffer);
void CopyBuffer(VkBuffer inSrc, VkBuffer inDst, VkDeviceSize inSize);
void CreateDeviceLocalBuffer(const void *inData, VkDeviceSize inSize, VkBufferUsageFlags inUsage, BufferVK &outBuffer);
void FreeBuffer(BufferVK &ioBuffer);
unique_ptr<ConstantBufferVK> CreateConstantBuffer(VkDeviceSize inBufferSize);
void CreateImage(uint32 inWidth, uint32 inHeight, VkFormat inFormat, VkImageTiling inTiling, VkImageUsageFlags inUsage, VkMemoryPropertyFlags inProperties, VkImage &outImage, VkDeviceMemory &outMemory);
void DestroyImage(VkImage inImage, VkDeviceMemory inMemory);
VkImageView CreateImageView(VkImage inImage, VkFormat inFormat, VkImageAspectFlags inAspectFlags);
VkFormat FindDepthFormat();

private:
uint32 FindMemoryType(uint32 inTypeFilter, VkMemoryPropertyFlags inProperties);
void FreeBufferInternal(BufferVK &ioBuffer);
VkSurfaceFormatKHR SelectFormat(VkPhysicalDevice inDevice);
void CreateSwapChain(VkPhysicalDevice inDevice);
void DestroySwapChain();
Expand Down Expand Up @@ -103,7 +107,7 @@ class RendererVK : public Renderer
unique_ptr<ConstantBufferVK> mVertexShaderConstantBufferProjection[cFrameCount];
unique_ptr<ConstantBufferVK> mVertexShaderConstantBufferOrtho[cFrameCount];
unique_ptr<ConstantBufferVK> mPixelShaderConstantBuffer[cFrameCount];

struct Key
{
bool operator == (const Key &inRHS) const
Expand All @@ -118,8 +122,31 @@ class RendererVK : public Renderer

JPH_MAKE_HASH_STRUCT(Key, KeyHasher, t.mSize, t.mUsage, t.mProperties)

// We try to recycle buffers from frame to frame
using BufferCache = UnorderedMap<Key, Array<BufferVK>, KeyHasher>;

BufferCache mFreedBuffers[cFrameCount];
BufferCache mBufferCache;

// Smaller allocations (from cMinAllocSize to cMaxAllocSize) will be done in blocks of cBlockSize bytes.
// We do this because there is a limit to the number of allocations that we can make in Vulkan.
static constexpr VkDeviceSize cMinAllocSize = 512;
static constexpr VkDeviceSize cMaxAllocSize = 65536;
static constexpr VkDeviceSize cBlockSize = 524288;

JPH_MAKE_HASH_STRUCT(Key, MemKeyHasher, t.mUsage, t.mProperties, t.mSize)

// A free sub block of device memory that is available in the memory cache
struct Memory
{
// Device memory allocation that contains the sub block
VkDeviceMemory mMemory;
// Byte offset of the sub block inside mMemory, this is the offset the buffer gets bound at
VkDeviceSize mOffset;
};

// Free sub blocks per (allocated size, usage, properties), hashed with the hasher declared for this cache
using MemoryCache = UnorderedMap<Key, Array<Memory>, MemKeyHasher>;

MemoryCache mMemoryCache;
uint32 mNumAllocations = 0;
uint32 mMaxNumAllocations = 0;
VkDeviceSize mTotalAllocated = 0;
VkDeviceSize mMaxTotalAllocated = 0;
};
6 changes: 3 additions & 3 deletions TestFramework/Renderer/VK/TextureVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ TextureVK::TextureVK(RendererVK *inRenderer, const Surface *inSurface) :
// Copy data to upload texture
surface->Lock(ESurfaceLockMode::Read);
void *data;
vkMapMemory(device, staging_buffer.mMemory, 0, image_size, 0, &data);
vkMapMemory(device, staging_buffer.mMemory, staging_buffer.mOffset, image_size, 0, &data);
for (int y = 0; y < mHeight; ++y)
memcpy(reinterpret_cast<uint8 *>(data) + y * mWidth * bpp, surface->GetData() + y * surface->GetStride(), mWidth * bpp);
vkUnmapMemory(device, staging_buffer.mMemory);
Expand Down Expand Up @@ -109,8 +109,8 @@ TextureVK::~TextureVK()
vkDeviceWaitIdle(device);

vkDestroyImageView(device, mImageView, nullptr);
vkDestroyImage(device, mImage, nullptr);
vkFreeMemory(device, mImageMemory, nullptr);

mRenderer->DestroyImage(mImage, mImageMemory);
}
}

Expand Down

0 comments on commit 636b080

Please sign in to comment.