git.tdb.fi Git - libs/gl.git/commitdiff
Overhaul the Vulkan backend's memory allocator
author	Mikko Rasa <tdb@tdb.fi>
Tue, 25 Jan 2022 17:42:35 +0000 (19:42 +0200)
committer	Mikko Rasa <tdb@tdb.fi>
Tue, 25 Jan 2022 17:42:35 +0000 (19:42 +0200)
It now sub-allocates from larger chunks of memory to avoid hitting
Vulkan's allocation count limit.
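
The limit in question is VkPhysicalDeviceLimits::maxMemoryAllocationCount,
which the spec only requires to be at least 4096, so individual
vkAllocateMemory calls have to be rationed. The sketch below shows the
sub-allocation idea in a self-contained form: carve blocks out of one large
region, splitting on allocate and merging adjacent free neighbours on release.
The class and member names and the std::list representation are illustrative
assumptions, not the commit's actual data structures; the commit uses flat
vectors with integer prev/next indices and keeps each pool's free list sorted
by size.

    // Minimal sub-allocator sketch (illustrative; not the commit's actual API)
    #include <cstddef>
    #include <iterator>
    #include <list>
    #include <stdexcept>

    struct SubBlock
    {
        std::size_t offset = 0;
        std::size_t size = 0;
        bool allocated = false;
    };

    class RegionSuballocator
    {
    private:
        std::list<SubBlock> blocks;   // Contiguous blocks covering one region

    public:
        // One large driver-level allocation backs the entire region
        explicit RegionSuballocator(std::size_t region_size)
        { blocks.push_back({0, region_size, false}); }

        // First fit: take the first free block that is large enough and
        // split off the unused tail as a new free block
        std::size_t allocate(std::size_t size)
        {
            for(auto i=blocks.begin(); i!=blocks.end(); ++i)
                if(!i->allocated && i->size>=size)
                {
                    if(i->size>size)
                        blocks.insert(std::next(i), {i->offset+size, i->size-size, false});
                    i->size = size;
                    i->allocated = true;
                    return i->offset;
                }
            throw std::runtime_error("region exhausted");
        }

        // Mark the block free and merge it with free neighbours so the
        // region does not fragment into unusably small pieces
        void release(std::size_t offset)
        {
            for(auto i=blocks.begin(); i!=blocks.end(); ++i)
                if(i->offset==offset && i->allocated)
                {
                    i->allocated = false;
                    auto next = std::next(i);
                    if(next!=blocks.end() && !next->allocated)
                    {
                        i->size += next->size;
                        blocks.erase(next);
                    }
                    if(i!=blocks.begin() && !std::prev(i)->allocated)
                    {
                        auto prev = std::prev(i);
                        prev->size += i->size;
                        blocks.erase(i);
                    }
                    return;
                }
        }
    };

Alignment handling and the dedicated-region path for oversized resources are
left out of the sketch; the real allocate() in the diff below deals with both.
The offset a block ends up with is what vk.BindBufferMemory and
vk.BindImageMemory receive in the changed overloads.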

source/backends/vulkan/memoryallocator.cpp
source/backends/vulkan/memoryallocator.h

index b15344c7aeb39280a2f9c5d2e26ef765314687e0..227b54b6595904041cdff1d77c81b6944e0fa3b2 100644 (file)
@@ -20,52 +20,220 @@ MemoryAllocator::MemoryAllocator(Device &d):
        VkPhysicalDeviceMemoryProperties mem_props;
        vk.GetPhysicalDeviceMemoryProperties(mem_props);
 
+       for(unsigned i=0; i<mem_props.memoryHeapCount; ++i)
+               if(mem_props.memoryHeaps[i].flags&VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
+                       total_device_memory += mem_props.memoryHeaps[i].size;
+
+       default_region_size = total_device_memory/256;
+       default_region_size -= default_region_size%min_alignment;
+       direct_alloc_threshold = default_region_size/4;
+
        const VkMemoryPropertyFlags host_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-       memory_types.reserve(mem_props.memoryTypeCount);
+       pools.resize(mem_props.memoryTypeCount);
        for(unsigned i=0; i<mem_props.memoryTypeCount; ++i)
        {
                VkMemoryPropertyFlags flags = mem_props.memoryTypes[i].propertyFlags;
-               MemoryType type = UNKNOWN_MEMORY;
                if(flags&VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
                {
                        if((flags&host_flags)==host_flags)
-                               type = STREAMING_MEMORY;
+                               pools[i].type = STREAMING_MEMORY;
                        else
-                               type = DEVICE_MEMORY;
+                               pools[i].type = DEVICE_MEMORY;
                }
                else if((flags&host_flags)==host_flags)
-                       type = STAGING_MEMORY;
-               memory_types.push_back(type);
+                       pools[i].type = STAGING_MEMORY;
        }
 }
 
-unsigned MemoryAllocator::find_memory_type_index(unsigned mask, MemoryType type)
+MemoryAllocator::~MemoryAllocator()
+{
+       const VulkanFunctions &vk = device.get_functions();
+
+       for(Region &r: regions)
+               if(r.memory)
+                       vk.FreeMemory(r.memory);
+}
+
+unsigned MemoryAllocator::find_memory_pool(unsigned mask, MemoryType type)
 {
-       for(unsigned i=0; i<memory_types.size(); ++i)
-               if((mask&(1<<i)) && memory_types[i]==type)
+       for(unsigned i=0; i<pools.size(); ++i)
+               if((mask&(1<<i)) && pools[i].type==type)
                        return i;
        if(type==DEVICE_MEMORY || type==STAGING_MEMORY)
-               return find_memory_type_index(mask, STREAMING_MEMORY);
+               return find_memory_pool(mask, STREAMING_MEMORY);
        throw runtime_error("Unable to find suitable memory type");
 }
 
-unsigned MemoryAllocator::allocate(size_t size, unsigned type_bits, MemoryType type)
+unsigned MemoryAllocator::create_region(unsigned pool_index, size_t size, bool direct)
 {
        const VulkanFunctions &vk = device.get_functions();
 
        VkMemoryAllocateInfo alloc_info = { };
        alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
        alloc_info.allocationSize = size;
-       alloc_info.memoryTypeIndex = find_memory_type_index(type_bits, type);
+       alloc_info.memoryTypeIndex = pool_index;
+
+       Region region;
+       vk.AllocateMemory(alloc_info, region.memory);
+
+       region.pool = pool_index;
+       region.direct = direct;
+       region.size = size;
+       regions.push_back(region);
+
+       return regions.size()-1;
+}
+
+vector<unsigned>::iterator MemoryAllocator::lower_bound_by_size(vector<unsigned> &indices, size_t size)
+{
+       return lower_bound(indices, size, [this](unsigned j, unsigned s){ return blocks[j].size<s; });
+}
+
+unsigned MemoryAllocator::allocate(size_t size, size_t align, unsigned type_bits, MemoryType type)
+{
+       unsigned pool_index = find_memory_pool(type_bits, type);
+       Pool &pool = pools[pool_index];
+
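+       // Requests above the threshold get a dedicated region of their own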
+       if(size>=direct_alloc_threshold)
+       {
+               Block block;
+               block.region = create_region(pool_index, size, true);
+               block.size = size;
+               block.allocated = true;
+
+               blocks.push_back(block);
+               return blocks.size()-1;
+       }
 
-       Allocation alloc;
-       vk.AllocateMemory(alloc_info, alloc.memory);
+       if(pool.can_consolidate && blocks[pool.free_blocks.back()].size<size+align)
+               consolidate(pool_index);
 
-       alloc.type = type;
-       alloc.size = size;
-       allocations.push_back(alloc);
+       auto i = lower_bound_by_size(pool.free_blocks, size);
+       for(; i!=pool.free_blocks.end(); ++i)
+       {
+               Block &block = blocks[*i];
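+               // Padding needed to bring block.offset up to the requested alignment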
+               size_t offset = align-1-(block.offset+align-1)%align;
+               if(offset+size<=block.size)
+                       break;
+       }
+
+       unsigned block_index;
+       if(i!=pool.free_blocks.end())
+       {
+               block_index = *i;
+               pool.free_blocks.erase(i);
+               if(pool.free_blocks.empty())
+                       pool.can_consolidate = false;
+       }
+       else
+       {
+               Block block;
+               block.region = create_region(pool_index, default_region_size, false);
+               block.size = default_region_size;
+
+               blocks.push_back(block);
+               block_index = blocks.size()-1;
+       }
+
+       size_t offset = align-1-(blocks[block_index].offset+align-1)%align;
+       if(offset)
+       {
+               unsigned head_index = block_index;
+               block_index = split_block(block_index, offset);
+               pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[head_index].size), head_index);
+       }
+
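+       // Round the size up to a multiple of min_alignment and split off any leftover tail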
+       size += min_alignment-1;
+       size -= size%min_alignment;
+       if(blocks[block_index].size>=size+min_alignment)
+       {
+               unsigned tail_index = split_block(block_index, size);
+               pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[tail_index].size), tail_index);
+       }
+
+       blocks[block_index].allocated = true;
 
-       return allocations.size()-1;
+       return block_index;
+}
+
+unsigned MemoryAllocator::split_block(unsigned index, size_t head_size)
+{
+       blocks.emplace_back();
+       Block &block = blocks[index];
+       Block &tail = blocks.back();
+       unsigned tail_index = blocks.size()-1;
+
+       tail.region = block.region;
+       tail.offset = block.offset+head_size;
+       tail.size = block.size-head_size;
+       tail.prev = index;
+       tail.next = block.next;
+
+       block.size = head_size;
+       block.next = tail_index;
+
+       return tail_index;
+}
+
+void MemoryAllocator::consolidate(unsigned pool_index)
+{
+       Pool &pool = pools[pool_index];
+
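+       /* Merge runs of adjacent free blocks.  Blocks that were merged away are
+       dropped as the free list is compacted in place; the heads of merged runs
+       are collected separately, sorted by size. */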
+       vector<unsigned> merged_blocks;
+       unsigned i = 0;
+       for(unsigned j=0; j<pool.free_blocks.size(); ++j)
+       {
+               unsigned block_index = pool.free_blocks[j];
+               Block &block = blocks[block_index];
+               if(!block.allocated)
+               {
+                       if(block.prev<0 || blocks[block.prev].allocated)
+                       {
+                               if(block.next>=0 && !blocks[block.next].allocated)
+                               {
+                                       merge_block_with_next(block_index);
+
+                                       while(block.next>=0 && !blocks[block.next].allocated)
+                                               merge_block_with_next(block_index);
+
+                                       merged_blocks.insert(lower_bound_by_size(merged_blocks, block.size), block_index);
+                               }
+                       }
+                       else
+                               continue;
+               }
+
+               if(j!=i)
+                       pool.free_blocks[i] = block_index;
+               ++i;
+       }
+
+       pool.free_blocks.resize(i+merged_blocks.size());
+
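+       // Splice the two size-sorted lists together from the back, largest first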
+       if(!merged_blocks.empty())
+       {
+               unsigned j = merged_blocks.size();
+               for(unsigned k=pool.free_blocks.size()-1; j; --k)
+               {
+                       if(!i || blocks[merged_blocks[j-1]].size>blocks[pool.free_blocks[i-1]].size)
+                               pool.free_blocks[k] = merged_blocks[--j];
+                       else
+                               pool.free_blocks[k] = pool.free_blocks[--i];
+               }
+       }
+}
+
+void MemoryAllocator::merge_block_with_next(unsigned index)
+{
+       Block &block = blocks[index];
+
+       Block &next = blocks[block.next];
+       block.size += next.size;
+       block.next = next.next;
+       if(block.next>=0)
+               blocks[block.next].prev = index;
+
+       next = Block();
 }
 
 unsigned MemoryAllocator::allocate(VkBuffer buffer, MemoryType type)
@@ -75,11 +243,12 @@ unsigned MemoryAllocator::allocate(VkBuffer buffer, MemoryType type)
        VkMemoryRequirements requirements;
        vk.GetBufferMemoryRequirements(buffer, requirements);
 
-       unsigned index = allocate(requirements.size, requirements.memoryTypeBits, type);
+       unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type);
 
-       vk.BindBufferMemory(buffer, allocations[index].memory, 0);
+       Block &block = blocks[block_index];
+       vk.BindBufferMemory(buffer, regions[block.region].memory, block.offset);
 
-       return index+1;
+       return block_index+1;
 }
 
 unsigned MemoryAllocator::allocate(VkImage image, MemoryType type)
@@ -89,50 +258,75 @@ unsigned MemoryAllocator::allocate(VkImage image, MemoryType type)
        VkMemoryRequirements requirements;
        vk.GetImageMemoryRequirements(image, requirements);
 
-       unsigned index = allocate(requirements.size, requirements.memoryTypeBits, type);
+       unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type);
 
-       vk.BindImageMemory(image, allocations[index].memory, 0);
+       Block &block = blocks[block_index];
+       vk.BindImageMemory(image, regions[block.region].memory, block.offset);
 
-       return index+1;
+       return block_index+1;
 }
 
 void MemoryAllocator::release(unsigned id)
 {
-       if(!id || id>allocations.size() || !allocations[id-1].memory)
+       if(!id || id>blocks.size() || !blocks[id-1].allocated)
                throw key_error(id);
 
-       const VulkanFunctions &vk = device.get_functions();
+       unsigned block_index = id-1;
+       Block &block = blocks[block_index];
+
+       block.allocated = false;
 
-       vk.FreeMemory(allocations[id-1].memory);
+       Region &region = regions[block.region];
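+       // A dedicated region is freed outright; a pooled block rejoins the free list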
+       if(region.direct)
+       {
+               const VulkanFunctions &vk = device.get_functions();
+
+               vk.FreeMemory(region.memory);
+               region = Region();
+               block = Block();
+               return;
+       }
+
+       Pool &pool = pools[region.pool];
+       pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, block.size), block_index);
+       if((block.prev>=0 && !blocks[block.prev].allocated) || (block.next>=0 && !blocks[block.next].allocated))
+               pool.can_consolidate = true;
 }
 
 void *MemoryAllocator::map(unsigned id)
 {
-       if(!id || id>allocations.size() || !allocations[id-1].memory)
+       if(!id || id>blocks.size() || !blocks[id-1].allocated)
                throw key_error(id);
 
-       Allocation &alloc = allocations[id-1];
-
-       const VulkanFunctions &vk = device.get_functions();
+       Block &block = blocks[id-1];
+       Region &region = regions[block.region];
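+       // The region is mapped once as a whole; its blocks share the mapping via map_count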
+       if(!region.mapped_address)
+       {
+               const VulkanFunctions &vk = device.get_functions();
+               vk.MapMemory(region.memory, 0, region.size, 0, &region.mapped_address);
+       }
 
-       vk.MapMemory(alloc.memory, 0, alloc.size, 0, &alloc.mapped_address);
+       ++region.map_count;
 
-       return alloc.mapped_address;
+       return static_cast<char *>(region.mapped_address)+block.offset;
 }
 
 void MemoryAllocator::unmap(unsigned id)
 {
-       if(!id || id>allocations.size() || !allocations[id-1].memory)
+       if(!id || id>blocks.size() || !blocks[id-1].allocated)
                throw key_error(id);
 
-       Allocation &alloc = allocations[id-1];
-       if(!alloc.mapped_address)
-               throw invalid_operation("MemoryAllocator::unmap");
+       Block &block = blocks[id-1];
+       Region &region = regions[block.region];
 
-       const VulkanFunctions &vk = device.get_functions();
-
-       vk.UnmapMemory(alloc.memory);
-       alloc.mapped_address = 0;
+       if(!region.mapped_address)
+               throw invalid_operation("MemoryAllocator::unmap");
+       else if(!--region.map_count)
+       {
+               const VulkanFunctions &vk = device.get_functions();
+               vk.UnmapMemory(region.memory);
+               region.mapped_address = 0;
+       }
 }
 
 } // namespace GL
index 230f21a202126d176034f576981426eddab9f281..272f54e0ed79d87cd73180702558a5c0ee507b09 100644 (file)
@@ -2,6 +2,7 @@
 #define MSP_GL_VULKAN_MEMORYALLOCATOR_H_
 
 #include <vector>
+#include <msp/core/noncopyable.h>
 #include <msp/graphics/vulkancontext.h>
 #include "handles.h"
 
@@ -18,28 +19,58 @@ enum MemoryType
        STREAMING_MEMORY
 };
 
-class MemoryAllocator
+class MemoryAllocator: public NonCopyable
 {
 private:
-       struct Allocation
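+       // Per-memory-type bookkeeping: free blocks, kept sorted by size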
+       struct Pool
        {
-               VkDeviceMemory memory = 0;
                MemoryType type = UNKNOWN_MEMORY;
+               std::vector<unsigned> free_blocks;
+               bool can_consolidate = false;
+       };
+
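+       // A single VkDeviceMemory allocation, shared by many blocks unless direct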
+       struct Region
+       {
+               int pool = -1;
+               bool direct = false;
+               VkDeviceMemory memory = 0;
                std::size_t size = 0;
                void *mapped_address = 0;
+               unsigned map_count = 0;
+       };
+
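+       // A contiguous range within a region; prev/next link adjacent blocks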
+       struct Block
+       {
+               int region = -1;
+               bool allocated = false;
+               std::size_t offset = 0;
+               std::size_t size = 0;
+               int prev = -1;
+               int next = -1;
        };
 
        Device &device;
        VkPhysicalDevice phys_device;
-       std::vector<MemoryType> memory_types;
-       std::vector<Allocation> allocations;
+       std::size_t total_device_memory = 0;
+       std::size_t default_region_size = 0;
+       std::size_t direct_alloc_threshold = 0;
+       std::size_t min_alignment = 256;
+       std::vector<Pool> pools;
+       std::vector<Region> regions;
+       std::vector<Block> blocks;
 
 public:
        MemoryAllocator(Device &);
+       ~MemoryAllocator();
 
 private:
-       unsigned find_memory_type_index(unsigned, MemoryType);
-       unsigned allocate(std::size_t, unsigned, MemoryType);
+       unsigned find_memory_pool(unsigned, MemoryType);
+       unsigned create_region(unsigned, std::size_t, bool);
+       std::vector<unsigned>::iterator lower_bound_by_size(std::vector<unsigned> &, std::size_t);
+       unsigned allocate(std::size_t, std::size_t, unsigned, MemoryType);
+       unsigned split_block(unsigned, std::size_t);
+       void consolidate(unsigned);
+       void merge_block_with_next(unsigned);
 
 public:
        unsigned allocate(VkBuffer, MemoryType);