git.tdb.fi Git - libs/gl.git/blobdiff - source/backends/vulkan/memoryallocator.cpp
Minor tweaks to MemoryAllocator
[libs/gl.git] / source / backends / vulkan / memoryallocator.cpp
index 6ef5d2bd7a6b8714fb545997819abb11e1995fa9..82155faf41624ef03829cd994ebbbfc2eab246ab 100644 (file)
@@ -1,5 +1,8 @@
 #include <msp/core/algorithm.h>
+#include <msp/core/maputils.h>
 #include <msp/graphics/vulkancontext_platform.h>
+#include <msp/stringcodec/utf8.h>
+#include <msp/strings/format.h>
 #include "device.h"
 #include "error.h"
 #include "memoryallocator.h"
@@ -19,62 +22,220 @@ MemoryAllocator::MemoryAllocator(Device &d):
        VkPhysicalDeviceMemoryProperties mem_props;
        vk.GetPhysicalDeviceMemoryProperties(mem_props);
 
+       for(unsigned i=0; i<mem_props.memoryHeapCount; ++i)
+               if(mem_props.memoryHeaps[i].flags&VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
+                       total_device_memory += mem_props.memoryHeaps[i].size;
+
+       default_region_size = total_device_memory/256;
+       default_region_size -= default_region_size%min_alignment;
+       direct_alloc_threshold = default_region_size/4;
+
        const VkMemoryPropertyFlags host_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-       memory_types.reserve(mem_props.memoryTypeCount);
+       pools.resize(mem_props.memoryTypeCount);
        for(unsigned i=0; i<mem_props.memoryTypeCount; ++i)
        {
                VkMemoryPropertyFlags flags = mem_props.memoryTypes[i].propertyFlags;
-               MemoryType type = UNKNOWN_MEMORY;
                if(flags&VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
                {
                        if((flags&host_flags)==host_flags)
-                               type = STREAMING_MEMORY;
+                               pools[i].type = STREAMING_MEMORY;
                        else
-                               type = DEVICE_MEMORY;
+                               pools[i].type = DEVICE_MEMORY;
                }
                else if((flags&host_flags)==host_flags)
-                       type = STAGING_MEMORY;
-               memory_types.push_back(type);
+                       pools[i].type = STAGING_MEMORY;
        }
 }
 
-unsigned MemoryAllocator::find_memory_type_index(unsigned mask, MemoryType type)
+MemoryAllocator::~MemoryAllocator()
 {
-       for(unsigned i=0; i<memory_types.size(); ++i)
-               if((mask&(1<<i)) && memory_types[i]==type)
+       const VulkanFunctions &vk = device.get_functions();
+
+       for(Region &r: regions)
+               if(r.memory)
+                       vk.FreeMemory(r.memory);
+}
+
+unsigned MemoryAllocator::find_memory_pool(unsigned mask, MemoryType type) const
+{
+       for(unsigned i=0; i<pools.size(); ++i)
+               if((mask&(1<<i)) && pools[i].type==type)
                        return i;
        if(type==DEVICE_MEMORY || type==STAGING_MEMORY)
-               return find_memory_type_index(mask, STREAMING_MEMORY);
+               return find_memory_pool(mask, STREAMING_MEMORY);
        throw runtime_error("Unable to find suitable memory type");
 }
 
-unsigned MemoryAllocator::allocate(size_t size, unsigned type_bits, MemoryType type)
+unsigned MemoryAllocator::create_region(unsigned pool_index, size_t size, bool direct)
 {
        const VulkanFunctions &vk = device.get_functions();
 
        VkMemoryAllocateInfo alloc_info = { };
        alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
        alloc_info.allocationSize = size;
-       alloc_info.memoryTypeIndex = find_memory_type_index(type_bits, type);
+       alloc_info.memoryTypeIndex = pool_index;
+
+       Region region;
+       vk.AllocateMemory(alloc_info, region.memory);
+
+       region.pool = pool_index;
+       region.direct = direct;
+       region.size = size;
+       regions.push_back(region);
+
+       return regions.size()-1;
+}
+
+vector<unsigned>::iterator MemoryAllocator::lower_bound_by_size(vector<unsigned> &indices, size_t size) const
+{
+       return lower_bound(indices, size, [this](unsigned j, unsigned s){ return blocks[j].size<s; });
+}
+
+unsigned MemoryAllocator::allocate(size_t size, size_t align, unsigned type_bits, MemoryType type)
+{
+       unsigned pool_index = find_memory_pool(type_bits, type);
+       Pool &pool = pools[pool_index];
+
+       if(size>=direct_alloc_threshold)
+       {
+               Block block;
+               block.region = create_region(pool_index, size, true);
+               block.size = size;
+               block.allocated = true;
+
+               blocks.push_back(block);
+               return blocks.size()-1;
+       }
+
+       if(pool.can_consolidate && blocks[pool.free_blocks.back()].size<size+align)
+               consolidate(pool_index);
+
+       auto i = lower_bound_by_size(pool.free_blocks, size);
+       for(; i!=pool.free_blocks.end(); ++i)
+       {
+               Block &block = blocks[*i];
+               size_t offset = align-1-(block.offset+align-1)%align;
+               if(offset+size<=block.size)
+                       break;
+       }
+
+       unsigned block_index;
+       if(i!=pool.free_blocks.end())
+       {
+               block_index = *i;
+               pool.free_blocks.erase(i);
+               if(pool.free_blocks.empty())
+                       pool.can_consolidate = false;
+       }
+       else
+       {
+               Block block;
+               block.region = create_region(pool_index, default_region_size, false);
+               block.size = regions[block.region].size;
+
+               blocks.push_back(block);
+               block_index = blocks.size()-1;
+       }
+
+       size_t offset = align-1-(blocks[block_index].offset+align-1)%align;
+       if(offset)
+       {
+               unsigned head_index = block_index;
+               block_index = split_block(block_index, offset);
+               pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[head_index].size), head_index);
+       }
+
+       size += min_alignment-1;
+       size -= size%min_alignment;
+       if(blocks[block_index].size>=size+min_alignment)
+       {
+               unsigned tail_index = split_block(block_index, size);
+               pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[tail_index].size), tail_index);
+       }
+
+       blocks[block_index].allocated = true;
+
+       return block_index;
+}
+
+unsigned MemoryAllocator::split_block(unsigned index, size_t head_size)
+{
+       blocks.emplace_back();
+       Block &block = blocks[index];
+       Block &tail = blocks.back();
+       unsigned tail_index = blocks.size()-1;
 
-       Allocation alloc;
-       vk.AllocateMemory(alloc_info, alloc.memory);
+       tail.region = block.region;
+       tail.offset = block.offset+head_size;
+       tail.size = block.size-head_size;
+       tail.prev = index;
+       tail.next = block.next;
 
-       alloc.type = type;
-       alloc.size = size;
-       allocations.push_back(alloc);
+       block.size = head_size;
+       block.next = tail_index;
 
-       return allocations.size();
+       return tail_index;
 }
 
-MemoryAllocator::Allocation &MemoryAllocator::get_allocation(unsigned id)
+void MemoryAllocator::consolidate(unsigned pool_index)
 {
-       return allocations[id-1];
+       Pool &pool = pools[pool_index];
+
+       vector<unsigned> merged_blocks;
+       unsigned i = 0;
+       for(unsigned j=0; j<pool.free_blocks.size(); ++j)
+       {
+               unsigned block_index = pool.free_blocks[j];
+               Block &block = blocks[block_index];
+               if(!block.allocated)
+               {
+                       if(block.prev<0 || blocks[block.prev].allocated)
+                       {
+                               if(block.next>=0 && !blocks[block.next].allocated)
+                               {
+                                       merge_block_with_next(block_index);
+
+                                       while(block.next>=0 && !blocks[block.next].allocated)
+                                               merge_block_with_next(block_index);
+
+                                       merged_blocks.insert(lower_bound_by_size(merged_blocks, block.size), block_index);
+                               }
+                       }
+                       else
+                               continue;
+               }
+
+               if(j!=i)
+                       pool.free_blocks[i] = block_index;
+               ++i;
+       }
+
+       pool.free_blocks.resize(i+merged_blocks.size());
+
+       if(!merged_blocks.empty())
+       {
+               unsigned j = merged_blocks.size();
+               for(unsigned k=pool.free_blocks.size()-1; j; --k)
+               {
+                       if(!i || blocks[merged_blocks[j-1]].size>blocks[pool.free_blocks[i-1]].size)
+                               pool.free_blocks[k] = merged_blocks[--j];
+                       else
+                               pool.free_blocks[k] = pool.free_blocks[--i];
+               }
+       }
 }
 
-const MemoryAllocator::Allocation &MemoryAllocator::get_allocation(unsigned id) const
+void MemoryAllocator::merge_block_with_next(unsigned index)
 {
-       return allocations[id-1];
+       Block &block = blocks[index];
+
+       Block &next = blocks[block.next];
+       block.size += next.size;
+       block.next = next.next;
+       if(block.next>=0)
+               blocks[block.next].prev = index;
+
+       next = Block();
 }
 
 unsigned MemoryAllocator::allocate(VkBuffer buffer, MemoryType type)
@@ -84,11 +245,12 @@ unsigned MemoryAllocator::allocate(VkBuffer buffer, MemoryType type)
        VkMemoryRequirements requirements;
        vk.GetBufferMemoryRequirements(buffer, requirements);
 
-       unsigned id = allocate(requirements.size, requirements.memoryTypeBits, type);
+       unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type);
 
-       vk.BindBufferMemory(buffer, get_allocation(id).memory, 0);
+       Block &block = blocks[block_index];
+       vk.BindBufferMemory(buffer, regions[block.region].memory, block.offset);
 
-       return id;
+       return block_index+1;
 }
 
 unsigned MemoryAllocator::allocate(VkImage image, MemoryType type)
@@ -98,52 +260,162 @@ unsigned MemoryAllocator::allocate(VkImage image, MemoryType type)
        VkMemoryRequirements requirements;
        vk.GetImageMemoryRequirements(image, requirements);
 
-       unsigned id = allocate(requirements.size, requirements.memoryTypeBits, type);
+       unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type);
 
-       vk.BindImageMemory(image, get_allocation(id).memory, 0);
+       Block &block = blocks[block_index];
+       vk.BindImageMemory(image, regions[block.region].memory, block.offset);
 
-       return id;
+       return block_index+1;
 }
 
 void MemoryAllocator::release(unsigned id)
 {
-       Allocation &alloc = get_allocation(id);
-       if(!alloc.memory)
-               throw invalid_operation("MemoryAllocator::release");
+       if(!id || id>blocks.size() || !blocks[id-1].allocated)
+               throw key_error(id);
 
-       const VulkanFunctions &vk = device.get_functions();
+       unsigned block_index = id-1;
+       Block &block = blocks[block_index];
 
-       vk.FreeMemory(alloc.memory);
-}
+       block.allocated = false;
 
-size_t MemoryAllocator::get_allocation_size(unsigned id) const
-{
-       return get_allocation(id).size;
+       Region &region = regions[block.region];
+       if(region.direct)
+       {
+               const VulkanFunctions &vk = device.get_functions();
+
+               vk.FreeMemory(region.memory);
+               region = Region();
+               block = Block();
+               return;
+       }
+
+       Pool &pool = pools[region.pool];
+       pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, block.size), block_index);
+       if((block.prev>=0 && !blocks[block.prev].allocated) || (block.next>=0 && !blocks[block.next].allocated))
+               pool.can_consolidate = true;
 }
 
-void *MemoryAllocator::map(unsigned id, size_t offset, size_t size)
+void *MemoryAllocator::map(unsigned id)
 {
-       Allocation &alloc = get_allocation(id);
-       if(alloc.mapped_address)
-               throw invalid_operation("MemoryAllocator::map");
+       if(!id || id>blocks.size() || !blocks[id-1].allocated)
+               throw key_error(id);
 
-       const VulkanFunctions &vk = device.get_functions();
+       Block &block = blocks[id-1];
+       Region &region = regions[block.region];
+       if(!region.mapped_address)
+       {
+               const VulkanFunctions &vk = device.get_functions();
+               vk.MapMemory(region.memory, 0, region.size, 0, &region.mapped_address);
+       }
 
-       vk.MapMemory(alloc.memory, offset, size, 0, &alloc.mapped_address);
+       ++region.map_count;
 
-       return alloc.mapped_address;
+       return static_cast<char *>(region.mapped_address)+block.offset;
 }
 
-void MemoryAllocator::unmap(void *ptr)
+void MemoryAllocator::unmap(unsigned id)
 {
-       auto i = find_member(allocations, ptr, &Allocation::mapped_address);
-       if(i==allocations.end())
+       if(!id || id>blocks.size() || !blocks[id-1].allocated)
+               throw key_error(id);
+
+       Block &block = blocks[id-1];
+       Region &region = regions[block.region];
+
+       if(!regions[block.region].mapped_address)
                throw invalid_operation("MemoryAllocator::unmap");
+       else if(!--region.map_count)
+       {
+               const VulkanFunctions &vk = device.get_functions();
+               vk.UnmapMemory(region.memory);
+               region.mapped_address = 0;
+       }
+}
 
-       const VulkanFunctions &vk = device.get_functions();
+string MemoryAllocator::get_debug() const
+{
+       static const StringCodec::unichar bar_chars[] = { 0xB7, 0x2596, 0x258C, 0x2597, 0x2584, 0x2599, 0x2590, 0x259F, 0x2588 };  // ·▖▌▗▄▙▐▟█
+
+       string debug;
+       for(unsigned i=0; i<pools.size(); ++i)
+       {
+               const Pool &pool = pools[i];
+
+               string pool_debug;
+               size_t total_heap = 0;
+               size_t total_used = 0;
+               for(unsigned j=0; j<regions.size(); ++j)
+                       if(regions[j].pool==static_cast<int>(i))
+                       {
+                               total_heap += regions[j].size;
+                               pool_debug += format("  Region %d: %d kB", j, (regions[j].size+512)/1024);
+                               if(regions[j].direct)
+                                       pool_debug += ", direct";
+                               pool_debug += '\n';
+
+                               int block_index = -1;
+                               for(unsigned k=0; (block_index<0 && k<blocks.size()); ++k)
+                                       if(blocks[k].region==static_cast<int>(j) && blocks[k].offset==0)
+                                               block_index = k;
+
+                               unsigned slice_index = 0;
+                               unsigned slice_data = 0;
+
+                               string bar = "    [";
+                               string region_debug;
+                               StringCodec::Utf8::Encoder bar_enc;
+                               while(block_index>=0)
+                               {
+                                       const Block &block = blocks[block_index];
+                                       if(block.allocated)
+                                               total_used += block.size;
+                                       const char *state_str = (block.allocated ? "allocated" : "free");
+                                       region_debug += format("    Block %d: %d bytes at %d, %s\n", block_index, block.size, block.offset, state_str);
+                                       block_index = block.next;
+
+                                       size_t block_end = block.offset+block.size;
+                                       while(1)
+                                       {
+                                               size_t slice_end = regions[j].size*(slice_index+1)/140;
+                                               slice_data |= 1<<(block.allocated+slice_index%2*2);
+                                               if(slice_end>block_end)
+                                                       break;
+                                               ++slice_index;
+                                               if(slice_index%2==0)
+                                               {
+                                                       slice_data = 5+((slice_data>>1)&5)-(slice_data&5);
+                                                       bar_enc.encode_char(bar_chars[(slice_data&3)+3*((slice_data>>2)&3)], bar);
+                                                       slice_data = 0;
+                                               }
+                                       }
+                               }
+
+                               bar += "]\n";
+                               if(!regions[j].direct)
+                                       pool_debug += bar;
+                               pool_debug += region_debug;
+                       }
+
+               if(!pool_debug.empty())
+               {
+                       MemoryType t = pool.type;
+                       const char *type_str = (t==DEVICE_MEMORY ? "device" : t==STAGING_MEMORY ? "staging" :
+                               t==STREAMING_MEMORY ? "streaming" : "unknown");
+                       debug += format("Pool %d: %s, %d/%d kB used\n", i, type_str, (total_used+512)/1024, (total_heap+512)/1024);
+                       debug += pool_debug;
+               }
+
+               if(!pool.free_blocks.empty())
+               {
+                       debug += "  Free blocks:\n";
+                       for(unsigned j: pool.free_blocks)
+                       {
+                               const char *type = (blocks[j].type==BUFFER ? "buffer" : blocks[j].type==IMAGE ? "image" : "undecided");
+                               debug += format("    Block %d: %d bytes, %s\n", j, blocks[j].size, type);
+                       }
+               }
+       }
 
-       vk.UnmapMemory(i->memory);
-       i->mapped_address = 0;
+       return debug;
 }
 
 } // namespace GL