#include <msp/core/algorithm.h>
#include <msp/core/maputils.h>
#include <msp/graphics/vulkancontext_platform.h>
#include <msp/stringcodec/utf8.h>
#include <msp/strings/format.h>
#include "device.h"
#include "error.h"
#include "memoryallocator.h"
#include "vulkan.h"

using namespace std;

namespace Msp {
namespace GL {

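/* Suballocating manager for Vulkan device memory.  Memory types are grouped
into pools; each pool owns regions of VkDeviceMemory which are divided into
blocks handed out to buffers and images. */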
MemoryAllocator::MemoryAllocator(Device &d):
	device(d),
	phys_device(handle_cast<VkPhysicalDevice>(device.get_context().get_private().physical_device))
{
	const VulkanFunctions &vk = device.get_functions();

	VkPhysicalDeviceMemoryProperties mem_props;
	vk.GetPhysicalDeviceMemoryProperties(mem_props);

	for(unsigned i=0; i<mem_props.memoryHeapCount; ++i)
		if(mem_props.memoryHeaps[i].flags&VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
			total_device_memory += mem_props.memoryHeaps[i].size;

	// Size pooled regions relative to total device memory, rounded to the minimum alignment.
	default_region_size = total_device_memory/256;
	default_region_size -= default_region_size%min_alignment;
	direct_alloc_threshold = default_region_size/4;

	const VkMemoryPropertyFlags host_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
	pools.resize(mem_props.memoryTypeCount);
	for(unsigned i=0; i<mem_props.memoryTypeCount; ++i)
	{
		VkMemoryPropertyFlags flags = mem_props.memoryTypes[i].propertyFlags;
		if(flags&VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
		{
			if((flags&host_flags)==host_flags)
				pools[i].type = STREAMING_MEMORY;
			else
				pools[i].type = DEVICE_MEMORY;
		}
		else if((flags&host_flags)==host_flags)
			pools[i].type = STAGING_MEMORY;
	}
}

MemoryAllocator::~MemoryAllocator()
{
	const VulkanFunctions &vk = device.get_functions();

	for(Region &r: regions)
		if(r.memory)
			vk.FreeMemory(r.memory);
}

unsigned MemoryAllocator::find_memory_pool(unsigned mask, MemoryType type) const
{
	for(unsigned i=0; i<pools.size(); ++i)
		if((mask&(1<<i)) && pools[i].type==type)
			return i;
	if(type==DEVICE_MEMORY || type==STAGING_MEMORY)
		return find_memory_pool(mask, STREAMING_MEMORY);
	throw runtime_error("Unable to find suitable memory type");
}

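/* Each Region corresponds to one VkDeviceMemory allocation.  Pooled regions
are carved into blocks by the allocator; direct regions hold a single large
resource by themselves. */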
unsigned MemoryAllocator::create_region(unsigned pool_index, size_t size, bool direct)
{
	const VulkanFunctions &vk = device.get_functions();

	VkMemoryAllocateInfo alloc_info = { };
	alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	alloc_info.allocationSize = size;
	alloc_info.memoryTypeIndex = pool_index;

	Region region;
	vk.AllocateMemory(alloc_info, region.memory);

	region.pool = pool_index;
	region.direct = direct;
	region.size = size;
	regions.push_back(region);

	return regions.size()-1;
}

vector<unsigned>::iterator MemoryAllocator::lower_bound_by_size(vector<unsigned> &indices, size_t size) const
{
	return lower_bound(indices, size, [this](unsigned j, unsigned s){ return blocks[j].size<s; });
}

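/* Computes how far into a free block an allocation must be placed.  If the
neighbouring block holds a resource of a different kind, the offset is padded
to bufferImageGranularity as required by Vulkan; otherwise only the requested
alignment is applied. */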
size_t MemoryAllocator::get_alloc_offset(const Block &block, size_t size, size_t align, BlockType type) const
{
	size_t offset = block.offset;
	if(type!=block.type && block.prev>=0 && type!=blocks[block.prev].type)
	{
		offset += buffer_image_granularity-1;
		offset -= offset%buffer_image_granularity;
	}

	offset += align-1;
	offset -= offset%align;

	/* ASSUMPTION: the condition guarding this branch is missing from the
	excerpt.  It is reconstructed here as placing image allocations at the end
	of the block, so the space in front of them stays usable for buffers. */
	if(type==IMAGE)
	{
		size_t offset2 = block.offset+block.size-size;
		offset2 -= offset2%align;
		offset = max(offset, offset2);
	}

	return offset-block.offset;
}

void MemoryAllocator::update_largest_free(Pool &pool)
{
	// Walk the size-sorted free list from the largest end until both caches are filled.
	for(auto i=pool.free_blocks.end(); ((pool.largest_free_buffer<0 || pool.largest_free_image<0) && i!=pool.free_blocks.begin()); )
	{
		--i;
		if(pool.largest_free_buffer<0 && (blocks[*i].type==BUFFER || blocks[*i].type==UNDECIDED))
			pool.largest_free_buffer = *i;
		if(pool.largest_free_image<0 && (blocks[*i].type==IMAGE || blocks[*i].type==UNDECIDED))
			pool.largest_free_image = *i;
	}
}

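/* Allocates a block of memory from a pool.  Sizes at or above
direct_alloc_threshold get a dedicated region; smaller requests are served
from pooled regions with a best-fit search over the size-sorted free list,
splitting off any unused head and tail as new free blocks. */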
unsigned MemoryAllocator::allocate(size_t size, size_t align, unsigned type_bits, MemoryType mem_type, BlockType block_type)
{
	unsigned pool_index = find_memory_pool(type_bits, mem_type);
	Pool &pool = pools[pool_index];

	if(size>=direct_alloc_threshold)
	{
		Block block;
		block.region = create_region(pool_index, size, true);
		block.size = size;
		block.allocated = true;
		block.type = block_type;

		blocks.push_back(block);
		return blocks.size()-1;
	}

	/* Consolidate free space if the largest known free block of this kind is
	too small (or unknown) for the request. */
	int largest_free = (block_type==BUFFER ? pool.largest_free_buffer : pool.largest_free_image);
	if(pool.can_consolidate && (largest_free<0 || blocks[largest_free].size<size+align))
		consolidate(pool_index);

	auto i = lower_bound_by_size(pool.free_blocks, size);
	for(; i!=pool.free_blocks.end(); ++i)
	{
		Block &block = blocks[*i];
		if(block.type==UNDECIDED || block.type==block_type)
		{
			size_t offset = get_alloc_offset(block, size, align, block_type);
			if(offset+size<=block.size)
				break;
		}
	}

	unsigned block_index;
	if(i!=pool.free_blocks.end())
	{
		block_index = *i;
		pool.free_blocks.erase(i);
		if(pool.free_blocks.empty())
			pool.can_consolidate = false;
		if(static_cast<int>(block_index)==pool.largest_free_buffer)
			pool.largest_free_buffer = -1;
		if(static_cast<int>(block_index)==pool.largest_free_image)
			pool.largest_free_image = -1;
	}
	else
	{
		Block block;
		block.region = create_region(pool_index, default_region_size, false);
		block.size = regions[block.region].size;

		blocks.push_back(block);
		block_index = blocks.size()-1;
	}

	size_t offset = get_alloc_offset(blocks[block_index], size, align, block_type);
	if(offset)
	{
		unsigned head_index = block_index;
		block_index = split_block(block_index, offset);
		pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[head_index].size), head_index);
	}

	size += min_alignment-1;
	size -= size%min_alignment;
	if(blocks[block_index].size>=size+min_alignment)
	{
		unsigned tail_index = split_block(block_index, size);
		pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[tail_index].size), tail_index);
	}

	blocks[block_index].allocated = true;
	blocks[block_index].type = block_type;

	update_largest_free(pool);

	return block_index;
}

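/* Splits a block in two, returning the index of the new tail block.  The head
keeps the original index so existing references to it remain valid. */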
unsigned MemoryAllocator::split_block(unsigned index, size_t head_size)
{
	blocks.emplace_back();
	Block &block = blocks[index];
	Block &tail = blocks.back();
	unsigned tail_index = blocks.size()-1;

	tail.region = block.region;
	tail.offset = block.offset+head_size;
	tail.size = block.size-head_size;
	tail.prev = index;
	tail.next = block.next;
	// Keep the following block's prev pointer pointing at its new neighbour.
	if(tail.next>=0)
		blocks[tail.next].prev = tail_index;

	block.size = head_size;
	block.next = tail_index;

	return tail_index;
}

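/* Merges runs of adjacent free blocks in a pool and rebuilds the size-sorted
free list.  Blocks absorbed into an earlier block are dropped from the list;
blocks that grew are re-inserted at their new size. */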
void MemoryAllocator::consolidate(unsigned pool_index)
{
	Pool &pool = pools[pool_index];

	vector<unsigned> merged_blocks;
	unsigned i = 0;
	for(unsigned j=0; j<pool.free_blocks.size(); ++j)
	{
		unsigned block_index = pool.free_blocks[j];
		Block &block = blocks[block_index];

		/* Only the first block of each free run is kept; the rest are merged
		into it and dropped from the free list. */
		if(block.prev<0 || blocks[block.prev].allocated)
		{
			if(block.next>=0 && !blocks[block.next].allocated)
			{
				merge_block_with_next(block_index);

				while(block.next>=0 && !blocks[block.next].allocated)
					merge_block_with_next(block_index);

				merged_blocks.insert(lower_bound_by_size(merged_blocks, block.size), block_index);
				continue;
			}
		}
		else
			continue;

		if(i!=j)
			pool.free_blocks[i] = block_index;
		++i;
	}

	pool.free_blocks.resize(i+merged_blocks.size());

	/* Merge the two sorted sequences, starting from the largest blocks so
	nothing is overwritten before it has been moved. */
	if(!merged_blocks.empty())
	{
		unsigned j = merged_blocks.size();
		for(unsigned k=pool.free_blocks.size()-1; j; --k)
		{
			if(!i || blocks[merged_blocks[j-1]].size>blocks[pool.free_blocks[i-1]].size)
				pool.free_blocks[k] = merged_blocks[--j];
			else
				pool.free_blocks[k] = pool.free_blocks[--i];
		}
	}

	pool.largest_free_buffer = -1;
	pool.largest_free_image = -1;
	update_largest_free(pool);
}

void MemoryAllocator::merge_block_with_next(unsigned index)
{
	Block &block = blocks[index];

	Block &next = blocks[block.next];
	block.size += next.size;
	block.next = next.next;
	if(block.next>=0)
		blocks[block.next].prev = index;
}

unsigned MemoryAllocator::allocate(VkBuffer buffer, MemoryType type)
{
	const VulkanFunctions &vk = device.get_functions();

	VkMemoryRequirements requirements;
	vk.GetBufferMemoryRequirements(buffer, requirements);

	unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type, BUFFER);

	Block &block = blocks[block_index];
	vk.BindBufferMemory(buffer, regions[block.region].memory, block.offset);

	// Zero is reserved for "no allocation", so the returned id is offset by one.
	return block_index+1;
}

unsigned MemoryAllocator::allocate(VkImage image, MemoryType type)
{
	const VulkanFunctions &vk = device.get_functions();

	VkMemoryRequirements requirements;
	vk.GetImageMemoryRequirements(image, requirements);

	unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type, IMAGE);

	Block &block = blocks[block_index];
	vk.BindImageMemory(image, regions[block.region].memory, block.offset);

	return block_index+1;
}

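/* Releases an allocation.  Direct regions are freed back to Vulkan
immediately; blocks in pooled regions are returned to the pool's free list so
they can be reused or consolidated later. */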
void MemoryAllocator::release(unsigned id)
{
	if(!id || id>blocks.size() || !blocks[id-1].allocated)
		throw key_error(id);

	unsigned block_index = id-1;
	Block &block = blocks[block_index];

	block.allocated = false;

	Region &region = regions[block.region];
	if(region.direct)
	{
		const VulkanFunctions &vk = device.get_functions();

		vk.FreeMemory(region.memory);
		region.memory = 0;
		return;
	}

	Pool &pool = pools[region.pool];
	pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, block.size), block_index);
	if((block.prev>=0 && !blocks[block.prev].allocated) || (block.next>=0 && !blocks[block.next].allocated))
		pool.can_consolidate = true;

	if(block.type==BUFFER)
	{
		if(pool.largest_free_buffer<0 || blocks[pool.largest_free_buffer].size<block.size)
			pool.largest_free_buffer = block_index;
	}
	else if(block.type==IMAGE)
	{
		if(pool.largest_free_image<0 || blocks[pool.largest_free_image].size<block.size)
			pool.largest_free_image = block_index;
	}
}

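/* Maps the memory behind an allocation into the host address space.  The
whole region is mapped on first use and a per-region count tracks how many
mappings are outstanding. */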
void *MemoryAllocator::map(unsigned id)
{
	if(!id || id>blocks.size() || !blocks[id-1].allocated)
		throw key_error(id);

	Block &block = blocks[id-1];
	Region &region = regions[block.region];
	if(!region.mapped_address)
	{
		const VulkanFunctions &vk = device.get_functions();
		vk.MapMemory(region.memory, 0, region.size, 0, &region.mapped_address);
	}

	++region.map_count;

	return static_cast<char *>(region.mapped_address)+block.offset;
}

void MemoryAllocator::unmap(unsigned id)
{
	if(!id || id>blocks.size() || !blocks[id-1].allocated)
		throw key_error(id);

	Block &block = blocks[id-1];
	Region &region = regions[block.region];

	if(!region.mapped_address)
		throw invalid_operation("MemoryAllocator::unmap");
	else if(!--region.map_count)
	{
		const VulkanFunctions &vk = device.get_functions();
		vk.UnmapMemory(region.memory);
		region.mapped_address = 0;
	}
}

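/* Produces a human-readable dump of the allocator state: per-pool totals,
per-region block lists and a usage bar drawn with Unicode block characters,
with two slices per character distinguishing free and allocated space. */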
string MemoryAllocator::get_debug() const
{
	static const StringCodec::unichar bar_chars[] = { 0xB7, 0x2596, 0x258C, 0x2597, 0x2584, 0x2599, 0x2590, 0x259F, 0x2588 }; // ·▖▌▗▄▙▐▟█

	string debug;
	for(unsigned i=0; i<pools.size(); ++i)
	{
		const Pool &pool = pools[i];

		string pool_debug;
		size_t total_heap = 0;
		size_t total_used = 0;
		for(unsigned j=0; j<regions.size(); ++j)
			if(regions[j].pool==static_cast<int>(i))
			{
				total_heap += regions[j].size;
				pool_debug += format(" Region %d: %d kB", j, (regions[j].size+512)/1024);
				if(regions[j].direct)
					pool_debug += ", direct";
				pool_debug += '\n';

				// Find the block at the start of the region and walk the chain from there.
				int block_index = -1;
				for(unsigned k=0; (block_index<0 && k<blocks.size()); ++k)
					if(blocks[k].region==static_cast<int>(j) && blocks[k].offset==0)
						block_index = k;

				unsigned slice_index = 0;
				unsigned slice_data = 0;

				string region_debug;
				string bar = "[";
				StringCodec::Utf8::Encoder bar_enc;
				while(block_index>=0)
				{
					const Block &block = blocks[block_index];
					if(block.allocated)
						total_used += block.size;
					const char *state_str = (block.allocated ? "allocated" : "free");
					const char *type_str = (block.type==BUFFER ? "buffer" : block.type==IMAGE ? "image" : "undecided");
					region_debug += format(" Block %d: %d bytes at %d, %s %s\n", block_index, block.size, block.offset, state_str, type_str);
					block_index = block.next;

					// Accumulate the usage bar; each character covers two slices of the region.
					size_t block_end = block.offset+block.size;
					while(1)
					{
						size_t slice_end = regions[j].size*(slice_index+1)/140;
						slice_data |= 1<<(block.allocated+slice_index%2*2);
						if(slice_end>block_end)
							break;
						++slice_index;
						if(slice_index%2==0)
						{
							slice_data = 5+((slice_data>>1)&5)-(slice_data&5);
							bar_enc.encode_char(bar_chars[(slice_data&3)+3*((slice_data>>2)&3)], bar);
							slice_data = 0;
						}
					}
				}

				bar += ']';
				if(!regions[j].direct)
				{
					pool_debug += region_debug;
					pool_debug += " "+bar+"\n";
				}
			}

		if(!pool_debug.empty())
		{
			MemoryType t = pool.type;
			const char *type_str = (t==DEVICE_MEMORY ? "device" : t==STAGING_MEMORY ? "staging" :
				t==STREAMING_MEMORY ? "streaming" : "unknown");
			debug += format("Pool %d: %s, %d/%d kB used\n", i, type_str, (total_used+512)/1024, (total_heap+512)/1024);
			debug += pool_debug;
		}

		if(!pool.free_blocks.empty())
		{
			debug += " Free blocks:\n";
			for(unsigned j: pool.free_blocks)
			{
				const char *type = (blocks[j].type==BUFFER ? "buffer" : blocks[j].type==IMAGE ? "image" : "undecided");
				debug += format(" Block %d: %d bytes, %s", j, blocks[j].size, type);
				unsigned largest_flags = (static_cast<int>(j)==pool.largest_free_buffer)+(static_cast<int>(j)==pool.largest_free_image)*2;
				if(largest_flags)
				{
					debug += " (largest free ";