// libs/gl.git: source/backends/vulkan/memoryallocator.cpp
// Latest commit: Avoid allocating buffers and images too close together
#include <msp/core/algorithm.h>
#include <msp/core/maputils.h>
#include <msp/graphics/vulkancontext_platform.h>
#include <msp/stringcodec/utf8.h>
#include <msp/strings/format.h>
#include "device.h"
#include "error.h"
#include "memoryallocator.h"
#include "vulkan.h"

using namespace std;

namespace Msp {
namespace GL {

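/* The allocator suballocates buffers and images from larger chunks of device
memory.  Each Vulkan memory type maps to a pool, classified as device, staging
or streaming memory by its property flags.  Regions are carved into blocks,
which are recycled through per-pool free lists sorted by size and merged back
together when fragmentation gets in the way.  Blocks holding buffers are kept
apart from blocks holding images as required by Vulkan's
bufferImageGranularity limit. */

// Sizes are derived from the device: regions default to 1/256 of total
// device-local memory, rounded down to min_alignment, and allocations of at
// least a quarter region bypass the pools and get a dedicated region.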
MemoryAllocator::MemoryAllocator(Device &d):
	device(d),
	phys_device(handle_cast<VkPhysicalDevice>(device.get_context().get_private().physical_device))
{
	const VulkanFunctions &vk = device.get_functions();

	VkPhysicalDeviceMemoryProperties mem_props;
	vk.GetPhysicalDeviceMemoryProperties(mem_props);

	for(unsigned i=0; i<mem_props.memoryHeapCount; ++i)
		if(mem_props.memoryHeaps[i].flags&VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
			total_device_memory += mem_props.memoryHeaps[i].size;

	default_region_size = total_device_memory/256;
	default_region_size -= default_region_size%min_alignment;
	direct_alloc_threshold = default_region_size/4;

	const VkMemoryPropertyFlags host_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
	pools.resize(mem_props.memoryTypeCount);
	for(unsigned i=0; i<mem_props.memoryTypeCount; ++i)
	{
		VkMemoryPropertyFlags flags = mem_props.memoryTypes[i].propertyFlags;
		if(flags&VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
		{
			if((flags&host_flags)==host_flags)
				pools[i].type = STREAMING_MEMORY;
			else
				pools[i].type = DEVICE_MEMORY;
		}
		else if((flags&host_flags)==host_flags)
			pools[i].type = STAGING_MEMORY;
	}
}

MemoryAllocator::~MemoryAllocator()
{
	const VulkanFunctions &vk = device.get_functions();

	for(Region &r: regions)
		if(r.memory)
			vk.FreeMemory(r.memory);
}

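// Finds a pool of the requested type whose memory type index is allowed by
// mask.  Streaming memory is both device-local and host-visible, so it can
// stand in for either device or staging memory when no dedicated type exists.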
unsigned MemoryAllocator::find_memory_pool(unsigned mask, MemoryType type) const
{
	for(unsigned i=0; i<pools.size(); ++i)
		if((mask&(1<<i)) && pools[i].type==type)
			return i;
	if(type==DEVICE_MEMORY || type==STAGING_MEMORY)
		return find_memory_pool(mask, STREAMING_MEMORY);
	throw runtime_error("Unable to find suitable memory type");
}

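// Allocates a new VkDeviceMemory region.  A direct region backs a single
// large allocation; other regions are shared by many blocks.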
unsigned MemoryAllocator::create_region(unsigned pool_index, size_t size, bool direct)
{
	const VulkanFunctions &vk = device.get_functions();

	VkMemoryAllocateInfo alloc_info = { };
	alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
	alloc_info.allocationSize = size;
	alloc_info.memoryTypeIndex = pool_index;

	Region region;
	vk.AllocateMemory(alloc_info, region.memory);

	region.pool = pool_index;
	region.direct = direct;
	region.size = size;
	regions.push_back(region);

	return regions.size()-1;
}

vector<unsigned>::iterator MemoryAllocator::lower_bound_by_size(vector<unsigned> &indices, size_t size) const
{
	return lower_bound(indices, size, [this](unsigned j, unsigned s){ return blocks[j].size<s; });
}

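/* Computes where inside a free block an allocation must be placed.  If the
preceding block holds a different kind of resource, the offset is first
rounded up to buffer_image_granularity so that buffers and images never share
a granularity-sized chunk, then rounded up to the allocation's own alignment.
Buffers are additionally packed against the end of the block, leaving the
front of the block available for images. */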
size_t MemoryAllocator::get_alloc_offset(const Block &block, size_t size, size_t align, BlockType type) const
{
	size_t offset = block.offset;
	if(type!=block.type && block.prev>=0 && type!=blocks[block.prev].type)
	{
		offset += buffer_image_granularity-1;
		offset -= offset%buffer_image_granularity;
	}

	offset += align-1;
	offset -= offset%align;

	if(type==BUFFER)
	{
		size_t offset2 = block.offset+block.size-size;
		offset2 -= offset2%align;
		offset = max(offset, offset2);
	}

	return offset-block.offset;
}

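// Refreshes the cached indices of the largest free blocks usable for buffers
// and for images.  The free list is sorted by size, so walking it from the
// back finds the largest candidates first.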
void MemoryAllocator::update_largest_free(Pool &pool)
{
	for(auto i=pool.free_blocks.end(); ((pool.largest_free_buffer<0 || pool.largest_free_image<0) && i!=pool.free_blocks.begin()); )
	{
		--i;
		if(pool.largest_free_buffer<0 && (blocks[*i].type==BUFFER || blocks[*i].type==UNDECIDED))
			pool.largest_free_buffer = *i;
		if(pool.largest_free_image<0 && (blocks[*i].type==IMAGE || blocks[*i].type==UNDECIDED))
			pool.largest_free_image = *i;
	}
}

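/* Allocates a block of memory.  Large requests get a dedicated region.
Smaller requests use best fit on the pool's size-sorted free list, splitting
the chosen block if a usable head or tail remains.  Consolidation of adjacent
free blocks is deferred until fragmentation actually causes a miss. */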
unsigned MemoryAllocator::allocate(size_t size, size_t align, unsigned type_bits, MemoryType mem_type, BlockType block_type)
{
	unsigned pool_index = find_memory_pool(type_bits, mem_type);
	Pool &pool = pools[pool_index];

	if(size>=direct_alloc_threshold)
	{
		Block block;
		block.region = create_region(pool_index, size, true);
		block.size = size;
		block.allocated = true;
		block.type = block_type;

		blocks.push_back(block);
		return blocks.size()-1;
	}

	/* A largest-free index of -1 means no free block of this type is known;
	indexing blocks with it would be out of bounds, but consolidation may
	still produce a usable block. */
	int largest_free = (block_type==BUFFER ? pool.largest_free_buffer : pool.largest_free_image);
	if(pool.can_consolidate && (largest_free<0 || blocks[largest_free].size<size+align))
		consolidate(pool_index);

	auto i = lower_bound_by_size(pool.free_blocks, size);
	for(; i!=pool.free_blocks.end(); ++i)
	{
		Block &block = blocks[*i];
		if(block.type==UNDECIDED || block.type==block_type)
		{
			size_t offset = get_alloc_offset(block, size, align, block_type);
			if(offset+size<=block.size)
				break;
		}
	}

	unsigned block_index;
	if(i!=pool.free_blocks.end())
	{
		block_index = *i;
		pool.free_blocks.erase(i);
		if(pool.free_blocks.empty())
			pool.can_consolidate = false;
		if(static_cast<int>(block_index)==pool.largest_free_buffer)
			pool.largest_free_buffer = -1;
		if(static_cast<int>(block_index)==pool.largest_free_image)
			pool.largest_free_image = -1;
	}
	else
	{
		Block block;
		block.region = create_region(pool_index, default_region_size, false);
		block.size = regions[block.region].size;

		blocks.push_back(block);
		block_index = blocks.size()-1;
	}

	size_t offset = get_alloc_offset(blocks[block_index], size, align, block_type);
	if(offset)
	{
		unsigned head_index = block_index;
		block_index = split_block(block_index, offset);
		pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[head_index].size), head_index);
	}

	size += min_alignment-1;
	size -= size%min_alignment;
	if(blocks[block_index].size>=size+min_alignment)
	{
		unsigned tail_index = split_block(block_index, size);
		pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, blocks[tail_index].size), tail_index);
	}

	blocks[block_index].allocated = true;
	blocks[block_index].type = block_type;

	update_largest_free(pool);

	return block_index;
}

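// Splits a block at head_size and returns the index of the new tail block.
// blocks.emplace_back() is done first because it may reallocate the vector,
// which would invalidate references taken before the call.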
unsigned MemoryAllocator::split_block(unsigned index, size_t head_size)
{
	blocks.emplace_back();
	Block &block = blocks[index];
	Block &tail = blocks.back();
	unsigned tail_index = blocks.size()-1;

	tail.region = block.region;
	tail.offset = block.offset+head_size;
	tail.size = block.size-head_size;
	tail.prev = index;
	tail.next = block.next;
	// Keep the neighbour chain consistent: the old successor must point back
	// at the tail, not at the head, or consolidation would inspect the wrong
	// block.
	if(tail.next>=0)
		blocks[tail.next].prev = tail_index;

	block.size = head_size;
	block.next = tail_index;

	return tail_index;
}

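/* Merges physically adjacent free blocks.  The free list is compacted in
place: entries that were swallowed by a merge or that sit in the middle of a
run of free blocks are dropped, while merged run heads are collected
separately and merged back into the size-sorted list at the end. */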
void MemoryAllocator::consolidate(unsigned pool_index)
{
	Pool &pool = pools[pool_index];

	vector<unsigned> merged_blocks;
	unsigned i = 0;
	for(unsigned j=0; j<pool.free_blocks.size(); ++j)
	{
		unsigned block_index = pool.free_blocks[j];
		Block &block = blocks[block_index];

		// Skip entries already swallowed by an earlier merge (reset to an
		// empty Block) and entries whose predecessor is also free; the
		// latter get merged into the head of their run.
		if(!block.size || (block.prev>=0 && !blocks[block.prev].allocated))
			continue;

		if(block.next>=0 && !blocks[block.next].allocated)
		{
			// Head of a run of free blocks: merge the entire run and set the
			// head aside for reinsertion, since its size has changed.
			while(block.next>=0 && !blocks[block.next].allocated)
				merge_block_with_next(block_index);

			merged_blocks.insert(lower_bound_by_size(merged_blocks, block.size), block_index);
			continue;
		}

		if(j!=i)
			pool.free_blocks[i] = block_index;
		++i;
	}

	pool.free_blocks.resize(i+merged_blocks.size());

	// Merge the two size-sorted sequences from the back so that no entry is
	// overwritten before it has been moved.
	if(!merged_blocks.empty())
	{
		unsigned j = merged_blocks.size();
		for(unsigned k=pool.free_blocks.size()-1; j; --k)
		{
			if(!i || blocks[merged_blocks[j-1]].size>blocks[pool.free_blocks[i-1]].size)
				pool.free_blocks[k] = merged_blocks[--j];
			else
				pool.free_blocks[k] = pool.free_blocks[--i];
		}
	}

	pool.largest_free_buffer = -1;
	pool.largest_free_image = -1;
	update_largest_free(pool);
}

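// Merges a block with its successor, which must belong to the same region.
// The swallowed block is reset to an empty Block and its index is not reused.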
void MemoryAllocator::merge_block_with_next(unsigned index)
{
	Block &block = blocks[index];

	Block &next = blocks[block.next];
	block.size += next.size;
	block.next = next.next;
	if(block.next>=0)
		blocks[block.next].prev = index;

	next = Block();
}

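/* The public entry points: allocate memory matching a buffer's or image's
requirements and bind it.  The returned id is the block index offset by one,
so that zero can serve as a null id.

A hypothetical caller might drive the allocator roughly like this (a sketch
only; creation and destruction of the buffer itself are elided):

	unsigned id = allocator.allocate(staging_buffer, STAGING_MEMORY);
	char *mapped = static_cast<char *>(allocator.map(id));
	// ... write data through mapped ...
	allocator.unmap(id);
	// later, when the buffer is no longer needed:
	allocator.release(id);
*/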
unsigned MemoryAllocator::allocate(VkBuffer buffer, MemoryType type)
{
	const VulkanFunctions &vk = device.get_functions();

	VkMemoryRequirements requirements;
	vk.GetBufferMemoryRequirements(buffer, requirements);

	unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type, BUFFER);

	Block &block = blocks[block_index];
	vk.BindBufferMemory(buffer, regions[block.region].memory, block.offset);

	return block_index+1;
}

unsigned MemoryAllocator::allocate(VkImage image, MemoryType type)
{
	const VulkanFunctions &vk = device.get_functions();

	VkMemoryRequirements requirements;
	vk.GetImageMemoryRequirements(image, requirements);

	unsigned block_index = allocate(requirements.size, requirements.alignment, requirements.memoryTypeBits, type, IMAGE);

	Block &block = blocks[block_index];
	vk.BindImageMemory(image, regions[block.region].memory, block.offset);

	return block_index+1;
}

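/* Releases an allocated block.  Directly allocated regions are freed
immediately; pooled blocks return to the free list and may later be merged
with free neighbours. */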
void MemoryAllocator::release(unsigned id)
{
	if(!id || id>blocks.size() || !blocks[id-1].allocated)
		throw key_error(id);

	unsigned block_index = id-1;
	Block &block = blocks[block_index];

	block.allocated = false;

	Region &region = regions[block.region];
	if(region.direct)
	{
		const VulkanFunctions &vk = device.get_functions();

		vk.FreeMemory(region.memory);
		region = Region();
		block = Block();
		return;
	}

	Pool &pool = pools[region.pool];
	pool.free_blocks.insert(lower_bound_by_size(pool.free_blocks, block.size), block_index);
	if((block.prev>=0 && !blocks[block.prev].allocated) || (block.next>=0 && !blocks[block.next].allocated))
		pool.can_consolidate = true;

	if(block.type==BUFFER)
	{
		if(pool.largest_free_buffer<0 || blocks[pool.largest_free_buffer].size<block.size)
			pool.largest_free_buffer = block_index;
	}
	else if(block.type==IMAGE)
	{
		if(pool.largest_free_image<0 || blocks[pool.largest_free_image].size<block.size)
			pool.largest_free_image = block_index;
	}
}

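// Maps a block into host address space.  The containing region is mapped as
// a whole on first use and reference-counted, since Vulkan does not allow
// mapping the same VkDeviceMemory twice.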
void *MemoryAllocator::map(unsigned id)
{
	if(!id || id>blocks.size() || !blocks[id-1].allocated)
		throw key_error(id);

	Block &block = blocks[id-1];
	Region &region = regions[block.region];
	if(!region.mapped_address)
	{
		const VulkanFunctions &vk = device.get_functions();
		vk.MapMemory(region.memory, 0, region.size, 0, &region.mapped_address);
	}

	++region.map_count;

	return static_cast<char *>(region.mapped_address)+block.offset;
}

void MemoryAllocator::unmap(unsigned id)
{
	if(!id || id>blocks.size() || !blocks[id-1].allocated)
		throw key_error(id);

	Block &block = blocks[id-1];
	Region &region = regions[block.region];

	if(!region.mapped_address)
		throw invalid_operation("MemoryAllocator::unmap");
	else if(!--region.map_count)
	{
		const VulkanFunctions &vk = device.get_functions();
		vk.UnmapMemory(region.memory);
		region.mapped_address = 0;
	}
}

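/* Formats a human-readable report of all pools, regions and blocks.  Each
pooled region gets a 70-character usage bar; every character encodes two
slices of the region with quarter-block glyphs: empty for free, half-height
for partially allocated and full-height for fully allocated slices. */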
string MemoryAllocator::get_debug() const
{
	static const StringCodec::unichar bar_chars[] = { 0xB7, 0x2596, 0x258C, 0x2597, 0x2584, 0x2599, 0x2590, 0x259F, 0x2588 };  // ·▖▌▗▄▙▐▟█

	string debug;
	for(unsigned i=0; i<pools.size(); ++i)
	{
		const Pool &pool = pools[i];

		string pool_debug;
		size_t total_heap = 0;
		size_t total_used = 0;
		for(unsigned j=0; j<regions.size(); ++j)
			if(regions[j].pool==static_cast<int>(i))
			{
				total_heap += regions[j].size;
				pool_debug += format("  Region %d: %d kB", j, (regions[j].size+512)/1024);
				if(regions[j].direct)
					pool_debug += ", direct";
				pool_debug += '\n';

				// Find the block at the start of the region, then walk the chain.
				int block_index = -1;
				for(unsigned k=0; (block_index<0 && k<blocks.size()); ++k)
					if(blocks[k].region==static_cast<int>(j) && blocks[k].offset==0)
						block_index = k;

				unsigned slice_index = 0;
				unsigned slice_data = 0;

				string bar = "    [";
				string region_debug;
				StringCodec::Utf8::Encoder bar_enc;
				while(block_index>=0)
				{
					const Block &block = blocks[block_index];
					if(block.allocated)
						total_used += block.size;
					const char *state_str = (block.allocated ? "allocated" : "free");
					const char *type_str = (block.type==BUFFER ? "buffer" : block.type==IMAGE ? "image" : "undecided");
					region_debug += format("    Block %d: %d bytes at %d, %s %s\n", block_index, block.size, block.offset, state_str, type_str);
					block_index = block.next;

					/* The bar divides the region into 140 slices, two per
					output character.  Bits 0-1 of slice_data mark free and
					allocated space in even slices, bits 2-3 in odd slices. */
					size_t block_end = block.offset+block.size;
					while(1)
					{
						size_t slice_end = regions[j].size*(slice_index+1)/140;
						slice_data |= 1<<(block.allocated+slice_index%2*2);
						if(slice_end>block_end)
							break;
						++slice_index;
						if(slice_index%2==0)
						{
							// Reduce each slice to 0 (free), 1 (mixed) or
							// 2 (allocated) and pick the matching glyph.
							slice_data = 5+((slice_data>>1)&5)-(slice_data&5);
							bar_enc.encode_char(bar_chars[(slice_data&3)+3*((slice_data>>2)&3)], bar);
							slice_data = 0;
						}
					}
				}

				bar += "]\n";
				if(!regions[j].direct)
					pool_debug += bar;
				pool_debug += region_debug;
			}

		if(!pool_debug.empty())
		{
			MemoryType t = pool.type;
			const char *type_str = (t==DEVICE_MEMORY ? "device" : t==STAGING_MEMORY ? "staging" :
				t==STREAMING_MEMORY ? "streaming" : "unknown");
			debug += format("Pool %d: %s, %d/%d kB used\n", i, type_str, (total_used+512)/1024, (total_heap+512)/1024);
			debug += pool_debug;
		}

		if(!pool.free_blocks.empty())
		{
			debug += "  Free blocks:\n";
			for(unsigned j: pool.free_blocks)
			{
				const char *type = (blocks[j].type==BUFFER ? "buffer" : blocks[j].type==IMAGE ? "image" : "undecided");
				debug += format("    Block %d: %d bytes, %s", j, blocks[j].size, type);
				unsigned largest_flags = (static_cast<int>(j)==pool.largest_free_buffer)+(static_cast<int>(j)==pool.largest_free_image)*2;
				if(largest_flags)
				{
					debug += " (largest free ";
					if(largest_flags&1)
						debug += "buffer";
					if(largest_flags==3)
						debug += ", ";
					if(largest_flags&2)
						debug += "image";
					debug += ')';
				}
				debug += '\n';
			}
		}
	}

	return debug;
}

} // namespace GL
} // namespace Msp