--- /dev/null
+extension ARB_compute_shader
#include <algorithm>
+#include <msp/gl/extensions/arb_compute_shader.h>
#include <msp/gl/extensions/arb_direct_state_access.h>
#include <msp/gl/extensions/arb_draw_instanced.h>
#include <msp/gl/extensions/arb_occlusion_query.h>
glDrawElementsInstanced(batch.gl_prim_type, batch.size(), batch.gl_index_type, data_ptr, count);
}
+// Dispatches a compute workload with the given workgroup counts.  A
+// pipeline state (carrying a compute shader program) must be set first.
+void OpenGLCommands::dispatch(unsigned count_x, unsigned count_y, unsigned count_z)
+{
+	if(!pipeline_state)
+		throw invalid_operation("OpenGLCommands::dispatch");
+
+	// Compute shaders require ARB_compute_shader (core in OpenGL 4.3)
+	static Require req(ARB_compute_shader);
+
+	pipeline_state->apply();
+	glDispatchCompute(count_x, count_y, count_z);
+}
+
void OpenGLCommands::resolve_multisample(Framebuffer &target)
{
const Framebuffer *source = (pipeline_state ? pipeline_state->get_framebuffer() : 0);
void clear(const ClearValue *);
void draw(const Batch &);
void draw_instanced(const Batch &, unsigned);
+ void dispatch(unsigned, unsigned, unsigned);
void resolve_multisample(Framebuffer &);
void begin_query(const QueryPool &, unsigned);
#include <cstring>
#include <msp/core/algorithm.h>
+#include <msp/gl/extensions/arb_compute_shader.h>
#include <msp/gl/extensions/arb_es2_compatibility.h>
#include <msp/gl/extensions/arb_fragment_shader.h>
#include <msp/gl/extensions/arb_gl_spirv.h>
case VERTEX: { static Require _req(ARB_vertex_shader); gl_type = GL_VERTEX_SHADER; } break;
case GEOMETRY: { static Require _req(ARB_geometry_shader4); gl_type = GL_GEOMETRY_SHADER; } break;
case FRAGMENT: { static Require _req(ARB_fragment_shader); gl_type = GL_FRAGMENT_SHADER; } break;
+ case COMPUTE: { static Require _req(ARB_compute_shader); gl_type = GL_COMPUTE_SHADER; } break;
default: throw invalid_argument("OpenGLProgram::add_stage");
}
case SL::Stage::VERTEX: stage_id = add_stage(VERTEX); break;
case SL::Stage::GEOMETRY: stage_id = add_stage(GEOMETRY); break;
case SL::Stage::FRAGMENT: stage_id = add_stage(FRAGMENT); break;
+ case SL::Stage::COMPUTE: stage_id = add_stage(COMPUTE); break;
default: throw invalid_operation("OpenGLProgram::add_glsl_stages");
}
link(mod);
query_uniforms();
query_attributes();
+ if(is_compute())
+ {
+ int wg_size[3];
+ glGetProgramiv(id, GL_COMPUTE_WORK_GROUP_SIZE, wg_size);
+ rd.compute_wg_size = LinAl::Vector<unsigned, 3>(wg_size[0], wg_size[1], wg_size[2]);
+ }
const map<string, unsigned> &block_bindings = compiler.get_uniform_block_bindings();
if(!block_bindings.empty())
case SpirVModule::VERTEX: stage_id = add_stage(VERTEX); break;
case SpirVModule::GEOMETRY: stage_id = add_stage(GEOMETRY); break;
case SpirVModule::FRAGMENT: stage_id = add_stage(FRAGMENT); break;
+ case SpirVModule::COMPUTE: stage_id = add_stage(COMPUTE); break;
default: throw invalid_operation("OpenGLProgram::add_spirv_stages");
}
void OpenGLProgram::set_stage_debug_name(unsigned stage_id, Stage type)
{
#ifdef DEBUG
- static const char *const suffixes[] = { " [VS]", " [GS]", " [FS]" };
+ static const char *const suffixes[] = { " [VS]", " [GS]", " [FS]", " [CS]" };
string name = debug_name+suffixes[type];
glObjectLabel(GL_SHADER, stage_id, name.size(), name.c_str());
#else
VERTEX,
GEOMETRY,
FRAGMENT,
+ COMPUTE,
MAX_STAGES
};
void query_attributes();
void finalize_uniforms();
+ bool is_compute() const { return stage_ids[COMPUTE]; }
+
void set_debug_name(const std::string &);
void set_stage_debug_name(unsigned, Stage);
};
vkCmd.DrawIndexed(batch.size(), count, first_index, 0, 0);
}
+// Dispatches a compute workload.  Any active render pass is ended first,
+// since vkCmdDispatch must be recorded outside a render pass.
+void VulkanCommands::dispatch(unsigned count_x, unsigned count_y, unsigned count_z)
+{
+	if(!pipeline_state)
+		throw invalid_operation("VulkanCommands::dispatch");
+
+	if(framebuffer)
+		end_render_pass();
+
+	VulkanCommandRecorder vkCmd(device.get_functions(), primary_buffer);
+
+	// Make sure descriptor sets and resource barriers are up to date
+	// before recording the dispatch.
+	pipeline_state->refresh();
+	pipeline_state->synchronize_resources();
+	device.get_synchronizer().barrier(vkCmd);
+	pipeline_state->apply(vkCmd, 0, frame_index, false);
+	vkCmd.Dispatch(count_x, count_y, count_z);
+}
+
void VulkanCommands::resolve_multisample(Framebuffer &)
{
throw logic_error("VulkanCommands::resolve_multisample is unimplemented");
void clear(const ClearValue *);
void draw(const Batch &);
void draw_instanced(const Batch &, unsigned);
+ void dispatch(unsigned, unsigned, unsigned);
void resolve_multisample(Framebuffer &);
void begin_query(const QueryPool &, unsigned);
case SpirVModule::VERTEX: return VK_SHADER_STAGE_VERTEX_BIT;
case SpirVModule::GEOMETRY: return VK_SHADER_STAGE_GEOMETRY_BIT;
case SpirVModule::FRAGMENT: return VK_SHADER_STAGE_FRAGMENT_BIT;
+ case SpirVModule::COMPUTE: return VK_SHADER_STAGE_COMPUTE_BIT;
default: throw invalid_argument("get_vulkan_stage");
}
}
vector<char> buffer;
ps.fill_creation_info(buffer);
- const VkGraphicsPipelineCreateInfo *creation_info = reinterpret_cast<const VkGraphicsPipelineCreateInfo *>(buffer.data());
+ VkStructureType type = *reinterpret_cast<const VkStructureType *>(buffer.data());
VkPipeline pipeline;
- vk.CreateGraphicsPipelines(0, 1, creation_info, &pipeline);
+ if(type==VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO)
+ {
+ const VkComputePipelineCreateInfo *creation_info = reinterpret_cast<const VkComputePipelineCreateInfo *>(buffer.data());
+ vk.CreateComputePipelines(0, 1, creation_info, &pipeline);
+ }
+ else
+ {
+ const VkGraphicsPipelineCreateInfo *creation_info = reinterpret_cast<const VkGraphicsPipelineCreateInfo *>(buffer.data());
+ vk.CreateGraphicsPipelines(0, 1, creation_info, &pipeline);
+ }
pipelines.insert(make_pair(key, pipeline));
push_const_compat = hash_update<32>(push_const_compat, self.shprog->get_push_constants_size());
}
- constexpr unsigned pipeline_mask = PipelineState::SHPROG|PipelineState::VERTEX_SETUP|PipelineState::FACE_CULL|
+ constexpr unsigned graphics_mask = PipelineState::VERTEX_SETUP|PipelineState::FACE_CULL|
PipelineState::DEPTH_TEST|PipelineState::STENCIL_TEST|PipelineState::BLEND|PipelineState::PRIMITIVE_TYPE;
+ unsigned pipeline_mask = PipelineState::SHPROG;
+ if(!self.shprog->is_compute())
+ pipeline_mask |= graphics_mask;
if(changes&pipeline_mask)
{
handle = device.get_pipeline_cache().get_pipeline(self);
+// Computes a hash identifying this pipeline configuration, used as the
+// pipeline cache key.
uint64_t VulkanPipelineState::compute_hash() const
{
	const PipelineState &self = *static_cast<const PipelineState *>(this);
-	const FrameFormat &format = self.framebuffer->get_format();
	uint64_t result = hash<64>(self.shprog);
-	result = hash_update<64>(result, self.vertex_setup->compute_hash());
-	result = hash_round<64>(result, self.primitive_type);
-	if(self.front_face!=NON_MANIFOLD && self.face_cull!=NO_CULL)
+	// Graphics-only state (vertex setup, rasterization, framebuffer) does
+	// not apply to compute pipelines, so it only contributes to the hash
+	// for graphics programs.
+	if(!self.shprog->is_compute())
	{
-		result = hash_round<64>(result, self.front_face);
-		result = hash_round<64>(result, self.face_cull);
-	}
+		const FrameFormat &format = self.framebuffer->get_format();
-	result = hash_round<64>(result, format.get_samples());
+		result = hash_update<64>(result, self.vertex_setup->compute_hash());
+		result = hash_round<64>(result, self.primitive_type);
-	if(self.depth_test.enabled)
-	{
-		result = hash_round<64>(result, self.depth_test.compare);
-		result = hash_update<64>(result, self.depth_test.write);
-	}
+		if(self.front_face!=NON_MANIFOLD && self.face_cull!=NO_CULL)
+		{
+			result = hash_round<64>(result, self.front_face);
+			result = hash_round<64>(result, self.face_cull);
+		}
-	if(self.stencil_test.enabled)
-	{
-		result = hash_round<64>(result, self.stencil_test.compare);
-		result = hash_round<64>(result, self.stencil_test.stencil_fail_op);
-		result = hash_round<64>(result, self.stencil_test.depth_fail_op);
-		result = hash_round<64>(result, self.stencil_test.depth_pass_op);
-		result = hash_update<64>(result, self.stencil_test.reference);
-	}
+		result = hash_round<64>(result, format.get_samples());
-	if(self.blend.enabled)
-	{
-		result = hash_round<64>(result, self.blend.equation);
-		result = hash_round<64>(result, self.blend.src_factor);
-		result = hash_round<64>(result, self.blend.dst_factor);
-		result = hash_round<64>(result, self.blend.write_mask);
-	}
+		if(self.depth_test.enabled)
+		{
+			result = hash_round<64>(result, self.depth_test.compare);
+			result = hash_update<64>(result, self.depth_test.write);
+		}
-	for(FrameAttachment a: format)
-		result = hash_update<64>(result, a);
+		if(self.stencil_test.enabled)
+		{
+			result = hash_round<64>(result, self.stencil_test.compare);
+			result = hash_round<64>(result, self.stencil_test.stencil_fail_op);
+			result = hash_round<64>(result, self.stencil_test.depth_fail_op);
+			result = hash_round<64>(result, self.stencil_test.depth_pass_op);
+			result = hash_update<64>(result, self.stencil_test.reference);
+		}
+
+		if(self.blend.enabled)
+		{
+			result = hash_round<64>(result, self.blend.equation);
+			result = hash_round<64>(result, self.blend.src_factor);
+			result = hash_round<64>(result, self.blend.dst_factor);
+			result = hash_round<64>(result, self.blend.write_mask);
+		}
+
+		for(FrameAttachment a: format)
+			result = hash_update<64>(result, a);
+	}
	return result;
}
+// Selects between compute and graphics pipeline creation info depending
+// on whether the shader program contains a compute stage.
void VulkanPipelineState::fill_creation_info(vector<char> &buffer) const
+{
+	if(static_cast<const PipelineState *>(this)->shprog->is_compute())
+		fill_compute_creation_info(buffer);
+	else
+		fill_graphics_creation_info(buffer);
+}
+
+void VulkanPipelineState::fill_graphics_creation_info(vector<char> &buffer) const
{
const PipelineState &self = *static_cast<const PipelineState *>(this);
pipeline_info->pVertexInputState = reinterpret_cast<const VkPipelineVertexInputStateCreateInfo *>(self.vertex_setup->creation_info.data());
}
+// Builds a VkComputePipelineCreateInfo into buffer.  Compute pipelines
+// only need the shader stage and the pipeline layout.
+void VulkanPipelineState::fill_compute_creation_info(vector<char> &buffer) const
+{
+	const PipelineState &self = *static_cast<const PipelineState *>(this);
+
+	StructureBuilder sb(buffer, 1);
+	VkComputePipelineCreateInfo *const &pipeline_info = sb.add<VkComputePipelineCreateInfo>();
+
+	pipeline_info->sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+
+	if(self.shprog)
+	{
+		// Assumes the program's creation_info begins with its stage
+		// create info — confirm against VulkanProgram.
+		pipeline_info->stage = *reinterpret_cast<const VkPipelineShaderStageCreateInfo *>(self.shprog->creation_info.data());
+		pipeline_info->layout = handle_cast<::VkPipelineLayout>(self.shprog->layout_handle);
+	}
+}
+
uint64_t VulkanPipelineState::compute_descriptor_set_hash(unsigned index) const
{
const PipelineState &self = *static_cast<const PipelineState *>(this);
unapplied |= PipelineState::SCISSOR;
}
+ VkPipelineBindPoint bind_point = (self.shprog->is_compute() ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
if(unapplied&PipelineState::SHPROG)
- vkCmd.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, handle);
+ vkCmd.BindPipeline(bind_point, handle);
- if(unapplied&PipelineState::VERTEX_SETUP)
+ if(!self.shprog->is_compute() && (unapplied&PipelineState::VERTEX_SETUP))
if(const VertexSetup *vs = self.vertex_setup)
{
vkCmd.BindVertexBuffers(0, vs->n_bindings, vs->buffers, vs->offsets);
descriptor_set_handles.push_back(device.get_descriptor_pool().get_descriptor_set(
self.descriptor_set_slots[i], self, i, frame));
- vkCmd.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, self.shprog->layout_handle,
+ vkCmd.BindDescriptorSets(bind_point, self.shprog->layout_handle,
first_changed_desc_set, descriptor_set_handles.size(), descriptor_set_handles.data(), 0, 0);
}
- if(unapplied&(PipelineState::VIEWPORT|PipelineState::SCISSOR))
+ if(!self.shprog->is_compute() && (unapplied&(PipelineState::VIEWPORT|PipelineState::SCISSOR)))
{
Rect fb_rect = self.framebuffer->get_rect();
void refresh() const { if(changes) update(); }
std::uint64_t compute_hash() const;
void fill_creation_info(std::vector<char> &) const;
+ void fill_graphics_creation_info(std::vector<char> &) const;
+ void fill_compute_creation_info(std::vector<char> &) const;
std::uint64_t compute_descriptor_set_hash(unsigned) const;
bool is_descriptor_set_dynamic(unsigned) const;
VkDescriptorSetLayout get_descriptor_set_layout(unsigned) const;
#endif
}
+// Returns true if the program contains a compute shader stage.
+bool VulkanProgram::is_compute() const
+{
+	return stage_flags&VK_SHADER_STAGE_COMPUTE_BIT;
+}
+
void VulkanProgram::set_debug_name(const string &name)
{
#ifdef DEBUG
void finalize_uniforms();
+ bool is_compute() const;
+
void set_debug_name(const std::string &);
void set_vulkan_object_name() const;
};
vkCreateShaderModule(context.get_function<PFN_vkCreateShaderModule>("vkCreateShaderModule")),
vkDestroyShaderModule(context.get_function<PFN_vkDestroyShaderModule>("vkDestroyShaderModule")),
// 10
+ vkCreateComputePipelines(context.get_function<PFN_vkCreateComputePipelines>("vkCreateComputePipelines")),
vkCreateGraphicsPipelines(context.get_function<PFN_vkCreateGraphicsPipelines>("vkCreateGraphicsPipelines")),
vkDestroyPipeline(context.get_function<PFN_vkDestroyPipeline>("vkDestroyPipeline")),
vkCmdBindPipeline(context.get_function<PFN_vkCmdBindPipeline>("vkCmdBindPipeline")),
vkCmdSetViewport(context.get_function<PFN_vkCmdSetViewport>("vkCmdSetViewport")),
// 26
vkCmdSetScissor(context.get_function<PFN_vkCmdSetScissor>("vkCmdSetScissor")),
+ // 28
+ vkCmdDispatch(context.get_function<PFN_vkCmdDispatch>("vkCmdDispatch")),
// 30
vkGetPhysicalDeviceSurfaceCapabilities(context.get_function<PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR>("vkGetPhysicalDeviceSurfaceCapabilitiesKHR")),
vkGetPhysicalDeviceSurfaceFormats(context.get_function<PFN_vkGetPhysicalDeviceSurfaceFormatsKHR>("vkGetPhysicalDeviceSurfaceFormatsKHR")),
PFN_vkCmdEndRenderPass vkCmdEndRenderPass = 0; // 8.4
PFN_vkCreateShaderModule vkCreateShaderModule = 0; // 9.1
PFN_vkDestroyShaderModule vkDestroyShaderModule = 0; // 9.1
+ PFN_vkCreateComputePipelines vkCreateComputePipelines = 0; // 10.1
PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines = 0; // 10.2
PFN_vkDestroyPipeline vkDestroyPipeline = 0; // 10.4
PFN_vkCmdBindPipeline vkCmdBindPipeline = 0; // 10.10
PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers = 0; // 21.2
PFN_vkCmdSetViewport vkCmdSetViewport = 0; // 24.5
PFN_vkCmdSetScissor vkCmdSetScissor = 0; // 26.1
+ PFN_vkCmdDispatch vkCmdDispatch = 0; // 28
PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilities = 0; // 30.5.1
PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormats = 0; // 30.5.2
PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModes = 0; // 30.5.3
{ vkDestroyShaderModule(device, handle_cast<::VkShaderModule>(shaderModule), 0); }
// Chapter 10: Pipelines
+ Result CreateComputePipelines(VkPipelineCache pipelineCache, std::uint32_t createInfoCount, const VkComputePipelineCreateInfo *pCreateInfos, VkPipeline *pPipelines) const
+ { return { vkCreateComputePipelines(device, handle_cast<::VkPipelineCache>(pipelineCache), createInfoCount, pCreateInfos, 0, handle_cast<::VkPipeline *>(pPipelines)), "vkCreateComputePipelines" }; }
+
Result CreateGraphicsPipelines(VkPipelineCache pipelineCache, std::uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo *pCreateInfos, VkPipeline *pPipelines) const
{ return { vkCreateGraphicsPipelines(device, handle_cast<::VkPipelineCache>(pipelineCache), createInfoCount, pCreateInfos, 0, handle_cast<::VkPipeline *>(pPipelines)), "vkCreateGraphicsPipelines" }; }
void CmdSetScissor(VkCommandBuffer commandBuffer, std::uint32_t firstScissor, std::uint32_t scissorCount, const VkRect2D *pScissors) const
{ vkCmdSetScissor(handle_cast<::VkCommandBuffer>(commandBuffer), firstScissor, scissorCount, pScissors); }
+ // Chapter 28: Dispatching Commands
+ void CmdDispatch(VkCommandBuffer commandBuffer, std::uint32_t groupCountX, std::uint32_t groupCountY, std::uint32_t groupCountZ) const
+ { vkCmdDispatch(handle_cast<::VkCommandBuffer>(commandBuffer), groupCountX, groupCountY, groupCountZ); }
+
// Chapter 30: Window System Integration (WSI)
Result GetPhysicalDeviceSurfaceCapabilities(VkSurface surface, VkSurfaceCapabilitiesKHR &rSurfaceCapabilities) const
{ return { vkGetPhysicalDeviceSurfaceCapabilities(physicalDevice, handle_cast<::VkSurfaceKHR>(surface), &rSurfaceCapabilities), "vkGetPhysicalDeviceSurfaceCapabilities" }; }
void SetScissor(std::uint32_t firstScissor, std::uint32_t scissorCount, const VkRect2D *pScissors) const
{ vk.CmdSetScissor(commandBuffer, firstScissor, scissorCount, pScissors); }
+
+ void Dispatch(std::uint32_t groupCountX, std::uint32_t groupCountY, std::uint32_t groupCountZ) const
+ { vk.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ); }
};
} // namespace GL
using CommandsBackend::clear;
using CommandsBackend::draw;
using CommandsBackend::draw_instanced;
+ using CommandsBackend::dispatch;
using CommandsBackend::resolve_multisample;
using CommandsBackend::begin_query;
OP_RETURN_VALUE = 254,
OP_UNREACHABLE = 255,
+ EXEC_LOCAL_SIZE = 17,
+
DECO_SPEC_ID = 1,
DECO_ARRAY_STRIDE = 6,
DECO_MATRIX_STRIDE = 7,
case OP_NAME: reflect_name(op); break;
case OP_MEMBER_NAME: reflect_member_name(op); break;
case OP_ENTRY_POINT: reflect_entry_point(op); break;
+ case OP_EXECUTION_MODE: reflect_execution_mode(op); break;
case OP_TYPE_VOID: reflect_void_type(op); break;
case OP_TYPE_BOOL: reflect_bool_type(op); break;
case OP_TYPE_INT: reflect_int_type(op); break;
entry.globals.push_back(&variables[*op]);
}
+// Reflects an OpExecutionMode instruction.  Word 1 is the entry point's
+// id, word 2 the execution mode.  Only LocalSize is of interest here; its
+// three literal operands give the compute workgroup size.
+void SpirVModule::Reflection::reflect_execution_mode(CodeIterator op)
+{
+	EntryPoint &entry = entry_points[*(op+1)];
+	unsigned mode = *(op+2);
+	if(mode==EXEC_LOCAL_SIZE)
+	{
+		entry.compute_local_size.x = *(op+3);
+		entry.compute_local_size.y = *(op+4);
+		entry.compute_local_size.z = *(op+5);
+	}
+}
+
void SpirVModule::Reflection::reflect_void_type(CodeIterator op)
{
types[*(op+1)].type = VOID;
{
VERTEX = 0,
GEOMETRY = 3,
- FRAGMENT = 4
+ FRAGMENT = 4,
+ COMPUTE = 5
};
enum StorageClass
unsigned id = 0;
Stage stage = VERTEX;
std::vector<const Variable *> globals;
+ LinAl::Vector<unsigned, 3> compute_local_size;
};
struct StructMember
void reflect_name(CodeIterator);
void reflect_member_name(CodeIterator);
void reflect_entry_point(CodeIterator);
+ void reflect_execution_mode(CodeIterator);
void reflect_void_type(CodeIterator);
void reflect_bool_type(CodeIterator);
void reflect_int_type(CodeIterator);
collect_uniforms(spirv_mod);
collect_attributes(spirv_mod);
collect_builtins(spirv_mod);
+
+ for(const SpirVModule::EntryPoint &e: spirv_mod.get_entry_points())
+ if(e.stage==SpirVModule::COMPUTE)
+ reflect_data.compute_wg_size = e.compute_local_size;
}
finalize_uniforms();
void collect_builtins(const SpirVModule::Structure &);
public:
+ using ProgramBackend::is_compute;
+
ReflectData::LayoutHash get_uniform_layout_hash() const { return reflect_data.layout_hash; }
unsigned get_n_descriptor_sets() const { return reflect_data.n_descriptor_sets; }
unsigned get_push_constants_size() const { return reflect_data.push_constants_size; }
const ReflectData::AttributeInfo &get_attribute_info(const std::string &) const;
int get_attribute_location(const std::string &) const;
unsigned get_n_clip_distances() const { return reflect_data.n_clip_distances; }
+ const LinAl::Vector<unsigned, 3> &get_compute_workgroup_size() const { return reflect_data.compute_wg_size; }
using ProgramBackend::set_debug_name;
};
unsigned n_descriptor_sets = 0;
unsigned push_constants_size = 0;
std::vector<int> used_bindings;
+ LinAl::Vector<unsigned, 3> compute_wg_size;
void update_layout_hash();
void update_used_bindings();
commands.draw_instanced(batch, count);
}
+// Dispatches a compute operation using the currently applied state.
+void Renderer::dispatch(unsigned count_x, unsigned count_y, unsigned count_z)
+{
+	apply_state();
+	PipelineState &ps = get_pipeline_state();
+	commands.use_pipeline(&ps);
+	commands.dispatch(count_x, count_y, count_z);
+}
+
void Renderer::resolve_multisample(Framebuffer &target)
{
const State &state = get_state();
/** Draws multiple instances of a batch of primitives. A shader must be active. */
void draw_instanced(const Batch &, unsigned);
+ /** Dispatches a compute operation. */
+ void dispatch(unsigned, unsigned = 1, unsigned = 1);
+
/** Resolves multisample attachments from the active framebuffer into
target. */
void resolve_multisample(Framebuffer &target);