diff --git a/ChangeLog.txt b/ChangeLog.txt index e3ecb659..6ffb0ea9 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1621,3 +1621,43 @@ Internal Issues: when needed (internal issue 462). * Shorten the table of contents in the single-page ref page HTML output. Still working on the PDF (internal issue 536). + +----------------------------------------------------- + +Change log for November 25, 2016 Vulkan 1.0.35 spec update: + + * Bump API patch number and header version number to 35 for this update. + +Github Issues: + + * Document in the <<memory-device-hostaccess,Host Access>> section that + mapping and unmapping does not invalidate or flush the mapped memory + (public issues 27, 126). + * Redefine the entire <<synchronization,synchronization>> chapter in terms of consistent + and well defined terminology, that's called out at the start of the + chapter. This terminology is applied equally to all synchronization + types, including subpass dependencies, submissions, and much of the + implicit ordering stuff dotted around the spec. Key terms are laid out + in the <<synchronization-dependencies,Execution and Memory Dependencies>> section at the top of the rewritten chapter (public + issues 128, 131, 132, 217, 299, 300, 302, 306, 322, 346, 347, 371, 407). + * Specify order of submission for batches in the + <<vkQueueSubmit,vkQueueSubmit>> and + <<vkQueueBindSparse,vkQueueBindSparse>> commands (public issue 371). + * Add valid usage statements to each of the WSI extension sections + indicating that the WSI-specific structure parameters must be valid, and + remove automatically generated valid usage statements now covered by the + manual sections (public issue 383). + * Clarify render pass compatibility for flink:vkCmdExecuteCommands (public + issue 390). + +Internal Issues: + + * Update +vk.xml+ to make previously explicit valid usage statements for + <<vkDebugReportMessageEXT,vkDebugReportMessageEXT>> implicit instead + (internal issue 553). + * Add valid usage statement for slink:VkImageCreateInfo preventing + creation of 1D sparse images (internal issue 573). 
+ * Fix Python scripts to always read/write files in utf-8 encoding, and a + logic error in reflib.py which could cause a fatal error for + malstructured asciidoc (internal issues 578, 586). diff --git a/doc/specs/vulkan/Makefile b/doc/specs/vulkan/Makefile index 1d8f7520..8205660a 100644 --- a/doc/specs/vulkan/Makefile +++ b/doc/specs/vulkan/Makefile @@ -160,7 +160,7 @@ GENDEPENDS = api/timeMarker validity/timeMarker hostsynctable/timeMarker COMMONDOCS = $(CHAPTERS) $(GENINCLUDE) $(GENDEPENDS) # A generated included file containing the spec version, date, and git commit SPECVERSION = specversion.txt -SPECREVISION = 1.0.34 +SPECREVISION = 1.0.35 SPECREMARK = # Spec targets diff --git a/doc/specs/vulkan/appendices/VK_NVX_device_generated_commands.txt b/doc/specs/vulkan/appendices/VK_NVX_device_generated_commands.txt new file mode 100644 index 00000000..d520c0c8 --- /dev/null +++ b/doc/specs/vulkan/appendices/VK_NVX_device_generated_commands.txt @@ -0,0 +1,378 @@ +[[VK_NVX_device_generated_commands]] +== VK_NVX_device_generated_commands + +*Name String*:: + +VK_NVX_device_generated_commands+ +*Extension Type*:: + Device extension +*Registered Extension Number*:: + 87 +*Last Modified Date*:: + 2016-10-31 +*Revision*:: + 1 +*Dependencies*:: + - This extension is written against version 1.0 of the Vulkan API. +*Contributors*:: + - Pierre Boudier, NVIDIA + - Christoph Kubisch, NVIDIA + - Mathias Schott, NVIDIA + - Jeff Bolz, NVIDIA + - Eric Werness, NVIDIA + - Detlef Roettger, NVIDIA + - Daniel Koch, NVIDIA + +*Contacts*:: + - Pierre Boudier, NVIDIA (pboudier@nvidia.com) + - Christoph Kubisch, NVIDIA (ckubisch@nvidia.com) + + +This extension allows the device to generate a number of critical commands +for command buffers. + +When rendering a large number of objects, the device can be leveraged to +implement a number of critical functions, like updating matrices, or +implementing occlusion culling, frustum culling, front to back sorting... 
+Implementing those on the device does not require any special extension, +since an application is free to define its own data structures, and just +process them using shaders. + +However, if the application desires to quickly kick off the rendering of the +final stream of objects, then unextended Vulkan forces the application to +read back the processed stream and issue graphics commands from the host. +For very large scenes, the synchronization overhead, and cost to generate +the command buffer can become the bottleneck. +This extension allows an application to generate a device side stream of +state changes and commands, and convert it efficiently into a command buffer +without having to read it back on the host. + +Furthermore, it allows incremental changes to such command buffers, by +manipulating only partial sections of a command stream, for example pipeline +bindings. +Unextended Vulkan requires re-creation of entire command buffers in such a +scenario, or updates synchronized on the host. + +The intended usage for this extension is for the application to: + + * create its objects as in unextended Vulkan + * create a VkObjectTableNVX, and register the various Vulkan objects that + are needed to evaluate the input parameters. + * create a VkIndirectCommandsLayoutNVX, which lists the + VkIndirectCommandsTokenTypes it wants to dynamically change as an atomic + command sequence. + This step likely involves some internal device code compilation, since + the intent is for the GPU to generate the command buffer in the + pipeline. + * fill the input buffers with the data for each of the inputs it needs. + Each input is an array that will be filled with an index in the object + table, instead of using CPU pointers. + * set up a target secondary command buffer + * reserve command buffer space via vkCmdReserveSpaceForCommandsNVX in a + target command buffer at the position you want the generated commands to + be executed. 
+ * call vkCmdProcessCommandsNVX to create the actual device commands for + all sequences based on the array contents into a provided target command + buffer. + * execute the target command buffer like a regular secondary command + buffer + +For each draw/dispatch, the following can be specified: + + * a different pipeline state object + * a number of descriptor sets, with dynamic offsets + * a number of vertex buffer bindings, with an optional dynamic offset + * a different index buffer, with an optional dynamic offset + +It is recommended to register a small number of objects and to use dynamic +offsets whenever possible. + +While the GPU can be faster than a CPU to generate the commands, it may not +happen asynchronously, therefore the primary use-case is generating "less" +total work (occlusion culling, classification to use specialized +shaders...). + +=== New Object Types + + * sname:VkObjectTableNVX + * sname:VkIndirectCommandsLayoutNVX + +=== New Flag Types + + * sname:VkIndirectCommandsLayoutUsageFlagsNVX + * sname:VkObjectEntryUsageFlagsNVX + +=== New Enum Constants + +Extending elink:VkStructureType: + + ** ename:VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX + ** ename:VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX + ** ename:VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX + ** ename:VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX + ** ename:VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX + ** ename:VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX + +Extending elink:VkPipelineStageFlagBits: + + ** ename:VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX + +=== New Enums + + * elink:VkIndirectCommandsLayoutUsageFlagBitsNVX + * elink:VkIndirectCommandsTokenTypeNVX + * elink:VkObjectEntryUsageFlagBitsNVX + * elink:VkObjectEntryTypeNVX + +=== New Structures + + * slink:VkDeviceGeneratedCommandsFeaturesNVX + * slink:VkDeviceGeneratedCommandsLimitsNVX + * slink:VkIndirectCommandsTokenNVX + * slink:VkIndirectCommandsLayoutTokenNVX + 
* slink:VkIndirectCommandsLayoutCreateInfoNVX + * slink:VkCmdProcessCommandsInfoNVX + * slink:VkCmdReserveSpaceForCommandsInfoNVX + * slink:VkObjectTableCreateInfoNVX + * slink:VkObjectTableEntryNVX + * slink:VkObjectTablePipelineEntryNVX + * slink:VkObjectTableDescriptorSetEntryNVX + * slink:VkObjectTableVertexBufferEntryNVX + * slink:VkObjectTableIndexBufferEntryNVX + * slink:VkObjectTablePushConstantEntryNVX + +=== New Functions + + * flink:vkCmdProcessCommandsNVX + * flink:vkCmdReserveSpaceForCommandsNVX + * flink:vkCreateIndirectCommandsLayoutNVX + * flink:vkDestroyIndirectCommandsLayoutNVX + * flink:vkCreateObjectTableNVX + * flink:vkDestroyObjectTableNVX + * flink:vkRegisterObjectsNVX + * flink:vkUnregisterObjectsNVX + * flink:vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX + +=== Issues + +1) How to name this extension ? + + As usual one of the hardest issues ;) + + VK_gpu_commands VK_execute_commands VK_device_commands + VK_device_execute_commands VK_device_execute VK_device_created_commands + VK_device_recorded_commands VK_device_generated_commands + +2) Should we use serial tokens or redundant sequence description? + + Similar to VkPipeline, signatures have the most likeliness to be + cross-vendor adoptable. + They also benefit from being processable in parallel. + +3) How to name sequence description? + + ExecuteCommandSignature a bit long, just ExecuteSignature or actually more + Vulkan nomenclature IndirectCommandsLayout + +4) Do we want to provide indirectCommands inputs with layout or at +indirectCommands time? + + Separate layout from data as Vulkan does. + Provide full flexibility for indirectCommands. + +5) Should the input be provided as SoA or AoS? + + It is desired by application to reuse the list of objects and render them + with some kind of override. + This can be done by just selecting a different input for a push constant + or a descriptor set, if they are defined as independent arrays. 
+ If the data was interleaved, this would not be as easily possible. + + Allowing input divisors can also reduce the conservative command buffer + allocation. + +6) How do we know the size of the GPU command buffer generated by +vkCmdProcessCommandsNVX ? + + maxSequenceCount can give an upper estimate, even if the actual count is + sourced from the gpu buffer at (buffer, countOffset). + As such maxSequenceCount must always be set correctly. + + Developers are encouraged to make good use of the IndirectCommandsLayout's + pTokens->divisor, as they allow less conservative storage costs. + Especially pipeline changes on a per-draw basis can be costly memory wise. + +7) How to deal with dynamic offsets in DescriptorSets? + + Maybe additional token VK_EXECUTE_DESCRIPTOR_SET_OFFSET_COMMAND_NVX that + works for a "single dynamic buffer" descriptor set and then use (32 bit + tableEntry + 32bit offset) + +added dynamicCount field, variable sized input + +8) Should we allow updates to the object table, similar to DescriptorSet? + + Desired yes, people may change "material" shaders and not want to recreate + the entire register table. + However the developer must ensure to not overwrite a registered + objectindex while it is still being used. + +9) Should we allow dynamic state changes? + + Seems a bit excessive for "per-draw" type of scenario, but GPU could + partition work itself with viewport/scissor... + +10) How do we allow re-using already "filled" indirectCommands buffers? + + just use a VkCommandBuffer for the output, and it can be reused easily. + +11) How portable should such re-use be? + + Same as secondary command buffer + +12) Should sequenceOrdered be part of IndirectCommandsLayout or +vkCmdProcessCommandsNVX? + + Seems better for IndirectCommandsLayout, as that is when most heavy + lifting in terms of internal device code generation is done. + +13) Under which conditions is vkCmdProcessCommandsNVX legal? 
+ + Options: a) on the host command buffer like a regular draw call b) + vkCmdProcessCommandsNVX makes use of VkCommandBufferBeginInfo and serves + as vkBeginCommandBuffer/vkEndCommandBuffer implicitly. + c) The targetCommandbuffer must be inside the "begin" state already at the + moment of being passed. + This very likely suggests a new VkCommandBufferUsageFlags + VK_COMMAND_BUFFER_USAGE_DEVICE_GENERATED_BIT. + d) The targetCommandbuffer must reserve space via a new function. + + used a & d. + +14) What if different pipelines have different DescriptorSetLayouts at a +certain set unit that mismatches in "token.dynamicCount"? + + Considered legal, as long as the maximum dynamic count of all used + DescriptorSetLayouts is provided. + +15) Should we add "strides" to input arrays, so that "Array of Structures" +type setups can be supported more easily? + + Maybe provide a usage flag for packed tokens stream (all inputs from same + buffer, implicit stride). + + No, given performance test was worse. + +16) Should we allow re-using the target command buffer directly, without +need to reset command buffer? + + YES: new api vkCmdReserveSpaceForCommandsNVX. + +17) Is vkCmdProcessCommandsNVX copying the input data or referencing it ? + + There are multiple implementations possible: + + * one could have some emulation code that parse the inputs, and generates + an output command buffer, therefore copying the inputs. + * one could just reference the inputs, and have the processing done in + pipe at execution time. + + If the data is mandated to be copied, then it puts a penalty on + implementation that could process the inputs directly in pipe. + If the data is "referenced", then it allows both types of implementation + + The inputs are "referenced", and should not be modified after the call to + vkCmdProcessCommandsNVX and until after the rendering of the target command + buffer is finished. + +18) Why is this NVX and not NV? + + To allow early experimentation and feedback. 
+ We expect that a version with a refined design as multi-vendor variant + will follow up. + +19) Should we make the availability for each token type a device limit? + + Only distinguish between graphics/compute for now, further splitting up + may lead to too much fractioning. + +20) When can the objectTable be modified? + + Similar to the other inputs for vkCmdProcessCommandsNVX, only when all + device access via vkCmdProcessCommandsNVX or execution of target command + buffer has completed can an object at a given objectIndex be unregistered + or re-registered again. + +21) Which buffer usage flags are required for the buffers referenced by +vkCmdProcessCommandsNVX? + + reuse existing VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT + + * VkCmdProcessCommandsInfoNVX::sequencesCountBuffer + * VkCmdProcessCommandsInfoNVX::sequencesIndexBuffer + * VkIndirectCommandsTokenNVX::buffer + +22) In which pipeline stage does the device generated command expansion +happen? + + This is required in order to allow applications to properly synchronize + access (e.g. via memory barriers) when writing to the buffers referenced + by vkCmdProcessCommandsNVX + + added VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT + +23) What if most token data is "static", but we frequently want to render a +subsection? + + added "sequencesIndexBuffer". + This makes it easier to sort and filter what should actually be processed. + +=== Example Code + +TODO links to gameworks & designworks samples + +[source,c] +--------------------------------------------------- + + // setup secondary command buffer + vkBeginCommandBuffer(generatedCmdBuffer, &beginInfo); + ... 
setup its state as usual + + // insert the reservation (there can only be one per command buffer) + // where the generated calls should be filled into + VkCmdReserveSpaceForCommandsInfoNVX reserveInfo = { VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX }; + reserveInfo.objectTable = objectTable; + reserveInfo.indirectCommandsLayout = deviceGeneratedLayout; + reserveInfo.maxSequencesCount = myCount; + vkCmdReserveSpaceForCommandsNVX(generatedCmdBuffer, &reserveInfo); + + vkEndCommandBuffer(generatedCmdBuffer); + + // trigger the generation at some point in another primary command buffer + VkCmdProcessCommandsInfoNVX processInfo = { VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX }; + processInfo.objectTable = objectTable; + processInfo.indirectCommandsLayout = deviceGeneratedLayout; + processInfo.maxSequencesCount = myCount; + // set the target of the generation (if null we would directly execute with mainCmd) + processInfo.targetCommandBuffer = generatedCmdBuffer; + // provide input data + processInfo.indirectCommandsTokenCount = 3; + processInfo.pIndirectCommandsTokens = myTokens; + + // If you modify the input buffer data referenced by VkCmdProcessCommandsInfoNVX, + // ensure you have added the appropriate barriers prior to the generation process. + // When regenerating the content of the same reserved space, ensure prior operations have completed + vkCmdPipelineBarrier (mainCmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX, ...); + + vkCmdProcessCommandsNVX(mainCmd, &processInfo); + ... + // execute the secondary command buffer and ensure the processing that modifies command-buffer content + // has completed + vkCmdPipelineBarrier(mainCmd, VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ...); 
+ vkCmdExecuteCommands(mainCmd, 1, &generatedCmdBuffer); + +--------------------------------------------------- + +=== Version History + + * Revision 1, 2016-10-31 (Christoph Kubisch) + - Initial draft diff --git a/doc/specs/vulkan/appendices/boilerplate.txt b/doc/specs/vulkan/appendices/boilerplate.txt index 98587949..ed2caa6b 100644 --- a/doc/specs/vulkan/appendices/boilerplate.txt +++ b/doc/specs/vulkan/appendices/boilerplate.txt @@ -62,10 +62,16 @@ include::../api/flags/VkImageAspectFlags.txt[] include::../api/flags/VkImageCreateFlags.txt[] include::../api/flags/VkImageUsageFlags.txt[] include::../api/flags/VkImageViewCreateFlags.txt[] +ifdef::VK_NVX_device_generated_commands[] +include::../api/flags/VkIndirectCommandsLayoutUsageFlagsNVX.txt[] +endif::VK_NVX_device_generated_commands[] include::../api/flags/VkInstanceCreateFlags.txt[] include::../api/flags/VkMemoryHeapFlags.txt[] include::../api/flags/VkMemoryMapFlags.txt[] include::../api/flags/VkMemoryPropertyFlags.txt[] +ifdef::VK_NVX_device_generated_commands[] +include::../api/flags/VkObjectEntryUsageFlagsNVX.txt[] +endif::VK_NVX_device_generated_commands[] include::../api/flags/VkPipelineCacheCreateFlags.txt[] include::../api/flags/VkPipelineColorBlendStateCreateFlags.txt[] include::../api/flags/VkPipelineCreateFlags.txt[] diff --git a/doc/specs/vulkan/appendices/extensions.txt b/doc/specs/vulkan/appendices/extensions.txt index 18c3a1df..f4cd95ed 100644 --- a/doc/specs/vulkan/appendices/extensions.txt +++ b/doc/specs/vulkan/appendices/extensions.txt @@ -156,3 +156,14 @@ include::VK_NV_win32_keyed_mutex.txt[] endif::VK_NV_win32_keyed_mutex[] // :leveloffset: 1 + + +// == NVIDIA +NVX+ Vendor Extensions +// +// :leveloffset: 2 + +ifdef::VK_NVX_device_generated_commands[] +include::VK_NVX_device_generated_commands.txt[] +endif::VK_NVX_device_generated_commands[] + +// :leveloffset: 1 diff --git a/doc/specs/vulkan/appendices/glossary.txt b/doc/specs/vulkan/appendices/glossary.txt index 840d778a..1cf7d493 
100644 --- a/doc/specs/vulkan/appendices/glossary.txt +++ b/doc/specs/vulkan/appendices/glossary.txt @@ -17,6 +17,10 @@ Accessible (Descriptor Binding):: Descriptors using that binding can: only be used by stages in which they are accessible. +Acquire Operation (Resource):: + An operation that acquires ownership of an image subresource or buffer + range. + Adjacent Vertex:: A vertex in an adjacency primitive topology that is not part of a given primitive, but is accessible in geometry shaders. @@ -53,8 +57,12 @@ Attachment (Render Pass):: information about the properties of the image view that will later be attached. +Availability Operation:: + An operation that causes the values generated by specified memory write + accesses to become available for future access. + Available:: - See Memory Dependency. + A state of values written to memory that allows them to be made visible. Back-Facing:: See Facingness. @@ -303,7 +311,7 @@ Execution Dependency:: events, or implicit ordering operations. Execution Dependency Chain:: - A sequence of execution dependencies that transitively act as an + A sequence of execution dependencies that transitively act as a single execution dependency. Extension Scope:: @@ -354,6 +362,23 @@ Framebuffer Coordinates:: x and/or y, with [eq]#(0,0)# in the upper left corner and pixel centers at half-integers. +Framebuffer-Space:: + Operating with respect to framebuffer coordinates. + +Framebuffer-Local:: + A framebuffer-local dependency guarantees that only for a single + framebuffer region, the first set of operations happens-before the + second set of operations. + +Framebuffer-Global:: + A framebuffer-global dependency guarantees that for all framebuffer + regions, the first set of operations happens-before the second set of + operations. + +Framebuffer Region:: + A framebuffer region is a set of sample (x, y, layer, sample) + coordinates that is a subset of the entire framebuffer. + Front-Facing:: See Facingness. 
@@ -366,18 +391,18 @@ Handle:: Each object type has a unique handle type. Happen-after:: - A command happens-after a dependency if they are separated by an - execution dependency chain, with the command included in the destination - of the last dependency of the chain. - A memory barrier makes visible memory writes to commands that - happen-after it. + A transitive, irreflexive and antisymmetric ordering relation between + operations. + An execution dependency with a source of *A* and a destination of *B* + enforces that *B* happens-after *A*. + The inverse relation of happens-before. Happen-before:: - A command happens-before a dependency if they are separated by an - execution dependency chain, with the command included in the source of - the first dependency of the chain. - A memory barrier makes available memory writes of commands that - happen-before it. + A transitive, irreflexive and antisymmetric ordering relation between + operations. + An execution dependency with a source of *A* and a destination of *B* + enforces that *A* happens-before *B*. + The inverse relation of happens-after. Helper Invocation:: A fragment shader invocation that is created solely for the purposes of @@ -444,6 +469,16 @@ Indirect Commands:: from structures in buffer memory. Includes flink:vkCmdDrawIndirect, flink:vkCmdDrawIndexedIndirect, and flink:vkCmdDispatchIndirect. + +ifdef::VK_NVX_device_generated_commands[] +Indirect Commands Layout:: + A definition of a sequence of commands, that are generated on the device + via flink:vkCmdProcessCommandsNVX. + Each sequence is comprised of multiple + sname:VkIndirectCommandsTokenTypeNVX, which represent a subset of + traditional command buffer commands. + Represented as sname:VkIndirectCommandsLayoutNVX. +endif::VK_NVX_device_generated_commands[] Initial State (Command Buffer):: A command buffer that has not begun recording commands. @@ -503,12 +538,12 @@ Mappable:: See Host-Visible Memory. 
Memory Dependency:: - A sequence of operations that makes writes available, performs an - execution dependency between the writes and subsequent accesses, and - makes available writes visible to later accesses. - In order for the effects of a write to be coherent with later accesses, - it must: be made available from the old access type and then made - visible to the new access type. + A memory dependency is an execution dependency which includes + availability and visibility operations such that: + + * The first set of operations happens-before the availability operation + * The availability operation happens-before the visibility operation + * The visibility operation happens-before the second set of operations Memory Heap:: A region of memory from which device memory allocations can: be made. @@ -534,11 +569,24 @@ Normalized Device Coordinates:: A coordinate space after perspective division is applied to clip coordinates, and before the viewport transformation converts to framebuffer coordinates. + +ifdef::VK_NVX_device_generated_commands[] +Object Table:: + A binding table for various resources (sname:VkPipeline, sname:VkBuffer, + sname:VkDescriptorSet), so that they can be referenced in + device-generated command processing. + Represented as sname:VkObjectTableNVX. + Entries are registered or unregistered via ftext:uint32_t indices. +endif::VK_NVX_device_generated_commands[] Overlapped Range (Aliased Range):: The aliased range of a device memory allocation that intersects a given image subresource of an image or range of a buffer. +Ownership (Resource):: + If an entity (e.g. a queue family) has ownership of a resource, access + to that resource is well-defined for access by that entity. + Packed Format:: A format whose components are stored as a single data element in memory, with their relative locations defined within that element. @@ -573,6 +621,10 @@ Pipeline Layout:: setting push constant values. Represented by a sname:VkPipelineLayout object. 
+Pipeline Stage:: + A logically independent execution unit that performs some of the + operations defined by an action command. + Point Sampling (Rasterization):: A rule that determines whether a fragment sample location is covered by a polygon primitive by testing whether the sample location is in the @@ -642,6 +694,10 @@ Recording State (Command Buffer):: A command buffer that is ready to record commands. See also Initial State and Executable State. +Release Operation (Resource):: + An operation that releases ownership of an image subresource or buffer + range. + Render Pass:: An object that represents a set of framebuffer attachments and phases of rendering using those attachments. @@ -815,14 +871,19 @@ Viewport Transformation:: A transformation from normalized device coordinates to framebuffer coordinates, based on a viewport rectangle and depth range. +Visibility Operation:: + An operation that causes available values to become visible to specified + memory accesses. + Visible:: - See Memory Dependency. + A state of values written to memory that allows them to be accessed by a + set of operations. // To be added per issue 18: // Current State <> // Barycentric Coordinates <> // Internal Allocations <> -// Unavailable, Available <> +// Unavailable, Available <> - NB: this clashes with available/visible in terms of memory // Signaled, Unsignaled <> <> // Interior Vertices <> // Inner Vertices <> <> diff --git a/doc/specs/vulkan/chapters/VK_EXT_debug_report.txt b/doc/specs/vulkan/chapters/VK_EXT_debug_report.txt index 6b495088..efbe0a5c 100644 --- a/doc/specs/vulkan/chapters/VK_EXT_debug_report.txt +++ b/doc/specs/vulkan/chapters/VK_EXT_debug_report.txt @@ -165,8 +165,6 @@ registered. 
.Valid Usage **** * pname:object may: be a Vulkan object - * pname:pLayerPrefix must: be a `NULL` terminated string - * pname:pMessage must: be a `NULL` terminated string **** include::../validity/protos/vkDebugReportMessageEXT.txt[] diff --git a/doc/specs/vulkan/chapters/VK_KHR_android_surface/platformCreateSurface_android.txt b/doc/specs/vulkan/chapters/VK_KHR_android_surface/platformCreateSurface_android.txt index df6e5fb3..cf9471eb 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_android_surface/platformCreateSurface_android.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_android_surface/platformCreateSurface_android.txt @@ -61,5 +61,10 @@ include::../../api/structs/VkAndroidSurfaceCreateInfoKHR.txt[] * pname:window is a pointer to the code:ANativeWindow to associate the surface with. +.Valid Usage +**** + * pname:window must: point to a valid Android code:ANativeWindow. +**** + include::../../validity/structs/VkAndroidSurfaceCreateInfoKHR.txt[] diff --git a/doc/specs/vulkan/chapters/VK_KHR_mir_surface/platformCreateSurface_mir.txt b/doc/specs/vulkan/chapters/VK_KHR_mir_surface/platformCreateSurface_mir.txt index 309f5957..29d6302a 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_mir_surface/platformCreateSurface_mir.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_mir_surface/platformCreateSurface_mir.txt @@ -36,6 +36,12 @@ include::../../api/structs/VkMirSurfaceCreateInfoKHR.txt[] code:MirConnection and code:MirSurface for the window to associate the surface with. +.Valid Usage +**** + * pname:connection must: point to a valid code:MirConnection. + * pname:surface must: point to a valid code:MirSurface. 
+**** + include::../../validity/structs/VkMirSurfaceCreateInfoKHR.txt[] On Mir, when a swapchain's pname:imageExtent does not match the surface's diff --git a/doc/specs/vulkan/chapters/VK_KHR_swapchain/wsi.txt b/doc/specs/vulkan/chapters/VK_KHR_swapchain/wsi.txt index fedd460e..ce7fb522 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_swapchain/wsi.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_swapchain/wsi.txt @@ -533,12 +533,10 @@ The fname:vkCmdWaitEvents or fname:vkCmdPipelineBarrier used to transition the image away from ename:VK_IMAGE_LAYOUT_PRESENT_SRC_KHR layout must: have pname:dstStageMask and pname:dstAccessMask parameters set based on the next use of the image. -The application must: use -<> and -<> -to prevent the image transition from occurring before the semaphore passed -to fname:vkAcquireNextImageKHR has signaled. +The application must: use <> and <> to prevent the image transition from occurring before the +semaphore passed to fname:vkAcquireNextImageKHR has signaled. [NOTE] .Note diff --git a/doc/specs/vulkan/chapters/VK_KHR_wayland_surface/platformCreateSurface_wayland.txt b/doc/specs/vulkan/chapters/VK_KHR_wayland_surface/platformCreateSurface_wayland.txt index 739b1126..b99d2295 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_wayland_surface/platformCreateSurface_wayland.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_wayland_surface/platformCreateSurface_wayland.txt @@ -35,6 +35,12 @@ include::../../api/structs/VkWaylandSurfaceCreateInfoKHR.txt[] * pname:display and pname:surface are pointers to the Wayland code:wl_display and code:wl_surface to associate the surface with. +.Valid Usage +**** + * pname:display must: point to a valid Wayland code:wl_display. + * pname:surface must: point to a valid Wayland code:wl_surface. +**** + include::../../validity/structs/VkWaylandSurfaceCreateInfoKHR.txt[] On Wayland, pname:currentExtent is undefined [eq]#(0,0)#. 
diff --git a/doc/specs/vulkan/chapters/VK_KHR_win32_surface/platformCreateSurface_win32.txt b/doc/specs/vulkan/chapters/VK_KHR_win32_surface/platformCreateSurface_win32.txt index f2e10551..c3ce030d 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_win32_surface/platformCreateSurface_win32.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_win32_surface/platformCreateSurface_win32.txt @@ -35,6 +35,12 @@ include::../../api/structs/VkWin32SurfaceCreateInfoKHR.txt[] * pname:hinstance and pname:hwnd are the Win32 code:HINSTANCE and code:HWND for the window to associate the surface with. +.Valid Usage +**** + * pname:hinstance must: be a valid Win32 code:HINSTANCE. + * pname:hwnd must: be a valid Win32 code:HWND. +**** + include::../../validity/structs/VkWin32SurfaceCreateInfoKHR.txt[] With Win32, pname:minImageExtent, pname:maxImageExtent, and diff --git a/doc/specs/vulkan/chapters/VK_KHR_xcb_surface/platformCreateSurface_xcb.txt b/doc/specs/vulkan/chapters/VK_KHR_xcb_surface/platformCreateSurface_xcb.txt index 6cf326c3..b21be483 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_xcb_surface/platformCreateSurface_xcb.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_xcb_surface/platformCreateSurface_xcb.txt @@ -38,6 +38,12 @@ include::../../api/structs/VkXcbSurfaceCreateInfoKHR.txt[] * pname:window is the code:xcb_window_t for the X11 window to associate the surface with. +.Valid Usage +**** + * pname:connection must: point to a valid X11 code:xcb_connection_t. + * pname:window must: be a valid X11 code:xcb_window_t. 
+**** + include::../../validity/structs/VkXcbSurfaceCreateInfoKHR.txt[] With Xcb, pname:minImageExtent, pname:maxImageExtent, and diff --git a/doc/specs/vulkan/chapters/VK_KHR_xlib_surface/platformCreateSurface_xlib.txt b/doc/specs/vulkan/chapters/VK_KHR_xlib_surface/platformCreateSurface_xlib.txt index e01c8008..121c1bbb 100644 --- a/doc/specs/vulkan/chapters/VK_KHR_xlib_surface/platformCreateSurface_xlib.txt +++ b/doc/specs/vulkan/chapters/VK_KHR_xlib_surface/platformCreateSurface_xlib.txt @@ -37,6 +37,12 @@ include::../../api/structs/VkXlibSurfaceCreateInfoKHR.txt[] server. * pname:window is an Xlib code:Window to associate the surface with. +.Valid Usage +**** + * pname:dpy must: point to a valid Xlib code:Display. + * pname:window must: be a valid Xlib code:Window. +**** + include::../../validity/structs/VkXlibSurfaceCreateInfoKHR.txt[] With Xlib, pname:minImageExtent, pname:maxImageExtent, and diff --git a/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/generatedcommands.txt b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/generatedcommands.txt new file mode 100644 index 00000000..6a2c2c91 --- /dev/null +++ b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/generatedcommands.txt @@ -0,0 +1,88 @@ +[[device-generated-commands]] += Device-Generated Commands + +This chapter discusses the generation of command buffer content on the +device. +These principal steps are to be taken to generate commands on the device: + + * Make resource bindings accessible for the device via registering in an + sname:VkObjectTableNVX. + * Define via sname:VkIndirectCommandsLayoutNVX the sequence of commands + which should be generated. + * Fill one or more sname:VkBuffer with the appropriate content that gets + interpreted by sname:VkIndirectCommandsLayoutNVX. + * Reserve command space via flink:vkCmdReserveSpaceForCommandsNVX in a + secondary sname:VkCommandBuffer where the generated commands should be + recorded. 
+ * Generate the actual commands via flink:vkCmdProcessCommandsNVX passing + all required data. + +Execution of such generated commands can either be triggered directly with +the generation process, or by executing the secondary sname:VkCommandBuffer +that was chosen as optional target. +The latter allows re-using generated commands as well. +Similar to sname:VkDescriptorSet, special care must be taken for the lifetime +of resources referenced in sname:VkObjectTableNVX, which may be accessed at +either generation or execution time. + +== Features and Limitations + +// refBegin vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX Returns device-generated commands related properties of a physical device + +To query the support of related features and limitations, call: + +include::../../api/protos/vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX.txt[] + + * pname:physicalDevice is the handle to the physical device whose + properties will be queried. + * pname:pFeatures points to an instance of the + slink:VkDeviceGeneratedCommandsFeaturesNVX structure, that will be + filled with returned information. + * pname:pLimits points to an instance of the + slink:VkDeviceGeneratedCommandsLimitsNVX structure, that will be filled + with returned information. + +include::../../validity/protos/vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX.txt[] + +// refBegin VkDeviceGeneratedCommandsFeaturesNVX Structure specifying physical device support + +The sname:VkDeviceGeneratedCommandsFeaturesNVX structure is defined as: + +include::../../api/structs/VkDeviceGeneratedCommandsFeaturesNVX.txt[] + + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. + * pname:computeBindingPointSupport indicates whether the + sname:VkObjectTableNVX supports entries with + ename:VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX bit set and + sname:VkIndirectCommandsLayoutNVX supports + ename:VK_PIPELINE_BIND_POINT_COMPUTE. 
+ +include::../../validity/structs/VkDeviceGeneratedCommandsFeaturesNVX.txt[] + +// refBegin VkDeviceGeneratedCommandsLimitsNVX Structure specifying physical device limits + +The sname:VkDeviceGeneratedCommandsLimitsNVX structure is defined as: + +include::../../api/structs/VkDeviceGeneratedCommandsLimitsNVX.txt[] + + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. + * pname:maxIndirectCommandsLayoutTokenCount is the maximum number of tokens + in sname:VkIndirectCommandsLayoutNVX. + * pname:maxObjectEntryCounts is the maximum number of entries per resource + type in sname:VkObjectTableNVX. + * pname:minSequenceCountBufferOffsetAlignment is the minimum alignment for + memory addresses optionally used in fname:vkCmdProcessCommandsNVX. + * pname:minSequenceIndexBufferOffsetAlignment is the minimum alignment for + memory addresses optionally used in fname:vkCmdProcessCommandsNVX. + * pname:minCommandsTokenBufferOffsetAlignment is the minimum alignment for + memory addresses optionally used in fname:vkCmdProcessCommandsNVX. 
+ +include::../../validity/structs/VkDeviceGeneratedCommandsLimitsNVX.txt[] + +include::objecttable.txt[] + +include::indirectcommands.txt[] + +include::generation.txt[] \ No newline at end of file diff --git a/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/generation.txt b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/generation.txt new file mode 100644 index 00000000..c3d2609a --- /dev/null +++ b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/generation.txt @@ -0,0 +1,173 @@ +== Indirect Commands Generation + +// refBegin vkCmdReserveSpaceForCommandsNVX Perform a reservation of command buffer space + +Command space for generated commands recorded into a secondary command +buffer must: be reserved by calling: + +include::../../api/protos/vkCmdReserveSpaceForCommandsNVX.txt[] + + * pname:commandBuffer is the secondary command buffer in which the space + for device-generated commands is reserved. + * pname:pProcessCommandsInfo is a pointer to an instance of the + slink:VkCmdReserveSpaceForCommandsInfoNVX structure containing parameters + affecting the reservation of command buffer space. + +.Valid Usage +**** + * The provided pname:commandBuffer must: not have had a prior space + reservation since its creation or the last reset. + * The state of the pname:commandBuffer must: be legal to execute all + commands within the sequence provided by the + pname:indirectCommandsLayout member of pname:pProcessCommandsInfo. +**** + +include::../../validity/protos/vkCmdReserveSpaceForCommandsNVX.txt[] + +// refBegin VkCmdReserveSpaceForCommandsInfoNVX Structure specifying parameters for the reservation of command buffer space + +include::../../api/structs/VkCmdReserveSpaceForCommandsInfoNVX.txt[] + + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. + * pname:objectTable is the sname:VkObjectTableNVX to be used for the + generation process. 
+ Only registered objects at the time + flink:vkCmdReserveSpaceForCommandsNVX is called, will be taken into + account for the reservation. + * pname:indirectCommandsLayout is the sname:VkIndirectCommandsLayoutNVX + that must: also be used at generation time. + * pname:maxSequencesCount is the maximum number of sequences for which + command buffer space will be reserved. + +include::../../validity/structs/VkCmdReserveSpaceForCommandsInfoNVX.txt[] + +The generated commands will behave as if they were recorded within the call +to fname:vkCmdReserveSpaceForCommandsNVX, that means they can inherit state +defined in the command buffer prior to this call. +However, given the stateless nature of the generated sequences, they will +not affect commands after the reserved space. +Treat the state that can: be affected by the provided +sname:VkIndirectCommandsLayoutNVX as undefined. + +// refBegin vkCmdProcessCommandsNVX Performs the generation of commands on the device + +The actual generation on the device is handled with: + +include::../../api/protos/vkCmdProcessCommandsNVX.txt[] + + * pname:commandBuffer is the primary command buffer in which the + generation process takes place. + * pname:pProcessCommandsInfo is a pointer to an instance of the + slink:VkCmdProcessCommandsInfoNVX structure containing parameters + affecting the processing of commands. + +include::../../validity/protos/vkCmdProcessCommandsNVX.txt[] + +// refBegin VkCmdProcessCommandsInfoNVX Structure specifying parameters for the generation of commands + +include::../../api/structs/VkCmdProcessCommandsInfoNVX.txt[] + + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. + * pname:objectTable is the sname:VkObjectTableNVX to be used for the + generation process. + Only registered objects at the time + flink:vkCmdReserveSpaceForCommandsNVX is called, will be taken into + account for the reservation. 
+ * pname:indirectCommandsLayout is the sname:VkIndirectCommandsLayoutNVX + that provides the command sequence to generate. + * pname:indirectCommandsTokenCount defines the number of input tokens + used. + * pname:pIndirectCommandsTokens provides an array of + slink:VkIndirectCommandsTokenNVX that reference the input data for each + token command. + * pname:maxSequencesCount is the maximum number of sequences for which + command buffer space will be reserved. + If pname:sequencesCountBuffer is `NULL`, this is also the actual number + of sequences generated. + * pname:targetCommandBuffer can: be the secondary sname:VkCommandBuffer in + which the commands should be recorded. + If `NULL` an implicit reservation as well as execution takes place on + the processing sname:VkCommandBuffer. + * pname:sequencesCountBuffer can: be sname:VkBuffer from which the actual + amount of sequences is sourced as ftext:uint32_t value. + * pname:sequencesCountOffset is the byte offset into + pname:sequencesCountBuffer where the count value is stored. + * pname:sequencesIndexBuffer must: be set if + pname:indirectCommandsLayout's + ename:VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX is set and + provides the used sequence indices as ftext:uint32_t array. + Otherwise it must: be `NULL`. + * pname:sequencesIndexOffset is the byte offset into + pname:sequencesIndexBuffer where the index values start. + + +.Valid Usage +**** + * The provided pname:objectTable must: include all objects referenced by + the generation process. + * pname:indirectCommandsTokenCount must: match the + pname:indirectCommandsLayout's tokenCount. + * The pname:tokenType member of each entry in the + pname:pIndirectCommandsTokens array must: match the values used at + creation time of pname:indirectCommandsLayout. + * If pname:targetCommandBuffer is provided, it must: have reserved command + space. 
+ * If pname:targetCommandBuffer is provided, the pname:objectTable must: + match the reservation's objectTable and must: have had all referenced + objects registered at reservation time. + * If pname:targetCommandBuffer is provided, the + pname:indirectCommandsLayout must: match the reservation's + indirectCommandsLayout. + * If pname:targetCommandBuffer is provided, the pname:maxSequencesCount + must: not exceed the reservation's maxSequencesCount. + * If pname:sequencesCountBuffer is used, its usage flag must: have + ename:VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT bit set. + * If pname:sequencesCountBuffer is used, pname:sequencesCountOffset must: + be aligned to + sname:VkDeviceGeneratedCommandsLimitsNVX::pname:minSequenceCountBufferOffsetAlignment. + * If pname:sequencesIndexBuffer is used, its usage flag must: have + ename:VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT bit set. + * If pname:sequencesIndexBuffer is used, pname:sequencesIndexOffset must: + be aligned to + sname:VkDeviceGeneratedCommandsLimitsNVX::pname:minSequenceIndexBufferOffsetAlignment. +**** + +include::../../validity/structs/VkCmdProcessCommandsInfoNVX.txt[] + +Referencing the functions defined in <<indirectmdslayout>>, +fname:vkCmdProcessCommandsNVX behaves as: + +[source,c] +--------------------------------------------------- +// For targetCommandBuffers the existing reservedSpace is reset & overwritten. + +VkCommandBuffer cmd = targetCommandBuffer ? + targetCommandBuffer.reservedSpace : + commandBuffer; + +uint32_t sequencesCount = sequencesCountBuffer ? 
+ min(maxSequencesCount, sequencesCountBuffer.load_uint32(sequencesCountOffset)) : + maxSequencesCount; + + +cmdProcessAllSequences(cmd, objectTable, + indirectCommandsLayout, pIndirectCommandsTokens, + sequencesCount, + sequencesIndexBuffer, sequencesIndexOffset); + +// The stateful commands within indirectCommandsLayout will not +// affect the state of subsequent commands in the target +// command buffer (cmd) +--------------------------------------------------- + +[NOTE] +.Note +==== +It is important to note that the state that may be affected through +generated commands must be considered undefined for the commands following +them. +It is not possible to set up generated state and provoke work that uses +this state outside of the generated sequence. +==== diff --git a/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/indirectcommands.txt b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/indirectcommands.txt new file mode 100644 index 00000000..e2244849 --- /dev/null +++ b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/indirectcommands.txt @@ -0,0 +1,329 @@ +[[indirectmdslayout]] +== Indirect Commands Layout + +// refBegin VkIndirectCommandsLayoutNVX Opaque handle to an indirect commands layout object + +The device-side command generation happens through an iterative processing +of an atomic sequence comprised of command tokens, which are represented by: + +include::../../api/handles/VkIndirectCommandsLayoutNVX.txt[] + +// refEnd VkIndirectCommandsLayoutNVX + + +=== Tokenized Command Processing + +The processing is in principle illustrated below: + +[source,c] +--------------------------------------------------- +void cmdProcessSequence(cmd, objectTable, indirectCommandsLayout, pIndirectCommandsTokens, s) +{ + for (c = 0; c < indirectCommandsLayout.tokenCount; c++) + { + indirectCommandsLayout.pTokens[c].command (cmd, objectTable, pIndirectCommandsTokens[c], s); + } +} + +void cmdProcessAllSequences(cmd, objectTable, 
indirectCommandsLayout, pIndirectCommandsTokens, sequencesCount) +{ + for (s = 0; s < sequencesCount; s++) + { + cmdProcessSequence(cmd, objectTable, indirectCommandsLayout, pIndirectCommandsTokens, s); + } +} +--------------------------------------------------- + +The processing of each sequence is considered stateless, therefore all state +changes must occur prior to work provoking commands within the sequence. +A single sequence is either strictly targeting +ename:VK_PIPELINE_BIND_POINT_GRAPHICS or +ename:VK_PIPELINE_BIND_POINT_COMPUTE. + +The primary input data for each token is provided through sname:VkBuffer +content at command generation time using flink:vkCmdProcessCommandsNVX, +however some functional arguments, for example binding sets, are specified +at layout creation time. +The input size is different for each token. + +// refBegin VkIndirectCommandsTokenTypeNVX Enum specifying indirect command token types + +The following tokens exist: + +include::../../api/enums/VkIndirectCommandsTokenTypeNVX.txt[] + +.Supported indirect command tokens +[width="80%",cols="67%,33%",options="header",align="center"] +|==== +|Token type | Equivalent command +|ename:VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX | fname:vkCmdBindPipeline +|ename:VK_INDIRECT_COMMANDS_TOKEN_DESCRIPTOR_SET_NVX | fname:vkCmdBindDescriptorSets +|ename:VK_INDIRECT_COMMANDS_TOKEN_INDEX_BUFFER_NVX | fname:vkCmdBindIndexBuffer +|ename:VK_INDIRECT_COMMANDS_TOKEN_VERTEX_BUFFER_NVX | fname:vkCmdBindVertexBuffers +|ename:VK_INDIRECT_COMMANDS_TOKEN_PUSH_CONSTANT_NVX | fname:vkCmdPushConstants +|ename:VK_INDIRECT_COMMANDS_TOKEN_DRAW_INDEXED_NVX | fname:vkCmdDrawIndexedIndirect +|ename:VK_INDIRECT_COMMANDS_TOKEN_DRAW_NVX | fname:vkCmdDrawIndirect +|ename:VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX | fname:vkCmdDispatchIndirect +|==== + +// refBegin VkIndirectCommandsLayoutTokenNVX Struct specifying the details of an indirect command layout token + +The sname:VkIndirectCommandsLayoutTokenNVX structure specifies details 
to +the function arguments 
that need to be known at layout creation time: + +include::../../api/structs/VkIndirectCommandsLayoutTokenNVX.txt[] + + * pname:type specifies the token command type. + * pname:bindingUnit has a different meaning depending on the type, please + refer to the pseudo code further down for details. + * pname:dynamicCount has a different meaning depending on the type, please + refer to the pseudo code further down for details. + * pname:divisor defines the rate at which the input data buffers are + accessed. + +.Valid Usage +**** + * pname:bindingUnit must: stay within device supported limits for the + appropriate commands. + * pname:dynamicCount must: stay within device supported limits for the + appropriate commands. + * pname:divisor must: be greater than `0` and a power of two. +**** + +include::../../validity/structs/VkIndirectCommandsLayoutTokenNVX.txt[] + +// refBegin VkIndirectCommandsTokenNVX Structure specifying the input data for a token at processing time + +The sname:VkIndirectCommandsTokenNVX structure specifies the input data for +a token at processing time. + +include::../../api/structs/VkIndirectCommandsTokenNVX.txt[] + + * pname:tokenType specifies the token command type. + * pname:buffer specifies the sname:VkBuffer storing the functional + arguments for each sequence. + These arguments can be written by the device. + * pname:offset specifies an offset into pname:buffer where the arguments + start. + +.Valid Usage +**** + * The pname:buffer's usage flag must: have the + ename:VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT bit set. + * The pname:offset must: be aligned to + sname:VkDeviceGeneratedCommandsLimitsNVX::pname:minCommandsTokenBufferOffsetAlignment. 
+**** + +include::../../validity/structs/VkIndirectCommandsTokenNVX.txt[] + + +The following code provides detailed information on how an individual +sequence is processed: + +[source,c] +--------------------------------------------------- +void cmdProcessSequence(cmd, objectTable, indirectCommandsLayout, pIndirectCommandsTokens, s) +{ + for (uint32_t c = 0; c < indirectCommandsLayout.tokenCount; c++){ + input = pIndirectCommandsTokens[c]; + i = s / indirectCommandsLayout.pTokens[c].divisor; + + switch(input.tokenType){ + VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX: + size_t stride = sizeof(uint32_t); + uint32_t* data = input.buffer.pointer( input.offset + stride * i ); + uint32_t object = data[0]; + + vkCmdBindPipeline(cmd, indirectCommandsLayout.pipelineBindPoint, + objectTable.pipelines[ object ].pipeline); + break; + + VK_INDIRECT_COMMANDS_TOKEN_DESCRIPTOR_SET_NVX: + size_t stride = sizeof(uint32_t) + sizeof(uint32_t) * indirectCommandsLayout.pTokens[c].dynamicCount; + uint32_t* data = input.buffer.pointer( input.offset + stride * i); + uint32_t object = data[0]; + + vkCmdBindDescriptorSets(cmd, indirectCommandsLayout.pipelineBindPoint, + objectTable.descriptorsets[ object ].layout, + indirectCommandsLayout.pTokens[ c ].bindingUnit, + 1, &objectTable.descriptorsets[ object ].descriptorSet, + indirectCommandsLayout.pTokens[ c ].dynamicCount, data + 1); + break; + + VK_INDIRECT_COMMANDS_TOKEN_PUSH_CONSTANT_NVX: + size_t stride = sizeof(uint32_t) + sizeof(uint32_t) * indirectCommandsLayout.pTokens[c].dynamicCount; + uint32_t* data = input.buffer.pointer( input.offset + stride * i ); + uint32_t object = data[0]; + + vkCmdPushConstants(cmd, + objectTable.pushconstants[ object ].layout, + objectTable.pushconstants[ object ].stageFlags, + indirectCommandsLayout.pTokens[ c ].bindingUnit, indirectCommandsLayout.pTokens[c].dynamicCount, data + 1); + break; + + VK_INDIRECT_COMMANDS_TOKEN_INDEX_BUFFER_NVX: + size_t stride = sizeof(uint32_t) + sizeof(uint32_t) * 
indirectCommandsLayout.pTokens[c].dynamicCount; + uint32_t* data = input.buffer.pointer( input.offset + stride * i ); + uint32_t object = data[0]; + + vkCmdBindIndexBuffer(cmd, + objectTable.indexbuffers[ object ].buffer, + indirectCommandsLayout.pTokens[ c ].dynamicCount ? data[1] : 0, + objectTable.indexbuffers[ object ].indexType); + break; + + VK_INDIRECT_COMMANDS_TOKEN_VERTEX_BUFFER_NVX: + size_t stride = sizeof(uint32_t) + sizeof(uint32_t) * indirectCommandsLayout.pTokens[c].dynamicCount; + uint32_t* data = input.buffer.pointer( input.offset + stride * i ); + uint32_t object = data[0]; + + vkCmdBindVertexBuffers(cmd, + indirectCommandsLayout.pTokens[ c ].bindingUnit, 1, + &objectTable.vertexbuffers[ object ].buffer, + indirectCommandsLayout.pTokens[ c ].dynamicCount ? data + 1 : {0}); // device size handled as uint32_t + break; + + VK_INDIRECT_COMMANDS_TOKEN_DRAW_INDEXED_NVX: + vkCmdDrawIndexedIndirect(cmd, + input.buffer, + sizeof(VkDrawIndexedIndirectCommand) * i + input.offset, 1, 0); + break; + + VK_INDIRECT_COMMANDS_TOKEN_DRAW_NVX: + vkCmdDrawIndirect(cmd, + input.buffer, + sizeof(VkDrawIndirectCommand) * i + input.offset, 1, 0); + break; + + VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX: + vkCmdDispatchIndirect(cmd, + input.buffer, + sizeof(VkDispatchIndirectCommand) * i + input.offset); + break; + } + } +} +--------------------------------------------------- + +=== Creation and Deletion + +Indirect command layouts are created by: + +include::../../api/protos/vkCreateIndirectCommandsLayoutNVX.txt[] + + * pname:device is the logical device that creates the indirect command + layout. + * pname:pCreateInfo is a pointer to an instance of the + sname:VkIndirectCommandsLayoutCreateInfoNVX structure containing parameters + affecting creation of the indirect command layout. + * pname:pAllocator controls host memory allocation as described in the + <<memory-allocation, Memory Allocation>> chapter. 
+ * pname:pIndirectCommandsLayout points to a sname:VkIndirectCommandsLayoutNVX handle + in which the resulting indirect command layout is returned. 
+ +include::../../validity/protos/vkCreateIndirectCommandsLayoutNVX.txt[] + +// refBegin VkIndirectCommandsLayoutCreateInfoNVX Structure specifying the parameters of a newly created indirect commands layout object + +The sname:VkIndirectCommandsLayoutCreateInfoNVX structure is defined as: + +include::../../api/structs/VkIndirectCommandsLayoutCreateInfoNVX.txt[] + + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. + * pname:pipelineBindPoint is the sname:VkPipelineBindPoint that this + layout targets. + * pname:flags is a bitmask providing usage hints of this layout. + See elink:VkIndirectCommandsLayoutUsageFlagBitsNVX below for a + description of the supported bits. + * pname:tokenCount is the length of the individual command sequence. + * pname:pTokens is an array describing each command token in detail. + See elink:VkIndirectCommandsTokenTypeNVX and + slink:VkIndirectCommandsLayoutTokenNVX below for details. + +// refBegin VkIndirectCommandsLayoutUsageFlagBitsNVX Bitmask specifying allowed usage of an indirect commands layout + +Bits which can: be set in pname:flags are: + +include::../../api/enums/VkIndirectCommandsLayoutUsageFlagBitsNVX.txt[] + + * ename:VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NVX + indicates that the processing of sequences can: happen in an + implementation-dependent order, which is not guaranteed to be coherent + across multiple invocations. + * ename:VK_INDIRECT_COMMANDS_LAYOUT_USAGE_SPARSE_SEQUENCES_BIT_NVX + indicates that there is likely a high difference between allocated + number of sequences and actually used. + * ename:VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EMPTY_EXECUTIONS_BIT_NVX + indicates that there are likely many draw or dispatch calls that are + zero-sized (zero grid dimension, no primitives to render). 
+ * ename:VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX + indicates that the input data for the sequences is not implicitly + indexed from 0..sequencesUsed but a user provided sname:VkBuffer + encoding the index is provided. + +The following code illustrates some of the key flags: + +[source,c] +--------------------------------------------------- +void cmdProcessAllSequences(cmd, objectTable, indirectCommandsLayout, pIndirectCommandsTokens, sequencesCount, indexbuffer, indexbufferoffset) +{ + for (s = 0; s < sequencesCount; s++) + { + sequence = s; + + if (indirectCommandsLayout.flags & VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NVX) { + sequence = incoherent_implementation_dependent_permutation[ sequence ]; + } + if (indirectCommandsLayout.flags & VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX) { + sequence = indexbuffer.load_uint32( sequence * sizeof(uint32_t) + indexbufferoffset); + } + + cmdProcessSequence( cmd, objectTable, indirectCommandsLayout, pIndirectCommandsTokens, sequence ); + } +} +--------------------------------------------------- + +.Valid Usage +**** + * pname:tokenCount must: be greater than `0` and below + sname:VkDeviceGeneratedCommandsLimitsNVX::pname:maxIndirectCommandsLayoutTokenCount. + * If the + sname:VkDeviceGeneratedCommandsFeaturesNVX::pname:computeBindingPointSupport + feature is not enabled, then pname:pipelineBindPoint must: not be + ename:VK_PIPELINE_BIND_POINT_COMPUTE. + * If pname:pTokens contains an entry of + ename:VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX it must: be the first + element of the array and there must: be only a single element of such + token type. + * All state binding tokens in pname:pTokens must: occur prior to work + provoking tokens (ename:VK_INDIRECT_COMMANDS_TOKEN_DRAW_NVX, + ename:VK_INDIRECT_COMMANDS_TOKEN_DRAW_INDEXED_NVX, + ename:VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX). + * The content of pname:pTokens must: include at least one work provoking + token. 
+**** + +include::../../validity/structs/VkIndirectCommandsLayoutCreateInfoNVX.txt[] + +// refBegin vkDestroyIndirectCommandsLayoutNVX Destroy an indirect commands layout + +Indirect command layouts are destroyed by: + +include::../../api/protos/vkDestroyIndirectCommandsLayoutNVX.txt[] + + * pname:device is the logical device that destroys the layout. + * pname:indirectCommandsLayout is the layout to destroy. + * pname:pAllocator controls host memory allocation as described in the + <<memory-allocation, Memory Allocation>> chapter. + +.Valid Usage +**** + * All submitted commands that refer to pname:indirectCommandsLayout must: + have completed execution. + * If sname:VkAllocationCallbacks were provided when + pname:indirectCommandsLayout was created, a compatible set of callbacks + must: be provided here. + * If no sname:VkAllocationCallbacks were provided when + pname:indirectCommandsLayout was created, pname:pAllocator must: be `NULL`. +**** + +include::../../validity/protos/vkDestroyIndirectCommandsLayoutNVX.txt[] \ No newline at end of file diff --git a/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/objecttable.txt b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/objecttable.txt new file mode 100644 index 00000000..cb9fb4c9 --- /dev/null +++ b/doc/specs/vulkan/chapters/VK_NVX_device_generated_commands/objecttable.txt @@ -0,0 +1,299 @@ +== Binding Object Table + +// refBegin VkObjectTableNVX Opaque handle to an object table + +The device-side bindings are registered inside a table: + +include::../../api/handles/VkObjectTableNVX.txt[] + +// refEnd VkObjectTableNVX + +This is required as the CPU-side object pointers, for example when binding a +sname:VkPipeline or sname:VkDescriptorSet, cannot be used by the device. +The combination of sname:VkObjectTableNVX and ftext:uint32_t table indices +stored inside a sname:VkBuffer serve that purpose during device command +generation. 
+ +At creation time the table is defined with a fixed amount of registration +slots for the individual resource types. 
+A detailed resource binding can then later be registered via +flink:vkRegisterObjectsNVX at any ftext:uint32_t index below the allocated +maximum. + +=== Table Creation + +To create object tables, call: + +include::../../api/protos/vkCreateObjectTableNVX.txt[] + + * pname:device is the logical device that creates the object table. + * pname:pCreateInfo is a pointer to an instance of the + sname:VkObjectTableCreateInfoNVX structure containing parameters + affecting creation of the table. + * pname:pAllocator controls host memory allocation as described in the + <<memory-allocation, Memory Allocation>> chapter. + * pname:pObjectTable points to a sname:VkObjectTableNVX handle in which + the resulting object table is returned. + +include::../../validity/protos/vkCreateObjectTableNVX.txt[] + +// refBegin VkObjectTableCreateInfoNVX Structure specifying the parameters of a newly created object table + +The sname:VkObjectTableCreateInfoNVX structure is defined as: + +include::../../api/structs/VkObjectTableCreateInfoNVX.txt[] + + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. + * pname:objectCount is the number of entry configurations that the object + table supports. + The following array parameters must match the size provided here. + * pname:pObjectEntryTypes is an array of elink:VkObjectEntryTypeNVX + providing the entry type of a given configuration. + * pname:pObjectEntryCounts is an array of counts of how many objects can be + registered in the table. + * pname:pObjectEntryUsageFlags is an array of bitmasks describing the + binding usage of the entry. + See elink:VkObjectEntryUsageFlagBitsNVX below for a description of the + supported bits. 
+ * pname:maxUniformBuffersPerDescriptor is the maximum number of + ename:VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER or + ename:VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC used by any single + registered sname:VkDescriptorSet in this table. 
+ * pname:maxStorageBuffersPerDescriptor is the maximum number of + ename:VK_DESCRIPTOR_TYPE_STORAGE_BUFFER or + ename:VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC used by any single + registered sname:VkDescriptorSet in this table. + * pname:maxStorageImagesPerDescriptor is the maximum number of + ename:VK_DESCRIPTOR_TYPE_STORAGE_IMAGE or + ename:VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER used by any single + registered sname:VkDescriptorSet in this table. + * pname:maxSampledImagesPerDescriptor is the maximum number of + ename:VK_DESCRIPTOR_TYPE_SAMPLER, + ename:VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + ename:VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER or + ename:VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT used by any single registered + sname:VkDescriptorSet in this table. + * pname:maxPipelineLayouts is the maximum number of unique + sname:VkPipelineLayout used by any registered sname:VkDescriptorSet or + sname:VkPipeline in this table. + +// refBegin VkObjectEntryTypeNVX Enum specifying object table entry type + +Types which can: be set in pname:pObjectEntryTypes are: + +include::../../api/enums/VkObjectEntryTypeNVX.txt[] + + * ename:VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX indicates a + sname:VkDescriptorSet resource entry that is registered via + sname:VkObjectTableDescriptorSetEntryNVX. + * ename:VK_OBJECT_ENTRY_PIPELINE_NVX indicates a sname:VkPipeline resource + entry that is registered via sname:VkObjectTablePipelineEntryNVX. + * ename:VK_OBJECT_ENTRY_INDEX_BUFFER_NVX indicates a sname:VkBuffer + resource entry that is registered via + sname:VkObjectTableIndexBufferEntryNVX. + * ename:VK_OBJECT_ENTRY_VERTEX_BUFFER_NVX indicates a sname:VkBuffer + resource entry that is registered via + sname:VkObjectTableVertexBufferEntryNVX. + * ename:VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX indicates the resource entry is + registered via sname:VkObjectTablePushConstantEntryNVX. 
+ +// refBegin VkObjectEntryUsageFlagBitsNVX Bitmask specifying allowed usage of an object entry + +Bits which can: be set in pname:pObjectEntryUsageFlags are: + +include::../../api/enums/VkObjectEntryUsageFlagBitsNVX.txt[] + + * ename:VK_OBJECT_ENTRY_USAGE_GRAPHICS_BIT_NVX indicates that the resource + is bound to ename:VK_PIPELINE_BIND_POINT_GRAPHICS. + * ename:VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX indicates that the resource + is bound to ename:VK_PIPELINE_BIND_POINT_COMPUTE. + +.Valid Usage +**** + * pname:objectCount must: be greater than `0`. + * If the + sname:VkDeviceGeneratedCommandsFeaturesNVX::pname:computeBindingPointSupport + feature is not enabled, pname:pObjectEntryUsageFlags must: not contain + ename:VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX. + * Any value within pname:pObjectEntryCounts must: not exceed + sname:VkDeviceGeneratedCommandsLimitsNVX::pname:maxObjectEntryCounts. + * pname:maxUniformBuffersPerDescriptor must: be within the limits supported + by the device. + * pname:maxStorageBuffersPerDescriptor must: be within the limits supported + by the device. + * pname:maxStorageImagesPerDescriptor must: be within the limits supported + by the device. + * pname:maxSampledImagesPerDescriptor must: be within the limits supported + by the device. +**** + +include::../../validity/structs/VkObjectTableCreateInfoNVX.txt[] + +// refBegin vkDestroyObjectTableNVX Destroy an object table + +To destroy an object table, call: + +include::../../api/protos/vkDestroyObjectTableNVX.txt[] + + * pname:device is the logical device that destroys the table. + * pname:objectTable is the table to destroy. + * pname:pAllocator controls host memory allocation as described in the + <<memory-allocation, Memory Allocation>> chapter. + +.Valid Usage +**** + * All submitted commands that refer to pname:objectTable must: have + completed execution. 
+ * If sname:VkAllocationCallbacks were provided when pname:objectTable was + created, a compatible set of callbacks must: be provided here. 
+ * If no sname:VkAllocationCallbacks were provided when pname:objectTable + was created, pname:pAllocator must: be `NULL`. +**** + +include::../../validity/protos/vkDestroyObjectTableNVX.txt[] + +=== Registering Objects + +Resource bindings of Vulkan objects are registered at an arbitrary +ftext:uint32_t index within an object table. +As long as the object table references such objects, they must: not be +deleted. + +include::../../api/protos/vkRegisterObjectsNVX.txt[] + + * pname:device is the logical device that creates the object table. + * pname:objectTable is the table for which the resources are registered. + * pname:objectCount is the number of resources to register. + * pname:ppObjectTableEntries provides an array for detailed binding + information, each array element is a pointer to a struct of type + sname:VkObjectTablePipelineEntryNVX, + sname:VkObjectTableDescriptorSetEntryNVX, + sname:VkObjectTableVertexBufferEntryNVX, + sname:VkObjectTableIndexBufferEntryNVX or + sname:VkObjectTablePushConstantEntryNVX (see below for details). + * pname:pObjectIndices are the indices at which each resource is + registered. + +.Valid Usage +**** + * The contents of pname:ppObjectTableEntries must: yield plausible bindings + supported by the device. + * At any pname:pObjectIndices there must: not be a registered resource + already. + * Any value inside pname:pObjectIndices must: be below the appropriate + sname:VkObjectTableCreateInfoNVX::pname:pObjectEntryCounts limits + provided at pname:objectTable creation time. +**** + +include::../../validity/protos/vkRegisterObjectsNVX.txt[] + +Common to all resource entries are: + +include::../../api/structs/VkObjectTableEntryNVX.txt[] + + * pname:type defines the entry type + * pname:flags defines which sname:VkPipelineBindPoint the resource can be + used with. + Some entry types allow only a single flag to be set. 
+ +.Valid Usage +**** + * If the + sname:VkDeviceGeneratedCommandsFeaturesNVX::pname:computeBindingPointSupport + feature is not enabled, pname:flags must: not contain + ename:VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX +**** + +include::../../validity/structs/VkObjectTableEntryNVX.txt[] + +include::../../api/structs/VkObjectTablePipelineEntryNVX.txt[] + + * pname:pipeline specifies the sname:VkPipeline that this resource entry + references. + +.Valid Usage +**** + * pname:type must: be ename:VK_OBJECT_ENTRY_PIPELINE_NVX +**** + +include::../../validity/structs/VkObjectTablePipelineEntryNVX.txt[] + +include::../../api/structs/VkObjectTableDescriptorSetEntryNVX.txt[] + + * pname:layout specifies the sname:VkPipelineLayout that the + pname:descriptorSet is used with. + * pname:descriptorSet specifies the sname:VkDescriptorSet that can be + bound with this entry. + +.Valid Usage +**** + * pname:type must: be ename:VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX +**** + +include::../../validity/structs/VkObjectTableDescriptorSetEntryNVX.txt[] + +include::../../api/structs/VkObjectTableVertexBufferEntryNVX.txt[] + + * pname:buffer specifies the sname:VkBuffer that can be bound as vertex + buffer + +.Valid Usage +**** + * pname:type must: be ename:VK_OBJECT_ENTRY_VERTEX_BUFFER_NVX +**** + +include::../../validity/structs/VkObjectTableVertexBufferEntryNVX.txt[] + +include::../../api/structs/VkObjectTableIndexBufferEntryNVX.txt[] + + * pname:buffer specifies the sname:VkBuffer that can be bound as index + buffer + * pname:indexType specifies the sname:VkIndexType used with this index + buffer + +.Valid Usage +**** + * pname:type must: be ename:VK_OBJECT_ENTRY_INDEX_BUFFER_NVX +**** + +include::../../validity/structs/VkObjectTableIndexBufferEntryNVX.txt[] + +include::../../api/structs/VkObjectTablePushConstantEntryNVX.txt[] + + * pname:layout specifies the sname:VkPipelineLayout that push constants + using this pname:objectIndex are used with.
+ * pname:stageFlags specifies the sname:VkShaderStageFlags that + push constants using this pname:objectIndex are used with. + +.Valid Usage +**** + * pname:type must: be ename:VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX +**** + +include::../../validity/structs/VkObjectTablePushConstantEntryNVX.txt[] + +Use the following command to unregister resources from an object table: + +include::../../api/protos/vkUnregisterObjectsNVX.txt[] + + * pname:device is the logical device that creates the object table. + * pname:objectTable is the table from which the resources are + unregistered. + * pname:objectCount is the number of resources being removed from the + object table. + * pname:pObjectEntryTypes provides an array of sname:VkObjectEntryTypeNVX + for the resources being removed. + * pname:pObjectIndices provides the array of object indices to be removed. + +.Valid Usage +**** + * At any pname:pObjectIndices there must: be a registered resource + already. + * The pname:pObjectEntryTypes of the resource at pname:pObjectIndices + must: match. + * All operations on the device using the registered resource must: have + been completed. +**** + +include::../../validity/protos/vkUnregisterObjectsNVX.txt[] \ No newline at end of file diff --git a/doc/specs/vulkan/chapters/VK_NV_external_memory/allocate_memory.txt b/doc/specs/vulkan/chapters/VK_NV_external_memory/allocate_memory.txt index 330a49d1..558f7f23 100644 --- a/doc/specs/vulkan/chapters/VK_NV_external_memory/allocate_memory.txt +++ b/doc/specs/vulkan/chapters/VK_NV_external_memory/allocate_memory.txt @@ -3,7 +3,7 @@ instance, add a slink:VkExportMemoryAllocateInfoNV structure to the pname:pNext chain of the slink:VkMemoryAllocateInfo structure, specifying the handle types that may: be exported.
-The slink:VkMemoryAllocateInfo structure is defined as: +The slink:VkExportMemoryAllocateInfoNV structure is defined as: include::../../api/structs/VkExportMemoryAllocateInfoNV.txt[] diff --git a/doc/specs/vulkan/chapters/cmdbuffers.txt b/doc/specs/vulkan/chapters/cmdbuffers.txt index 3f8fbaee..d0b523f7 100644 --- a/doc/specs/vulkan/chapters/cmdbuffers.txt +++ b/doc/specs/vulkan/chapters/cmdbuffers.txt @@ -523,6 +523,11 @@ Once recording starts, an application records a sequence of commands (ftext:vkCmd*) to set state in the command buffer, draw, dispatch, and other commands. +ifdef::VK_NVX_device_generated_commands[] +Several commands can also be recorded indirectly from sname:VkBuffer +content, see <>. +endif::VK_NVX_device_generated_commands[] + // refBegin vkEndCommandBuffer Finish recording a command buffer To complete recording of a command buffer, call: @@ -581,6 +586,8 @@ possible. fname:vkQueueSubmit is a <>, with each batch defined by an element of pname:pSubmits as an instance of the slink:VkSubmitInfo structure. +Batches begin execution in the order they appear in pname:pSubmits, but may: +complete out of order. Fence and semaphore operations submitted with flink:vkQueueSubmit have additional ordering constraints compared to other submission commands, with @@ -600,6 +607,11 @@ operation>> section of <>. * If pname:fence is not dlink:VK_NULL_HANDLE, pname:fence must: not be associated with any other queue command that has not yet completed execution on that queue + * Any calls to flink:vkCmdSetEvent, flink:vkCmdResetEvent or + flink:vkCmdWaitEvents that have been recorded into any of the command + buffer elements of the pname:pCommandBuffers member of any element of + pname:pSubmits, must: not reference any slink:VkEvent that is referenced + by any of those commands that is pending execution on another queue. **** include::../validity/protos/vkQueueSubmit.txt[] @@ -764,8 +776,9 @@ ename:VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT. 
in * If fname:vkCmdExecuteCommands is being called within a render pass instance, any given element of pname:pCommandBuffers must: have been - recorded with a render pass that is compatible with the current render - pass - see <> + recorded with slink:VkCommandBufferInheritanceInfo::pname:renderPass set + to a render pass that is <> with + the current render pass. * If fname:vkCmdExecuteCommands is being called within a render pass instance, and any given element of pname:pCommandBuffers was recorded with sname:VkCommandBufferInheritanceInfo::pname:framebuffer not equal diff --git a/doc/specs/vulkan/chapters/fundamentals.txt b/doc/specs/vulkan/chapters/fundamentals.txt index 66239ca4..7cf3e8a3 100644 --- a/doc/specs/vulkan/chapters/fundamentals.txt +++ b/doc/specs/vulkan/chapters/fundamentals.txt @@ -159,12 +159,12 @@ individual batches can be expressed with Before a fence or semaphore is signaled, it is guaranteed that any previously submitted queue operations have completed execution, and that memory writes from those queue operations are -<> -to future queue operations. +<> to future +queue operations. Waiting on a signaled semaphore or fence guarantees that previous writes that are available are also -<> -to subsequent commands. +<> to subsequent +commands. Command buffer boundaries, both between primary command buffers of the same or different batches or submissions as well as between primary and secondary @@ -211,13 +211,13 @@ In general, action commands are those commands that alter framebuffer attachments, read/write buffer or image memory, or write to query pools. Synchronization commands introduce explicit -<> between two sets of action commands, where the second set of -commands depends on the first set of commands. +<> between +two sets of action commands, where the second set of commands depends on the +first set of commands. 
These dependencies enforce that both the execution of certain -<> in the later set -occur after the execution of certain stages in the source set, and that the -effects of <> +<> in the later set occur +after the execution of certain stages in the source set, and that the +effects of <> performed by certain pipeline stages occur in order and are visible to each other. When not enforced by an explicit dependency or otherwise forbidden by the @@ -347,6 +347,10 @@ buffers using the object are recording or pending execution: * sname:VkCommandPool * sname:VkDeviceMemory * sname:VkDescriptorSet +ifdef::VK_NVX_device_generated_commands[] + * sname:VkObjectTableNVX + * sname:VkIndirectCommandsLayout +endif::VK_NVX_device_generated_commands[] The following Vulkan objects must: not be destroyed while any queue is executing commands that use the object: diff --git a/doc/specs/vulkan/chapters/memory.txt b/doc/specs/vulkan/chapters/memory.txt index 6888d967..a7b419b8 100644 --- a/doc/specs/vulkan/chapters/memory.txt +++ b/doc/specs/vulkan/chapters/memory.txt @@ -876,8 +876,8 @@ The application must: guarantee that any previously submitted command that writes to this range has completed before the host reads from or writes to that range, and that any previously submitted command that reads from that range has completed before the host writes to that region (see -<> for details on fulfilling such -a guarantee). +<> for details on fulfilling +such a guarantee). If the device memory was allocated without the ename:VK_MEMORY_PROPERTY_HOST_COHERENT_BIT set, these guarantees must: be made for an extended range: the application must: round down the start of @@ -944,6 +944,14 @@ It must: be called after the host writes to non-coherent memory have completed and before command buffers that will read or write any of those memory locations are submitted to a queue. 
+[NOTE] +.Note +==== +Unmapping non-coherent memory does not implicitly flush the mapped memory, +and host writes that have not been flushed may: not ever be visible to the +device. +==== + include::../validity/protos/vkFlushMappedMemoryRanges.txt[] // refBegin vkInvalidateMappedMemoryRanges Invalidate ranges of mapped memory objects @@ -966,6 +974,14 @@ write any of those locations. If a range of non-coherent memory is written by the host and then invalidated without first being flushed, its contents are undefined. +[NOTE] +.Note +==== +Mapping non-coherent memory does not implicitly invalidate the mapped +memory, and device writes that have not been invalidated must: be made +visible before the host reads or overwrites them. +==== + include::../validity/protos/vkInvalidateMappedMemoryRanges.txt[] // refBegin VkMappedMemoryRange Structure specifying a mapped memory range @@ -1004,8 +1020,7 @@ ifdef::editing-notes[] .editing-note ==== TODO (Tobias) - There's a circular section reference between this next -section and the <>. +section and the <>. The information is all covered by both places, but it seems a bit weird to have them reference each other. Not sure how to resolve it. @@ -1020,15 +1035,15 @@ management operations to achieve coherency. For host writes to be seen by subsequent command buffer operations, a pipeline barrier from a source of ename:VK_ACCESS_HOST_WRITE_BIT and ename:VK_PIPELINE_STAGE_HOST_BIT to a destination of the relevant device -pipeline stages and access types must: be performed. +<> and +<> must: be performed. Note that such a barrier is performed -<> upon each -command buffer submission, so an explicit barrier is only rarely needed -(e.g. if a command buffer waits upon an event signaled by the host, where -the host wrote some data after submission). -For device writes to be seen by subsequent host reads, a pipeline barrier is -required: to <>. +<> upon each command +buffer submission, so an explicit barrier is only rarely needed (e.g. 
if a +command buffer waits upon an event signaled by the host, where the host +wrote some data after submission). +A pipeline barrier is required: to make writes visible to subsequent reads +on the host. // refBegin vkUnmapMemory Unmap a previously mapped memory object diff --git a/doc/specs/vulkan/chapters/primsrast.txt b/doc/specs/vulkan/chapters/primsrast.txt index 7679820f..240394d8 100644 --- a/doc/specs/vulkan/chapters/primsrast.txt +++ b/doc/specs/vulkan/chapters/primsrast.txt @@ -172,23 +172,27 @@ that sample location: * occlusion queries * blending, logic op and color write -ifndef::VK_AMD_rasterization_order[] -Rasterization order must: follow <>. -endif::VK_AMD_rasterization_order[] +Each of these operations is atomically executed for each primitive and +sample location. +Execution of these operations for each primitive in a subpass occurs in +ifndef::VK_AMD_rasterization_order[] +<>. +endif::VK_AMD_rasterization_order[] ifdef::VK_AMD_rasterization_order[] +an order determined by the application. + The application can: select a graphics pipeline to use one of the following primitive rasterization ordering rules: include::../api/enums/VkRasterizationOrderAMD.txt[] - * ename:VK_RASTERIZATION_ORDER_STRICT_AMD indicates that primitive - rasterization must: follow <>. - * ename:VK_RASTERIZATION_ORDER_RELAXED_AMD indicates that primitive - rasterization may: not follow <>. + * ename:VK_RASTERIZATION_ORDER_STRICT_AMD indicates that the order of + these operations for each primitive in a subpass must: occur in + <>. + * ename:VK_RASTERIZATION_ORDER_RELAXED_AMD indicates that the order of + these operations for each primitive in a subpass may: not occur in + <>. 
The rasterization order to use for a graphics pipeline is specified by chaining a sname:VkPipelineRasterizationStateRasterizationOrderAMD structure diff --git a/doc/specs/vulkan/chapters/renderpass.txt b/doc/specs/vulkan/chapters/renderpass.txt index 72012443..30d76ece 100644 --- a/doc/specs/vulkan/chapters/renderpass.txt +++ b/doc/specs/vulkan/chapters/renderpass.txt @@ -29,20 +29,23 @@ instance. A _subpass description_ describes the subset of attachments that is involved in the execution of a subpass. Each subpass can: read from some attachments as _input attachments_, write -to some as _color attachments_ or _depth/stencil attachments_, and do -resolve operations to others as _resolve attachments_. +to some as _color attachments_ or _depth/stencil attachments_, and perform +_multisample resolve operations_ to _resolve attachments_. A subpass description can: also include a set of _preserve attachments_, which are attachments that are not read or written by the subpass but whose contents must: be preserved throughout the subpass. -A subpass _uses_ an attachment if the attachment is a color, depth/stencil, -resolve, or input attachment for that subpass. +A subpass _uses an attachment_ if the attachment is a color, depth/stencil, +resolve, or input attachment for that subpass (as determined by the +pname:pColorAttachments, pname:pDepthStencilAttachment, +pname:pResolveAttachments, and pname:pInputAttachments members of +slink:VkSubpassDescription, respectively). A subpass does not use an attachment if that attachment is preserved by the subpass. -The first use of an attachment is in the lowest numbered subpass that uses +The _first use of an attachment_ is in the lowest numbered subpass that uses that attachment. -Similarly, the last use of an attachment is in the highest numbered subpass -that uses that attachment. +Similarly, the _last use of an attachment_ is in the highest numbered +subpass that uses that attachment. 
The subpasses in a render pass all render to the same dimensions, and fragments for pixel (x,y,layer) in one subpass can: only read attachment @@ -62,24 +65,16 @@ However, it is also quite common for a render pass to only contain a single subpass. ==== -_Subpass dependencies_ describe ordering restrictions between pairs of -subpasses. -If no dependencies are specified, implementations may: reorder or overlap -portions (e.g., certain shader stages) of the execution of subpasses. -Dependencies limit the extent of overlap or reordering, and are defined -using masks of pipeline stages and memory access types. -Each dependency acts as an -<>, similarly to how <> are defined. -Dependencies are needed if two subpasses operate on attachments with -overlapping ranges of the same sname:VkDeviceMemory object and at least one -subpass writes to that range. +_Subpass dependencies_ describe <> between subpasses. A _subpass dependency chain_ is a sequence of subpass dependencies in a render pass, where the source subpass of each subpass dependency (after the first) equals the destination subpass of the previous dependency. +Execution of subpasses may: overlap or execute out of order with regard to +other subpasses, unless otherwise enforced by an execution dependency. + A render pass describes the structure of subpasses and attachments independent of any specific image views for the attachments. The specific image views that will be used for the attachments, and their @@ -173,6 +168,11 @@ include::../api/structs/VkRenderPassCreateInfo.txt[] * The value of any element of the pname:pPreserveAttachments member in any given element of pname:pSubpasses must: not be ename:VK_ATTACHMENT_UNUSED + * For any member of pname:pAttachments with a pname:loadOp equal to + ename:VK_ATTACHMENT_LOAD_OP_CLEAR, the first use of that attachment + must: not specify a pname:layout equal to + ename:VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL or + ename:VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL.
**** include::../validity/structs/VkRenderPassCreateInfo.txt[] @@ -203,14 +203,26 @@ include::../api/enums/VkAttachmentDescriptionFlagBits.txt[] // refBegin VkAttachmentLoadOp Specify how contents of an attachment are treated at the beginning of a subpass include::../api/enums/VkAttachmentLoadOp.txt[] -- - ** ename:VK_ATTACHMENT_LOAD_OP_LOAD means the contents within the render - area will be preserved. + ** ename:VK_ATTACHMENT_LOAD_OP_LOAD means the previous contents of the + image within the render area will be preserved. + For attachments with a depth/stencil format, this uses the access type + ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT. + For attachments with a color format, this uses the access type + ename:VK_ACCESS_COLOR_ATTACHMENT_READ_BIT. ** ename:VK_ATTACHMENT_LOAD_OP_CLEAR means the contents within the render area will be cleared to a uniform value, which is specified when a render pass instance is begun. - ** ename:VK_ATTACHMENT_LOAD_OP_DONT_CARE means the contents within the - area need not be preserved; the contents of the attachment will be - undefined inside the render area. + For attachments with a depth/stencil format, this uses the access type + ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT. + For attachments with a color format, this uses the access type + ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT. + ** ename:VK_ATTACHMENT_LOAD_OP_DONT_CARE means the previous contents + within the area need not be preserved; the contents of the attachment + will be undefined inside the render area. + For attachments with a depth/stencil format, this uses the access type + ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT. + For attachments with a color format, this uses the access type + ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT. 
* pname:storeOp specifies how the contents of color and depth components of the attachment are treated at the end of the subpass where it is last used: @@ -219,15 +231,19 @@ include::../api/enums/VkAttachmentLoadOp.txt[] // refBegin VkAttachmentStoreOp Specify how contents of an attachment are treated at the end of a subpass include::../api/enums/VkAttachmentStoreOp.txt[] -- - ** ename:VK_ATTACHMENT_STORE_OP_STORE means the contents within the render - area are written to memory and will be available for reading after the - render pass instance completes once the writes have been synchronized - with ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT (for color attachments) - or ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT (for - depth/stencil attachments). + ** ename:VK_ATTACHMENT_STORE_OP_STORE means the contents generated during + the render pass and within the render area are written to memory. + For attachments with a depth/stencil format, this uses the access type + ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT. + For attachments with a color format, this uses the access type + ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT. ** ename:VK_ATTACHMENT_STORE_OP_DONT_CARE means the contents within the render area are not needed after rendering, and may: be discarded; the contents of the attachment will be undefined inside the render area. + For attachments with a depth/stencil format, this uses the access type + ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT. + For attachments with a color format, this uses the access type + ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT. * pname:stencilLoadOp specifies how the contents of stencil components of the attachment are treated at the beginning of the subpass where it is first used, and must: be one of the same values allowed for pname:loadOp @@ -243,11 +259,35 @@ include::../api/enums/VkAttachmentStoreOp.txt[] During a render pass instance, an attachment can: use a different layout in each subpass, if desired. 
+[[renderpass-load-store-ops]] If the attachment uses a color format, then pname:loadOp and pname:storeOp are used, and pname:stencilLoadOp and pname:stencilStoreOp are ignored. If the format has depth and/or stencil components, pname:loadOp and pname:storeOp apply only to the depth data, while pname:stencilLoadOp and pname:stencilStoreOp define how the stencil data is handled. +pname:loadOp and pname:stencilLoadOp define the _load operations_ that +execute as part of the first subpass that uses the attachment. +pname:storeOp and pname:stencilStoreOp define the _store operations_ that +execute as part of the last subpass that uses the attachment. + +The load operation for each value in an attachment used by a subpass +happens-before any command recorded into that subpass reads from that value. +Load operations for attachments with a depth/stencil format execute in the +ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT pipeline stage. +Load operations for attachments with a color format execute in the +ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage. + +Store operations for each value in an attachment used by a subpass +happen-after any command recorded into that subpass writes to that value. +Store operations for attachments with a depth/stencil format execute in the +ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT pipeline stage. +Store operations for attachments with a color format execute in the +ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage. + +If an attachment is not used by any subpass, then pname:loadOp, +pname:storeOp, pname:stencilStoreOp, and pname:stencilLoadOp are ignored, +and the attachment's memory contents will not be modified by execution of a +render pass instance. [[renderpass-precision]] During a render pass instance, input/color attachments with color formats @@ -259,7 +299,7 @@ attachment format, but must: be represented with the same range. 
When such a component is loaded via the pname:loadOp, it will be converted into an implementation-dependent format used by the render pass. Such components must: be converted from the render pass format, to the -format of the attachment, before they are stored or resolved at the end of a +format of the attachment, before they are resolved or stored at the end of a render pass instance via pname:storeOp. Conversions occur as described in <> and <>. +The precise set of valid scenarios is described in more detail +<>. If a set of attachments alias each other, then all except the first to be used in the render pass must: use an pname:initialLayout of @@ -335,17 +379,14 @@ the first alias must: not be used in any later subpasses. However, an application can: assign the same image view to multiple aliasing attachment indices, which allows that image view to be used multiple times even if other aliases are used in between. -Once an attachment needs the ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT -bit, there should: be no additional cost of introducing additional aliases, -and using these additional aliases may: allow more efficient clearing of the -attachments on multiple uses via ename:VK_ATTACHMENT_LOAD_OP_CLEAR. [NOTE] .Note ==== -The exact set of attachment indices that alias with each other is not known -until a framebuffer is created using the render pass, so the above -conditions cannot: be validated at render pass creation time. +Once an attachment needs the ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT +bit, there should: be no additional cost of introducing additional aliases, +and using these additional aliases may: allow more efficient clearing of the +attachments on multiple uses via ename:VK_ATTACHMENT_LOAD_OP_CLEAR. 
==== // refBegin VkSubpassDescription Structure specifying a subpass description @@ -383,14 +424,15 @@ include::../api/structs/VkSubpassDescription.txt[] pname:colorAttachmentCount slink:VkAttachmentReference structures that lists which of the render pass's attachments are resolved to at the end of the subpass, and what layout each attachment will be in during the - resolve. + multisample resolve operation. If pname:pResolveAttachments is not `NULL`, each of its elements corresponds to a color attachment (the element in - pname:pColorAttachments at the same index). - At the end of each subpass, the subpass's color attachments are resolved - to corresponding resolve attachments, unless the resolve attachment - index is ename:VK_ATTACHMENT_UNUSED or pname:pResolveAttachments is - `NULL`. + pname:pColorAttachments at the same index), and a multisample resolve + operation is defined for each attachment. + At the end of each subpass, multisample resolve operations read the + subpass's color attachments, and resolve the samples for each pixel to + the same pixel location in the corresponding resolve attachments, unless + the resolve attachment index is ename:VK_ATTACHMENT_UNUSED. If the first use of an attachment in a render pass is as a resolve attachment, then the pname:loadOp is effectively ignored as the resolve is guaranteed to overwrite all pixels in the render area. @@ -407,19 +449,20 @@ include::../api/structs/VkSubpassDescription.txt[] subpass. The contents of an attachment within the render area become undefined at the -start of a subpass S if all of the following conditions are true: +start of a subpass *S* if all of the following conditions are true: * The attachment is used as a color, depth/stencil, or resolve attachment in any subpass in the render pass. - * There is a subpass S1 that uses or preserves the attachment, and a - subpass dependency from S1 to S. - * The attachment is not used or preserved in subpass S. 
+ * There is a subpass *S~1~* that uses or preserves the attachment, and a + subpass dependency from *S~1~* to *S*. + * The attachment is not used or preserved in subpass *S*. -Once the contents of an attachment become undefined in subpass S, they +Once the contents of an attachment become undefined in subpass *S*, they remain undefined for subpasses in subpass dependency chains starting with -subpass S until they are written again. +subpass *S* until they are written again. However, they remain valid for subpasses in other subpass dependency chains -starting with subpass S1 if those subpasses use or preserve the attachment. +starting with subpass *S~1~* if those subpasses use or preserve the +attachment. .Valid Usage **** @@ -470,8 +513,6 @@ include::../api/structs/VkAttachmentReference.txt[] ename:VK_ATTACHMENT_UNUSED, then no writes occur for those attachments. * pname:layout is a elink:VkImageLayout value specifying the layout the attachment uses during the subpass. - The implementation will automatically perform layout transitions as - needed between subpasses to make each subpass use the requested layouts. .Valid Usage **** @@ -487,58 +528,70 @@ The sname:VkSubpassDependency structure is defined as: include::../api/structs/VkSubpassDependency.txt[] - * pname:srcSubpass and pname:dstSubpass are the subpass indices of the - producer and consumer subpasses, respectively. - pname:srcSubpass and pname:dstSubpass can: also have the special value - ename:VK_SUBPASS_EXTERNAL. - The source subpass must: always be a lower numbered subpass than the - destination subpass (excluding external subpasses and - <>), so that the order of subpass descriptions is a - valid execution ordering, avoiding cycles in the dependency graph. - * pname:srcStageMask, pname:dstStageMask, pname:srcAccessMask, - pname:dstAccessMask, and pname:dependencyFlags describe an - <> between subpasses. 
- The bits that can: be included in pname:dependencyFlags are: -+ --- -// refBegin VkDependencyFlagBits Bitmask specifying dependencies between subpasses -include::../api/enums/VkDependencyFlagBits.txt[] --- - ** If pname:dependencyFlags contains ename:VK_DEPENDENCY_BY_REGION_BIT, - then the dependency is by-region as defined in - <>. + * pname:srcSubpass is the subpass index of the first subpass in the + dependency, or ename:VK_SUBPASS_EXTERNAL. + * pname:dstSubpass is the subpass index of the second subpass in the + dependency, or ename:VK_SUBPASS_EXTERNAL. + * pname:srcStageMask defines a <>. + * pname:dstStageMask defines a <>. + * pname:srcAccessMask defines a <>. + * pname:dstAccessMask defines a <>. + * pname:dependencyFlags is a bitmask of elink:VkDependencyFlagBits. -Each subpass dependency defines an execution and memory dependency between -two sets of commands, with the second set depending on the first set. -When pname:srcSubpass does not equal pname:dstSubpass then the first set of -commands is: +If pname:srcSubpass is equal to pname:dstSubpass then the +slink:VkSubpassDependency describes a +<>, and only constrains the pipeline barriers allowed within +a subpass instance. +Otherwise, when a render pass instance which includes a subpass dependency +is submitted to a queue, it defines a memory dependency between the +subpasses identified by pname:srcSubpass and pname:dstSubpass. - * All commands in the subpass indicated by pname:srcSubpass, if - pname:srcSubpass is not ename:VK_SUBPASS_EXTERNAL. - * All commands before the render pass instance, if pname:srcSubpass is - ename:VK_SUBPASS_EXTERNAL. +If pname:srcSubpass is equal to ename:VK_SUBPASS_EXTERNAL, the first +<> includes +commands submitted to the queue before the render pass instance began. 
+Otherwise, the first set of commands includes all commands submitted as part +of the subpass instance identified by pname:srcSubpass and any load, store +or multisample resolve operations on attachments used in pname:srcSubpass. +In either case, the first synchronization scope is limited to operations on +the pipeline stages determined by the +<> specified by +pname:srcStageMask. -While the corresponding second set of commands is: +If pname:dstSubpass is equal to ename:VK_SUBPASS_EXTERNAL, the second +<> includes +commands submitted after the render pass instance is ended. +Otherwise, the second set of commands includes all commands submitted as +part of the subpass instance identified by pname:dstSubpass and any load, +store or multisample resolve operations on attachments used in +pname:dstSubpass. +In either case, the second synchronization scope is limited to operations on +the pipeline stages determined by the +<> specified +by pname:dstStageMask. - * All commands in the subpass indicated by pname:dstSubpass, if - pname:dstSubpass is not ename:VK_SUBPASS_EXTERNAL. - * All commands after the render pass instance, if pname:dstSubpass is - ename:VK_SUBPASS_EXTERNAL. +The first <> is +limited to access in the pipeline stages determined by the +<> specified by +pname:srcStageMask. +It is also limited to access types in the <> specified by pname:srcAccessMask. -When pname:srcSubpass equals pname:dstSubpass then the first set consists of -commands in the subpass before a call to flink:vkCmdPipelineBarrier and the -second set consists of commands in the subpass following that same call as -described in the -<> section. +The second <> is +limited to access in the pipeline stages determined by the +<> specified +by pname:dstStageMask. +It is also limited to access types in the <> specified by pname:dstAccessMask. 
-The pname:srcStageMask, pname:dstStageMask, pname:srcAccessMask, -pname:dstAccessMask, and pname:dependencyFlags parameters of the dependency -are interpreted the same way as for other dependencies, as described in -<>. +The <> defined by a subpass dependency affect the execution +of <> within the +render pass. .Valid Usage **** @@ -576,196 +629,178 @@ are interpreted the same way as for other dependencies, as described in ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, or ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT - * If pname:srcSubpass is equal to pname:dstSubpass, the highest bit value - included in pname:srcStageMask must: be less than or equal to the lowest - bit value in pname:dstStageMask + * If pname:srcSubpass is equal to pname:dstSubpass, the + <> pipeline + stage in pname:srcStageMask must: be + <> than or + equal to the <> pipeline stage in pname:dstStageMask **** include::../validity/structs/VkSubpassDependency.txt[] -Automatic image layout transitions between subpasses also interact with the -subpass dependencies. -If two subpasses are connected by a dependency and those two subpasses use -the same attachment in a different layout, then the layout transition will -occur after the memory accesses via pname:srcAccessMask have completed in -all pipeline stages included in pname:srcStageMask in the source subpass, -and before any memory accesses via pname:dstAccessMask occur in any pipeline -stages included in pname:dstStageMask in the destination subpass. 
- -The automatic image layout transitions from pname:initialLayout to the first -used layout (if it is different) are performed according to the following -rules: - - * If the attachment does not include the - ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT bit and there is no - subpass dependency from ename:VK_SUBPASS_EXTERNAL to the first subpass - that uses the attachment, then it is as if there were such a dependency - with pname:srcStageMask = pname:srcAccessMask = 0 and pname:dstStageMask - and pname:dstAccessMask including all relevant bits (all graphics - pipeline stages and all access types that use image resources), with the - transition executing as part of that dependency. - In other words, it may: overlap work before the render pass instance and - is complete before the subpass begins. - * If the attachment does not include the - ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT bit and there is a subpass - dependency from ename:VK_SUBPASS_EXTERNAL to the first subpass that uses - the attachment, then the transition executes as part of that dependency - and according to its stage and access masks. - It must: not overlap work that came before the render pass instance that - is included in the source masks, but it may: overlap work in previous - subpasses. - * If the attachment includes the - ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT bit, then the transition - executes according to all the subpass dependencies with pname:dstSubpass - equal to the first subpass index that the attachment is used in. - That is, it occurs after all memory accesses in the source stages and - masks from all the source subpasses have completed and are available, - and before the union of all the destination stages begin, and the new - layout is visible to the union of all the destination access types. - If there are no incoming subpass dependencies, then this case follows - the first rule. 
- -Similar rules apply for the transition to the pname:finalLayout, using -dependencies with pname:dstSubpass equal to ename:VK_SUBPASS_EXTERNAL - -If an attachment specifies the ename:VK_ATTACHMENT_LOAD_OP_CLEAR load -operation, then it will logically be cleared at the start of the first -subpass where it is used. - +ifdef::editing-notes[] [NOTE] -.Note +.editing-note ==== -Implementations may: move clears earlier as long as it does not affect the -operation of a render pass instance. -For example, an implementation may: choose to clear all attachments at the -start of the render pass instance. -If an attachment has the ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT flag -set, then the clear must: occur at the start of subpass where the attachment -is first used, in order to preserve the operation of the render pass -instance. +The following two alleged implicit dependencies are practically no-ops, as +the operations they describe are already guaranteed by semaphores and +submission order (so they're almost entirely no-ops on their own). +The *only* reason they exist is because it simplifies reasoning about where +<> happen. +Further rewrites of this chapter could potentially remove the need for +these. ==== +endif::editing-notes[] -The first use of an attachment must: not specify a layout equal to -ename:VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL or -ename:VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL if the attachment specifies -that the pname:loadOp is ename:VK_ATTACHMENT_LOAD_OP_CLEAR. -If a subpass uses the same attachment as both an input attachment and either -a color attachment or a depth/stencil attachment, then both uses must: -observe the result of the clear. +If there is no subpass dependency from ename:VK_SUBPASS_EXTERNAL to the +first subpass that uses an attachment, then an implicit subpass dependency +exists from ename:VK_SUBPASS_EXTERNAL to the first subpass it is used in. 
+The subpass dependency operates as if defined with the following parameters: -Similarly, if an attachment specifies that the pname:storeOp is -ename:VK_ATTACHMENT_STORE_OP_STORE, then it will logically be stored at the -end of the last subpass where it is used. +[source,c] +---- +VkSubpassDependency implicitDependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL; + .dstSubpass = firstSubpass; // First subpass attachment is used in + .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + .srcAccessMask = 0; + .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + .dependencyFlags = 0; +}; +---- -[NOTE] -.Note -==== -Implementations may: move stores later as long as it does not affect the -operation of a render pass instance. -If an attachment has the ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT flag -set, then the store must: occur at the end of the highest numbered subpass -that uses the attachment. -==== +Similarly, if there is no subpass dependency from the last subpass that uses +an attachment to ename:VK_SUBPASS_EXTERNAL, then an implicit subpass +dependency exists from the last subpass it is used in to +ename:VK_SUBPASS_EXTERNAL. +The subpass dependency operates as if defined with the following parameters: -If an attachment is not used by any subpass, then the pname:loadOp and the -pname:storeOp are ignored and the attachment's memory contents will not be -modified by execution of a render pass instance. 
+[source,c] +---- +VkSubpassDependency implicitDependency = { + .srcSubpass = lastSubpass; // Last subpass attachment is used in + .dstSubpass = VK_SUBPASS_EXTERNAL; + .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + .dstAccessMask = 0; + .dependencyFlags = 0; +}; +---- -It will be common for a render pass to consist of a simple linear graph of -dependencies, where subpass N depends on subpass N-1 for all N, and the -operation of the memory barriers and layout transitions is fairly -straightforward to reason about for those simple cases. -But for more complex graphs, there are some rules that govern when there -must: be dependencies between subpasses. +[[renderpass-layout-transitions]] +As subpasses may: overlap or execute out of order with regards to other +subpasses unless a subpass dependency chain describes otherwise, the layout +transitions required between subpasses cannot: be known to an application. +Instead, an application provides the layout that each attachment must: be in +at the start and end of a renderpass, and the layout it must: be in during +each subpass it is used in. +The implementation then must: execute layout transitions between subpasses +in order to guarantee that the images are in the layouts required by each +subpass, and in the final layout at the end of the render pass. -As stated earlier, render passes must: include subpass dependencies which -(either directly or via a subpass dependency chain) separate any two -subpasses that operate on the same attachment or aliasing attachments, if at -least one of those subpasses writes to the attachment. 
-If an image layout changes between those two subpasses, the implementation -uses the stageMasks and accessMasks indicated by the subpass dependency as -the masks that control when the layout transition must: occur. -If there is not a layout change on the attachment, or if an implementation -treats the two layouts identically, then it may: treat the dependency as a -simple execution/memory barrier. +Automatic layout transitions away from the layout used in a subpass +happen-after the availability operations for all dependencies with that +subpass as the pname:srcSubpass. -If two subpasses use the same attachment in different layouts but both uses -are read-only (i.e. input attachment, or read-only depth/stencil -attachment), the application does not need to express a dependency between -the two subpasses. -Implementations that treat the two layouts differently may: deduce and -insert a dependency between the subpasses, with the implementation choosing -the appropriate stage masks and access masks based on whether the attachment -is used as an input or depth/stencil attachment, and may: insert the -appropriate layout transition along with the execution/memory barrier. -Implementations that treat the two layouts identically need not insert a -barrier, and the two subpasses may: execute simultaneously. -The stage masks and access masks are chosen as follows: +Automatic layout transitions into the layout used in a subpass happen-before +the visibility operations for all dependencies with that subpass as the +pname:dstSubpass. - * for input attachments, stage mask = - ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, access mask = - ename:VK_ACCESS_INPUT_ATTACHMENT_READ_BIT. 
- * for depth/stencil attachments, stage mask = - ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, access mask = - ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT +Automatic layout transitions away from pname:initialLayout happen-after the +availability operations for all dependencies with a pname:srcSubpass equal +to ename:VK_SUBPASS_EXTERNAL, where pname:dstSubpass uses the attachment +that will be transitioned. +For attachments created with ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, +automatic layout transitions away from pname:initialLayout happen-after the +availability operations for all dependencies with a pname:srcSubpass equal +to ename:VK_SUBPASS_EXTERNAL, where pname:dstSubpass uses any aliased +attachment. -where pname:srcStageMask and pname:srcAccessMask are taken based on usage in -the source subpass and pname:dstStageMask and pname:dstAccessMask are taken -based on usage in the destination subpass. +Automatic layout transitions into pname:finalLayout happen-before the +visibility operations for all dependencies with a pname:dstSubpass equal to +ename:VK_SUBPASS_EXTERNAL, where pname:srcSubpass uses the attachment that +will be transitioned. +For attachments created with ename:VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, +automatic layout transitions into pname:finalLayout happen-before the +visibility operations for all dependencies with a pname:dstSubpass equal to +ename:VK_SUBPASS_EXTERNAL, where pname:srcSubpass uses any aliased +attachment. + +If two subpasses use the same attachment in different layouts, and both +layouts are read-only, no subpass dependency needs to be specified between +those subpasses. +If an implementation treats those layouts separately, it must: insert an +implicit subpass dependency between those subpasses to separate the uses in +each layout.
+The subpass dependency operates as if defined with the following parameters: + +[source,c] +---- +// Used for input attachments +VkPipelineStageFlags inputAttachmentStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; +VkAccessFlags inputAttachmentAccess = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + +// Used for depth stencil attachments +VkPipelineStageFlags depthStencilAttachmentStages = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; +VkAccessFlags depthStencilAttachmentAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + +VkSubpassDependency implicitDependency = { + .srcSubpass = firstSubpass; + .dstSubpass = secondSubpass; + .srcStageMask = inputAttachmentStages | depthStencilAttachmentStages; + .dstStageMask = inputAttachmentStages | depthStencilAttachmentStages; + .srcAccessMask = inputAttachmentAccess | depthStencilAttachmentAccess; + .dstAccessMask = inputAttachmentAccess | depthStencilAttachmentAccess; + .dependencyFlags = 0; +}; +---- [[renderpass-feedbackloop]] If a subpass uses the same attachment as both an input attachment and either -a color attachment or a depth/stencil attachment, reads from the input -attachment are not automatically coherent with writes through the color or -depth/stencil attachment. -In order to achieve well-defined results, one of two criteria must: be -satisfied. -First, if the color components or depth/stencil components read by the input -attachment are mutually exclusive with the components written by the color -or depth/stencil attachment then there is no _feedback loop_ and the reads -and writes both function normally, with the reads observing values from the -previous subpass(es) or from memory. -This option requires the graphics pipelines used by the subpass to disable -writes to color components that are read as inputs via the -pname:colorWriteMask, and to disable writes to depth/stencil components that -are read as inputs via pname:depthWriteEnable or pname:stencilTestEnable. 
+a color attachment or a depth/stencil attachment, writes via the color or +depth/stencil attachment are not automatically made visible to reads via the +input attachment, causing a _feedback loop_, except in any of the following +conditions: -Second, if the input attachment reads components that are written by the -color or depth/stencil attachment, then there is a feedback loop and a -pipeline barrier must: be used between when the attachment is written and -when it is subsequently read by later fragments. -This pipeline barrier must: follow the rules of a self-dependency as -described in -<>, where the barrier's flags include: + * If the color components or depth/stencil components read by the input + attachment are mutually exclusive with the components written by the + color or depth/stencil attachments, then there is no feedback loop. + This requires the graphics pipelines used by the subpass to disable + writes to color components that are read as inputs via the + pname:colorWriteMask, and to disable writes to depth/stencil components + that are read as inputs via pname:depthWriteEnable or + pname:stencilTestEnable. + * If the attachment is used as an input attachment and depth/stencil + attachment only, and the depth/stencil attachment is not written to. + * If a memory dependency is inserted between when the attachment is + written and when it is subsequently read by later fragments. + <> expressing a + <> are the only way to achieve this, and one must: be + inserted every time a fragment will read values at a particular sample + (x, y, layer, sample) coordinate, if those values have been written + since the most recent pipeline barrier; or since the start of the + subpass if there have been no pipeline barriers since the start of the + subpass.
- * pname:dstStageMask = ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - * pname:dstAccessMask = ename:VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, and - * pname:srcAccessMask = ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT (for - color attachments) or pname:srcAccessMask = - ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT (for depth/stencil - attachments). - * pname:srcStageMask = ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - (for color attachments) or pname:srcStageMask = - ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT (for depth/stencil - attachments). - * pname:dependencyFlags = ename:VK_DEPENDENCY_BY_REGION_BIT. - -A pipeline barrier is needed each time a fragment will read a particular -(x,y,layer,sample) location if that location has been written since the most -recent pipeline barrier, or since the start of the subpass if there have -been no pipeline barriers since the start of the subpass. - -An attachment used as both an input attachment and color attachment must: be -in the ename:VK_IMAGE_LAYOUT_GENERAL layout. -An attachment used as both an input attachment and depth/stencil attachment -must: be in either the ename:VK_IMAGE_LAYOUT_GENERAL or -ename:VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL layout. -Since an attachment in the -ename:VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL layout is read-only, -this situation is not a feedback loop. +An attachment used as both an input attachment and a color attachment must: +be in the ename:VK_IMAGE_LAYOUT_GENERAL layout. +An attachment used as an input attachment and depth/stencil attachment must: +be in either ename:VK_IMAGE_LAYOUT_GENERAL or +ename:VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL. +An attachment must: not be used as both a depth/stencil attachment and a +color attachment. 
// refBegin vkDestroyRenderPass Destroy a render pass object @@ -1080,9 +1115,9 @@ include::../api/structs/VkRenderPassBeginInfo.txt[] pname:renderArea is the render area that is affected by the render pass instance. -The effects of attachment load, store and resolve operations are restricted -to the pixels whose x and y coordinates fall within the render area on all -attachments. +The effects of attachment load, store and multisample resolve operations are +restricted to the pixels whose x and y coordinates fall within the render +area on all attachments. The render area extends to all layers of pname:framebuffer. The application must: ensure (using scissor if necessary) that all rendering is contained within the render area, otherwise the pixels outside of the diff --git a/doc/specs/vulkan/chapters/resources.txt b/doc/specs/vulkan/chapters/resources.txt index cfd83ba8..b31c849c 100644 --- a/doc/specs/vulkan/chapters/resources.txt +++ b/doc/specs/vulkan/chapters/resources.txt @@ -85,7 +85,8 @@ include::../api/enums/VkBufferUsageFlagBits.txt[] * ename:VK_BUFFER_USAGE_TRANSFER_SRC_BIT indicates that the buffer can: be used as the source of a _transfer command_ (see the definition of - <>). + <>). * ename:VK_BUFFER_USAGE_TRANSFER_DST_BIT indicates that the buffer can: be used as the destination of a transfer command. * ename:VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT indicates that the buffer @@ -116,6 +117,11 @@ include::../api/enums/VkBufferUsageFlagBits.txt[] suitable for passing as the pname:buffer parameter to fname:vkCmdDrawIndirect, fname:vkCmdDrawIndexedIndirect, or fname:vkCmdDispatchIndirect. 
+ifdef::VK_NVX_device_generated_commands[] + It is also suitable for passing as the pname:buffer member of + sname:VkIndirectCommandsTokenNVX, or pname:sequencesCountBuffer or + pname:sequencesIndexBuffer member of sname:VkCmdProcessCommandsInfoNVX +endif::VK_NVX_device_generated_commands[] Any combination of bits can: be specified for pname:usage, but at least one of the bits must: be set in order to create a valid buffer. @@ -606,6 +612,8 @@ flink:vkGetPhysicalDeviceImageFormatProperties. * If the <> feature is not enabled, pname:flags must: not contain ename:VK_IMAGE_CREATE_SPARSE_BINDING_BIT + * If pname:imageType is ename:VK_IMAGE_TYPE_1D, pname:flags must: not + contain ename:VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT * If the <> feature is not enabled, and pname:imageType is ename:VK_IMAGE_TYPE_2D, pname:flags must: not contain @@ -1025,7 +1033,7 @@ Applications have control over which layout each image subresource uses, and can: transition an image subresource from one layout to another. Transitions can: happen with an image memory barrier, included as part of a fname:vkCmdPipelineBarrier or a fname:vkCmdWaitEvents command buffer command -(see <>), or as part of a subpass +(see <>), or as part of a subpass dependency within a render pass (see sname:VkSubpassDependency). The image layout state is per-image subresource, and separate image subresources of the same image can: be in different layouts at the same time @@ -1127,7 +1135,8 @@ The type(s) of device access supported by each layout are: ename:VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT usage bit enabled. * ename:VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: must: only be used as a source image of a transfer command (see the definition of - <>). + <>). This layout is valid only for image subresources of images created with the ename:VK_IMAGE_USAGE_TRANSFER_SRC_BIT usage bit enabled. 
* ename:VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: must: only be used as a @@ -1571,9 +1580,10 @@ attachment descriptors, and framebuffer attachments. .Valid Usage **** * If pname:levelCount is not ename:VK_REMAINING_MIP_LEVELS, - [eq]#pname:levelCount# must: be non-zero and [eq]#(pname:baseMipLevel + - pname:levelCount)# must: be less than or equal to the pname:mipLevels - specified in slink:VkImageCreateInfo when the image was created + [eq]#pname:levelCount# must: be non-zero and [eq]#(pname:baseMipLevel + {plus} pname:levelCount)# must: be less than or equal to the + pname:mipLevels specified in slink:VkImageCreateInfo when the image was + created * If pname:layerCount is not ename:VK_REMAINING_ARRAY_LAYERS, [eq]#pname:layerCount# must: be non-zero and [eq]#(pname:baseArrayLayer + pname:layerCount)# must: be less than or equal to the @@ -2003,51 +2013,28 @@ Ranges of buffers and image subresources of image objects created using ename:VK_SHARING_MODE_EXCLUSIVE must: only be accessed by queues in the same queue family at any given time. In order for a different queue family to be able to interpret the memory -contents of a range or image subresource, the application must: transfer -exclusive ownership of the range or image subresource between the source and -destination queue families with the following sequence of operations: - - . Release exclusive ownership from the source queue family to the - destination queue family. - . Use semaphores to ensure proper execution control for the ownership - transfer. - . Acquire exclusive ownership for the destination queue family from the - source queue family. - -To release exclusive ownership of a range of a buffer or image subresource -of an image object, the application must: execute a buffer or image memory -barrier, respectively (see slink:VkBufferMemoryBarrier and -slink:VkImageMemoryBarrier) on a queue from the source queue family. 
-The pname:srcQueueFamilyIndex parameter of the barrier must: be set to the -source queue family index, and the pname:dstQueueFamilyIndex parameter to -the destination queue family index. - -To acquire exclusive ownership, the application must: execute the same -buffer or image memory barrier (i.e. an identically defined instance of the -slink:VkBufferMemoryBarrier or slink:VkImageMemoryBarrier structure that was -used for the exclusive ownership release) on a queue from the destination -queue family. +contents of a range or image subresource, the application must: perform a +<>. Upon creation, resources using ename:VK_SHARING_MODE_EXCLUSIVE are not owned by any queue family. -A buffer or image memory barrier is not required to acquire ownership when +A buffer or image memory barrier is not required to acquire _ownership_ when no queue family owns the resource - it is implicitly acquired upon first use within a queue. -However, images still require a <> from ename:VK_IMAGE_LAYOUT_UNDEFINED or -ename:VK_IMAGE_LAYOUT_PREINITIALIZED before being used on the first queue. -This layout transition can: either be accomplished by an image memory -barrier or by use in a render pass instance. -Once a queue family has used a range or image subresource of an -ename:VK_SHARING_MODE_EXCLUSIVE resource, its contents are undefined to -other queue families unless ownership is transferred. -The contents may: also become undefined for other reasons, e.g. as a result -of writes to an image subresource that aliases the same memory. -A queue family can: take ownership of a range or image subresource without -an ownership transfer in the same way as for a resource that was just -created, however doing so means any contents written by other queue families -or via incompatible aliases are undefined. +.Note +[NOTE] +==== +Images still require a <> from +ename:VK_IMAGE_LAYOUT_UNDEFINED or ename:VK_IMAGE_LAYOUT_PREINITIALIZED +before being used on the first queue. 
+==== + +A queue family can: take ownership of an image subresource or buffer range +of a resource created with ename:VK_SHARING_MODE_EXCLUSIVE, without an +ownership transfer, in the same way as for a resource that was just created; +however, taking ownership in this way has the effect that the contents of +the image subresource or buffer range are undefined. Ranges of buffers and image subresources of image objects created using ename:VK_SHARING_MODE_CONCURRENT must: only be accessed by queues from the @@ -2132,8 +2119,9 @@ If any sparse blocks of a sparse image have been made undefined, then only the image subresources containing them must: be transitioned. Use of an overlapping range by two aliases must: be separated by a memory -dependency using the appropriate access types if at least one of those uses -performs writes, whether the aliases interpret memory consistently or not. +dependency using the appropriate <> if at least one of those uses performs writes, whether the aliases +interpret memory consistently or not. If buffer or image memory barriers are used, the scope of the barrier must: contain the entire range and/or set of image subresources that overlap. diff --git a/doc/specs/vulkan/chapters/shaders.txt b/doc/specs/vulkan/chapters/shaders.txt index 199b2e90..303e5d14 100644 --- a/doc/specs/vulkan/chapters/shaders.txt +++ b/doc/specs/vulkan/chapters/shaders.txt @@ -212,8 +212,9 @@ writes in finite time. ==== Stores issued to different memory locations within a single shader -invocation may: not be visible to other invocations in the order they were -performed. +invocation may: not be visible to other invocations, or may: not become +visible in the order they were performed. + The code:OpMemoryBarrier instruction can: be used to provide stronger ordering of reads and writes performed by a single invocation. code:OpMemoryBarrier guarantees that any memory transactions issued by the @@ -232,6 +233,25 @@ final write would also see the previous writes. 
Without the memory barrier, the final write may: be visible before the previous writes. +Writes that are the result of shader stores through a variable decorated +with code:Coherent automatically have available writes to the same buffer, +buffer view, or image view made visible to them, and are themselves +automatically made available to access by the same buffer, buffer view, or +image view. +Reads that are the result of shader loads through a variable decorated with +code:Coherent automatically have available writes to the same buffer, buffer +view, or image view made visible to them. +The order that coherent writes to different locations become available is +undefined, unless enforced by a memory barrier instruction or other memory +dependency. + +.Note +[NOTE] +==== +Explicit memory dependencies must: still be used to guarantee availability +and visibility for access via other buffers, buffer views, or image views. +==== + The built-in atomic memory transaction instructions can: be used to read and write a given memory address atomically. While built-in atomic functions issued by multiple shader invocations are @@ -239,6 +259,18 @@ executed in undefined order relative to each other, these functions perform both a read and a write of a memory address and guarantee that no other memory transaction will write to the underlying memory between the read and write. +Atomic operations ensure automatic availability and visibility for writes +and reads in the same way as those to code:Coherent variables. + +.Note +[NOTE] +==== +Memory accesses performed on different resource descriptors with the same +memory backing may: not be well-defined even with the code:Coherent +decoration or via atomics, due to things such as image layouts or ownership +of the resource - as described in the <> chapter.
+==== [NOTE] .Note diff --git a/doc/specs/vulkan/chapters/sparsemem.txt b/doc/specs/vulkan/chapters/sparsemem.txt index c649cec4..0b592ec8 100644 --- a/doc/specs/vulkan/chapters/sparsemem.txt +++ b/doc/specs/vulkan/chapters/sparsemem.txt @@ -1351,6 +1351,8 @@ include::../api/protos/vkQueueBindSparse.txt[] fname:vkQueueBindSparse is a <>, with each batch defined by an element of pname:pBindInfo as an instance of the slink:VkBindSparseInfo structure. +Batches begin execution in the order they appear in pname:pBindInfo, but +may: complete out of order. Within a batch, a given range of a resource must: not be bound more than once. diff --git a/doc/specs/vulkan/chapters/synchronization.txt b/doc/specs/vulkan/chapters/synchronization.txt index 1c8ae0ed..b2ace7cf 100644 --- a/doc/specs/vulkan/chapters/synchronization.txt +++ b/doc/specs/vulkan/chapters/synchronization.txt @@ -5,39 +5,700 @@ = Synchronization and Cache Control Synchronization of access to resources is primarily the responsibility of -the application. -In Vulkan, there are four forms of concurrency during execution: between the -host and device, between the queues, between queue submissions, and between -commands within a command buffer. -Vulkan provides the application with a set of synchronization primitives for -these purposes. -Further, memory caches and other optimizations mean that the normal flow of -command execution does not guarantee that all memory transactions from a -command are immediately visible to other agents with views into a given -range of memory. -Vulkan also provides barrier operations to ensure this type of -synchronization. +the application in Vulkan. +The order of execution of commands with respect to the host and other +commands on the device has few implicit guarantees, and needs to be +explicitly specified. +Memory caches and other optimizations are also explicitly managed, requiring +that the flow of data through the system is largely under application +control. 
-Four synchronization primitive types are exposed by Vulkan. -These are: +Whilst some implicit guarantees exist between commands, four explicit +synchronization primitives are exposed by Vulkan: - * <> - * <> - * <> - * <> +<>:: + Fences can: be used to communicate to the host that execution of some + task on the device has completed. -Each is covered in detail in its own subsection of this chapter. -Fences are used to communicate completion of execution of command buffer -submissions to queues back to the application. -Fences can: therefore be used as a coarse-grained synchronization mechanism. -Semaphores are generally associated with resources or groups of resources -and can: be used to marshal ownership of shared data. -Their status is not visible to the host. -Events provide a finer-grained synchronization primitive which can: be -signaled at command level granularity by both device and host, and can: be -waited upon by either. -Barriers provide execution and memory synchronization between sets of -commands. +<>:: + Semaphores can: be used to control resource access across multiple + queues. + +<>:: + Events provide a fine-grained synchronization primitive which can: be + signaled either within a command buffer or by the host, and can: be + waited upon within a command buffer or queried on the host. + +<>:: + Pipeline barriers also provide synchronization control within a command + buffer, but at a single point, rather than with separate signal and wait + operations. + +In addition to the base primitives provided here, <> provide a useful synchronization framework for most rendering +tasks, built upon the concepts in this chapter. +Many cases that would otherwise need an application to use synchronization +primitives in this chapter can: be expressed more efficiently as part of a +render pass. 
+ + +[[synchronization-dependencies]] +== Execution and Memory Dependencies + +An _operation_ is an arbitrary amount of work to be executed on the host, a +device, or an external entity such as a presentation engine. +Synchronization commands introduce explicit _execution dependencies_, and +_memory dependencies_ between two sets of operations defined by the +command's two _synchronization scopes_. + +[[synchronization-dependencies-scopes]] +The synchronization scopes define which other operations a synchronization +command is able to create execution dependencies with. +Any type of operation that is not in a synchronization command's +synchronization scopes will not be included in the resulting dependency. +For example, for many synchronization commands, the synchronization scopes +can: be limited to just operations executing in specific +<>, which allows other +pipeline stages to be excluded from a dependency. +Other scoping options are possible, depending on the particular command. + +[[synchronization-dependencies-execution]] +An _execution dependency_ is a guarantee that for two sets of operations, +the first set must: _happen-before_ the second set. +If an operation happens-before another operation, then the first operation +must: complete before the second operation is initiated. +More precisely: + + * Let *A* and *B* be separate sets of operations. + * Let *S* be a synchronization command. + * Let *A~S~* and *B~S~* be the synchronization scopes of *S*. + * Let *A'* be the intersection of sets *A* and *A~S~*. + * Let *B'* be the intersection of sets *B* and *B~S~*. + * Submitting *A*, *S* and *B* for execution, in that order, will result in + execution dependency *E* between *A'* and *B'*. + * Execution dependency *E* guarantees that *A'* happens-before *B'*. 
+ +[[synchronization-dependencies-chains]] +An _execution dependency chain_ is a sequence of execution dependencies that +form a happens-before relation between the first dependency's *A'* and the +final dependency's *B'*. +For each consecutive pair of execution dependencies, a chain exists if the +intersection of *B~S~* in the first dependency and *A~S~* in the second +dependency is not an empty set. +The formation of a single execution dependency from an execution dependency +chain can be described by substituting the following in the description of +execution dependencies: + + * Let *S* be a set of synchronization commands that generate an execution + dependency chain. + * Let *A~S~* be the first synchronization scope of the first command in + *S*. + * Let *B~S~* be the second synchronization scope of the last command in + *S*. + +.Note +[NOTE] +==== +An execution dependency is inherently also multiple execution dependencies - +a dependency exists between each subset of *A'* and each subset of *B'*, and +the same is true for execution dependency chains. +For example, a synchronization command with multiple +<> in its stage masks +effectively generates one dependency between each source stage and each +destination stage. +This can be useful to think about when considering how execution chains are +formed if they don't involve all parts of a synchronization command's +dependency. +Similarly, any set of adjacent dependencies in an execution dependency chain +can: be considered an execution dependency chain in its own right. +==== + +Execution dependencies alone are not sufficient to guarantee that values +resulting from writes in one set of operations can: be read from another set +of operations. + +[[synchronization-dependencies-available-and-visible]] +Two additional types of operation are used to control memory access. +_Availability operations_ cause the values generated by specified memory +write accesses to become _available_ for future access. 
+Any available value remains available until a subsequent write to the same +memory location occurs (whether it is made available or not) or the memory +is freed. +_Visibility operations_ cause any available values to become _visible_ to +specified memory accesses. + +[[synchronization-dependencies-memory]] +A _memory dependency_ is an execution dependency which includes availability +and visibility operations such that: + + * The first set of operations happens-before the availability operation. + * The availability operation happens-before the visibility operation. + * The visibility operation happens-before the second set of operations. + +Once written values are made visible to a particular type of memory access, +they can: be read or written by that type of memory access. +Most synchronization commands in Vulkan define a memory dependency. + +[[synchronization-dependencies-access-scopes]] +The specific memory accesses that are made available and visible are defined +by the _access scopes_ of a memory dependency. +Any type of access that is in a memory dependency's first access scope and +occurs in *A'* is made available. +Any type of access that is in a memory dependency's second access scope and +occurs in *B'* has any available writes made visible to it. +Any type of operation that is not in a synchronization command's access +scopes will not be included in the resulting dependency. + +A memory dependency enforces availability and visibility of memory accesses +and execution order between two sets of operations. +Adding to the description of <>: + + * Let *a* be the set of memory accesses performed by *A'*. + * Let *b* be the set of memory accesses performed by *B'*. + * Let *a~S~* be the first access scope of the first command in *S*. + * Let *b~S~* be the second access scope of the last command in *S*. + * Let *a'* be the intersection of sets *a* and *a~S~*. + * Let *b'* be the intersection of sets *b* and *b~S~*.
+ * Submitting *A*, *S* and *B* for execution, in that order, will result in + a memory dependency *m* between *A'* and *B'*. + * Memory dependency *m* guarantees that: + ** Memory writes in *a'* are made available. + ** Available memory writes, including those from *a'*, are made visible to + *b'*. + +[NOTE] +.Note +==== +Execution and memory dependencies are used to solve data hazards, i.e. to +ensure that read and write operations occur in a well-defined order. +Write-after-read hazards can be solved with just an execution dependency, +but read-after-write and write-after-write hazards need appropriate memory +dependencies to be included between them. +If an application does not include dependencies to solve these hazards, the +results and execution orders of memory accesses are undefined. +==== + + +[[synchronization-image-layout-transitions]] +=== Image Layout Transitions + +Image subresources can: be transitioned from one <> to another as part of a <> (e.g. by using an +<>). +When a layout transition is specified in a memory dependency, it +happens-after the availability operations in the memory dependency, and +happens-before the visibility operations. +Image layout transitions may: perform read and write accesses on all memory +bound to the image subresource range, so applications must: ensure that all +memory writes have been made +<> before a +layout transition is executed. +Available memory is automatically made visible to a layout transition, and +writes performed by a layout transition are automatically made available. + +Layout transitions always apply to a particular image subresource range, and +specify both an old layout and new layout. +If the old layout does not match the new layout, a transition occurs. +The old layout must: match the current layout of the image subresource +range, with one exception. 
+The old layout can: always be specified as ename:VK_IMAGE_LAYOUT_UNDEFINED, +though doing so invalidates the contents of the image subresource range. + +.Note +[NOTE] +==== +Setting the old layout to ename:VK_IMAGE_LAYOUT_UNDEFINED implies that the +contents of the image subresource need not be preserved. +Implementations may: use this information to avoid performing expensive data +transition operations. +==== + +.Note +[NOTE] +==== +Applications must: ensure that layout transitions happen-after all +operations accessing the image with the old layout, and happen-before any +operations that will access the image with the new layout. +Layout transitions are potentially read/write operations, so not defining +appropriate memory dependencies to guarantee this will result in a data +race. +==== + +The contents of any portion of another resource which aliases memory that is +bound to the transitioned image subresource range are undefined after an +image layout transition. + + +[[synchronization-pipeline-stages]] +=== Pipeline Stages + +The work performed by an <> consists of multiple operations, which are performed by a sequence +of logically independent execution units known as _pipeline stages_. +The exact pipeline stages executed depend on the particular action command +that is used, and current command buffer state when the action command was +recorded. +<>, <>, +<>, and <> all execute +<>. + +Execution of operations across pipeline stages must: adhere to +<>, +<>, and +<>. +Otherwise, execution across pipeline stages may: overlap or execute out of +order with regard to other stages, unless otherwise enforced by an +execution dependency. + +// refBegin VkPipelineStageFlagBits - Bitmask specifying pipeline stages + +Several of the <> include pipeline stage parameters, restricting the +<> for that +command to those stages. +This allows fine-grained control over the exact execution dependencies and +accesses performed by action commands.
+Implementations should: use these pipeline stages to avoid unnecessary +stalls or cache flushing. + +These pipeline stages are specified using a bitmask: + +include::../api/enums/VkPipelineStageFlagBits.txt[] + +The meaning of each bit is: + + * ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: Stage of the pipeline where any + commands are initially received by the queue. +ifdef::VK_NVX_device_generated_commands[] + * ename:VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX: Stage of the pipeline + where device-side generation of commands via + flink:vkCmdProcessCommandsNVX is handled. +endif::VK_NVX_device_generated_commands[] + * ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT: Stage of the pipeline where + Draw/DispatchIndirect data structures are consumed. + * ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT: Stage of the pipeline where + vertex and index buffers are consumed. + * ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT: Vertex shader stage. + * ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT: Tessellation + control shader stage. + * ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT: Tessellation + evaluation shader stage. + * ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT: Geometry shader stage. + * ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT: Fragment shader stage. + * ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT: Stage of the pipeline + where early fragment tests (depth and stencil tests before fragment + shading) are performed. + This stage also includes <> for framebuffer attachments with a depth/stencil format. + * ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT: Stage of the pipeline + where late fragment tests (depth and stencil tests after fragment + shading) are performed. + This stage also includes <> for framebuffer attachments with a depth/stencil format. + * ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT: Stage of the + pipeline after blending where the final color values are output from the + pipeline. 
+ This stage also includes <> and multisample resolve operations for framebuffer + attachments with a color format. + * [[synchronization-pipeline-stages-transfer]] + ename:VK_PIPELINE_STAGE_TRANSFER_BIT: Execution of copy commands. + This includes the operations resulting from all <>, <> (with the exception of + flink:vkCmdClearAttachments), and flink:vkCmdCopyQueryPoolResults. + * ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT: Execution of a compute + shader. + * ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT: Final stage in the pipeline + where operations generated by all commands complete execution. + * ename:VK_PIPELINE_STAGE_HOST_BIT: A pseudo-stage indicating execution on + the host of reads/writes of device memory. + This stage is not invoked by any action commands. + * ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT: Execution of all graphics + pipeline stages. + Equivalent to the logical or of: + + ** ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT +ifdef::VK_NVX_device_generated_commands[] + ** ename:VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX +endif::VK_NVX_device_generated_commands[] + ** ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT + ** ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT + ** ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT + ** ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT + ** ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT + ** ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT + ** ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + ** ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + ** ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT + ** ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + ** ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT + + * ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT: Equivalent to the logical or + of every other pipeline stage flag that is supported on the queue it is + used with. 
+ +[NOTE] +.Note +==== +An execution dependency with only ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT +in the destination stage mask will only prevent that stage from executing in +subsequently submitted commands. +As this stage doesn't perform any actual execution, this is not observable - +in effect, it does not delay processing of subsequent commands. +Similarly an execution dependency with only +ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask will +effectively not wait for any prior commands to complete. + +When defining a memory dependency, using only +ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT or +ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT would never make any accesses +available and/or visible because these stages do not access memory. + +ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT and +ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT are useful for accomplishing layout +transitions and queue ownership operations when the required execution +dependency is satisfied by other means - for example, semaphore operations +between queues. +==== + +// refEnd VkPipelineStageFlagBits + +[[synchronization-pipeline-stages-masks]] +If a synchronization command includes a source stage mask, its first +<> only includes +execution of the pipeline stages specified in that mask, as well as any +<> stages. +If a synchronization command includes a destination stage mask, its second +<> only includes +execution of the pipeline stages specified in that mask, as well as any +<> stages. + +<> are affected +in a similar way. +If a synchronization command includes a source stage mask, its first +<> only includes +memory access performed by pipeline stages specified in that mask. +If a synchronization command includes a destination stage mask, its second +<> only includes +memory access performed by pipeline stages specified in that mask. + +[NOTE] +.Note +==== +Implementations may: not support synchronization at every pipeline stage for +every synchronization operation. 
+If a pipeline stage that an implementation does not support synchronization +for appears in a source stage mask, then it may: substitute that stage for +any logically later stage. +If a pipeline stage that an implementation does not support synchronization +for appears in a destination stage mask, then it may: substitute that stage +for any logically earlier stage. + +For example, if an implementation is unable to signal an event immediately +after vertex shader execution is complete, it may: instead signal the event +after color attachment output has completed. + +If an implementation makes such a substitution, it must: not affect the +semantics of execution or memory dependencies or image and buffer memory +barriers. +==== + +Certain pipeline stages are only available on queues that support a +particular set of operations. +The following table lists, for each pipeline stage flag, which queue +capability flag must: be supported by the queue. +When multiple flags are enumerated in the second column of the table, it +means that the pipeline stage is supported on the queue if it supports any +of the listed capability flags. +For further details on queue capabilities see +<> +and <>. 
+ +.Supported pipeline stage flags +[width="100%",cols="69%,31%",options="header",align="center"] +|==== +|Pipeline stage flag | Required queue capability flag +|ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | None +ifdef::VK_NVX_device_generated_commands[] +|ename:VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT +endif::VK_NVX_device_generated_commands[] +|ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT +|ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | ename:VK_QUEUE_COMPUTE_BIT +|ename:VK_PIPELINE_STAGE_TRANSFER_BIT | ename:VK_QUEUE_GRAPHICS_BIT, ename:VK_QUEUE_COMPUTE_BIT or ename:VK_QUEUE_TRANSFER_BIT +|ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | None +|ename:VK_PIPELINE_STAGE_HOST_BIT | None +|ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | ename:VK_QUEUE_GRAPHICS_BIT +|ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT | None +|==== + +[[synchronization-pipeline-stages-order]] +Pipeline stages that execute as a result of a command logically complete +execution in a specific order, such that completion of a logically later +pipeline stage must: not happen-before completion of a logically earlier +stage. 
+This means that including any given stage in the source stage mask for a +particular synchronization command also implies that any logically earlier +stages are included in *A~S~* for that command. + +Similarly, initiation of a logically earlier pipeline stage must: not +happen-after initiation of a logically later pipeline stage. +Including any given stage in the destination stage mask for a particular +synchronization command also implies that any logically later stages are +included in *B~S~* for that command. + +.Note +[NOTE] +==== +Logically earlier/later stages are not included when defining the +<> of a +<>. +==== + +[[synchronization-pipeline-stages-types]] +The order of pipeline stages depends on the particular pipeline: graphics, +compute, transfer or host. + +For the graphics pipeline, the following stages occur in this order: + + * ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT +ifdef::VK_NVX_device_generated_commands[] + * ename:VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX +endif::VK_NVX_device_generated_commands[] + * ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT + * ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT + * ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT + * ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT + * ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT + * ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT + * ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + * ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + * ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT + * ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + * ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT + +For the compute pipeline, the following stages occur in this order: + + * ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT +ifdef::VK_NVX_device_generated_commands[] + * ename:VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX +endif::VK_NVX_device_generated_commands[] + * ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT + * ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT + * ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT +
+For the transfer pipeline, the following stages occur in this order: + + * ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT + * ename:VK_PIPELINE_STAGE_TRANSFER_BIT + * ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT + +For host operations, only one pipeline stage occurs, so no order is +guaranteed: + + * ename:VK_PIPELINE_STAGE_HOST_BIT + + +[[synchronization-access-types]] +=== Access Types + +Memory in Vulkan can: be accessed from within shader invocations and via +some fixed-function stages of the pipeline. +The _access type_ is a function of the <> +used, or how a fixed-function stage accesses memory. +Each access type corresponds to a bit flag in slink:VkAccessFlagBits. + +[[synchronization-access-masks]] +Some synchronization commands take sets of access types as parameters to +define the <> of +a memory dependency. +If a synchronization command includes a source access mask, its first +<> only includes +accesses via the access types specified in that mask. +Similarly, if a synchronization command includes a destination access mask, +its second <> only +includes accesses via the access types specified in that mask. + +// refBegin VkAccessFlagBits Bitmask specifying memory access types that will participate in a memory dependency + +Access types that can be set in an access mask include: + +[[synchronization-access-flags]] +include::../api/enums/VkAccessFlagBits.txt[] + + * ename:VK_ACCESS_INDIRECT_COMMAND_READ_BIT: Read access to an indirect + command structure read as part of an indirect drawing or dispatch + command. + * ename:VK_ACCESS_INDEX_READ_BIT: Read access to an index buffer as part + of an indexed drawing command, bound by flink:vkCmdBindIndexBuffer. + * ename:VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: Read access to a vertex + buffer as part of a drawing command, bound by + flink:vkCmdBindVertexBuffers. + * ename:VK_ACCESS_UNIFORM_READ_BIT: Read access to a + <>. 
+ * ename:VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: Read access to an + <> within a renderpass during fragment + shading. + * ename:VK_ACCESS_SHADER_READ_BIT: Read access to a + <>, + <>, + <>, + <>, or + <>. + * ename:VK_ACCESS_SHADER_WRITE_BIT: Write access to a + <>, + <>, or + <>. + * ename:VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: Read access to a + <>, such as via <>, <>, or via certain + <>. + * ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: Write access to a + <> during a <> or via certain <>. + * ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: Read access to a + <>, via <> or via certain <>. + * ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: Write access to a + <>, via <> or via certain <>. + * ename:VK_ACCESS_TRANSFER_READ_BIT: Read access to an image or buffer in + a <> operation. + * ename:VK_ACCESS_TRANSFER_WRITE_BIT: Write access to an image or buffer + in a <> or <> operation. + * ename:VK_ACCESS_HOST_READ_BIT: Read access by a host operation. + * ename:VK_ACCESS_HOST_WRITE_BIT: Write access by a host operation. + * ename:VK_ACCESS_MEMORY_READ_BIT: Read access via non-specific entities. + These entities include the Vulkan device and host, but may: also include + entities external to the Vulkan device or otherwise not part of the core + Vulkan pipeline. + When included in a destination access mask, makes all available writes + visible to all future read accesses on entities known to the Vulkan + device. + * ename:VK_ACCESS_MEMORY_WRITE_BIT: Write access via non-specific + entities. + These entities include the Vulkan device and host, but may: also include + entities external to the Vulkan device or otherwise not part of the core + Vulkan pipeline. + When included in a source access mask, all writes that are performed by + entities known to the Vulkan device are made available. + When included in a destination access mask, makes all available writes + visible to all future write accesses on entities known to the Vulkan + device. 
+ +Certain access types are only performed by a subset of pipeline stages. +Any synchronization command that takes both stage masks and access masks +uses both to define the <> - only the specified access types performed by the specified stages +are included in the access scope. +An application must: not specify an access flag in a synchronization command +if it does not include a pipeline stage in the corresponding stage mask that +is able to perform accesses of that type. +The following table lists, for each access flag, which pipeline stages can: +perform that type of access. + +.Supported access types +[width="100%",cols="67%,33%",options="header",align="center"] +|==== +|Access flag | Supported pipeline stages +|ename:VK_ACCESS_INDIRECT_COMMAND_READ_BIT | ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT +|ename:VK_ACCESS_INDEX_READ_BIT | ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT +|ename:VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT +|ename:VK_ACCESS_UNIFORM_READ_BIT | ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, or ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT +|ename:VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT +|ename:VK_ACCESS_SHADER_READ_BIT | ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, or ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT +|ename:VK_ACCESS_SHADER_WRITE_BIT | ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, 
ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, or ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT +|ename:VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT +|ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT +|ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, or ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT +|ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, or ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT +|ename:VK_ACCESS_TRANSFER_READ_BIT | ename:VK_PIPELINE_STAGE_TRANSFER_BIT +|ename:VK_ACCESS_TRANSFER_WRITE_BIT | ename:VK_PIPELINE_STAGE_TRANSFER_BIT +|ename:VK_ACCESS_HOST_READ_BIT | ename:VK_PIPELINE_STAGE_HOST_BIT +|ename:VK_ACCESS_HOST_WRITE_BIT | ename:VK_PIPELINE_STAGE_HOST_BIT +|ename:VK_ACCESS_MEMORY_READ_BIT | N/A +|ename:VK_ACCESS_MEMORY_WRITE_BIT | N/A +|==== + + +[[synchronization-framebuffer-regions]] +=== Framebuffer Region Dependencies + +<> that operate on, or +with respect to, the framebuffer are collectively the _framebuffer-space_ +pipeline stages. +These stages are: + + * ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + * ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + * ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT + * ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + +For these pipeline stages, an execution or memory dependency from the first +set of operations to the second set can: either be a single +_framebuffer-global_ dependency, or split into multiple _framebuffer-local_ +dependencies. +A dependency with non-framebuffer-space pipeline stages is neither +framebuffer-global nor framebuffer-local. + +A _framebuffer region_ is a set of sample (x, y, layer, sample) coordinates +that is a subset of the entire framebuffer. 
+ +A single framebuffer-local dependency guarantees that only for a single +framebuffer region, the first set of operations and availability operations +happen-before visibility operations and the second set of operations. +No ordering guarantees are made between framebuffer regions for a +framebuffer-local dependency. + +A framebuffer-global dependency guarantees that the first set of operations +for all framebuffer regions happens-before the second set of operations for +any framebuffer region. + +.Note +[NOTE] +==== +Since fragment invocations are not specified to run in any particular +groupings, the size of a framebuffer region is implementation-dependent, not +known to the application, and must: be assumed to be no larger than a single +sample. +==== + +If a synchronization command includes a pname:dependencyFlags parameter, and +specifies the ename:VK_DEPENDENCY_BY_REGION_BIT flag, then it defines +framebuffer-local dependencies for the framebuffer-space pipeline stages in +that synchronization command, for all framebuffer regions. +If no pname:dependencyFlags parameter is included, or the +ename:VK_DEPENDENCY_BY_REGION_BIT flag is not specified, then a +framebuffer-global dependency is specified for those stages. +The ename:VK_DEPENDENCY_BY_REGION_BIT flag does not affect the dependencies +between non-framebuffer-space pipeline stages, nor does it affect the +dependencies between framebuffer-space and non-framebuffer-space pipeline +stages. + +.Note +[NOTE] +==== +Framebuffer-local dependencies are more optimal for most architectures; +particularly tile-based architectures - which can keep framebuffer regions +entirely in on-chip registers and thus avoid external bandwidth across such +a dependency. +Including a framebuffer-global dependency in your rendering will usually +force all implementations to flush data to memory, or to a higher-level +cache, breaking any potential locality optimizations.
+==== [[synchronization-fences]] @@ -45,12 +706,14 @@ commands. // refBegin VkFence Opaque handle to a fence object -Fences can: be used by the host to determine completion of execution of -_queue operations_. - -A fence's status is always either _signaled_ or _unsignaled_. -The host can: poll the status of a single fence, or wait for any or all of a -group of fences to become signaled. +Fences are a synchronization primitive that can: be used to insert a +dependency from a queue to the host. +Fences have two states - signaled and unsignaled. +A fence can: be signaled as part of the execution of a +<> command. +Fences can: be unsignaled on the host with flink:vkResetFences. +Fences can: be waited on by the host with the flink:vkWaitForFences command, +and the current state can: be queried with flink:vkGetFenceStatus. Fences are represented by sname:VkFence handles: @@ -60,13 +723,14 @@ include::../api/handles/VkFence.txt[] // refBegin vkCreateFence Create a new fence object -To create a new fence object, use the command +To create a fence, call: include::../api/protos/vkCreateFence.txt[] * pname:device is the logical device that creates the fence. - * pname:pCreateInfo points to a slink:VkFenceCreateInfo structure - specifying the state of the fence object. + * pname:pCreateInfo is a pointer to an instance of the + sname:VkFenceCreateInfo structure which contains information about how + the fence is to be created. * pname:pAllocator controls host memory allocation as described in the <> chapter. * pname:pFence points to a handle in which the resulting fence object is @@ -80,6 +744,8 @@ The sname:VkFenceCreateInfo structure is defined as: include::../api/structs/VkFenceCreateInfo.txt[] + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. * pname:flags defines the initial state and behavior of the fence. 
Bits which can: be set include: + @@ -89,8 +755,8 @@ include::../api/enums/VkFenceCreateFlagBits.txt[] -- + If pname:flags contains ename:VK_FENCE_CREATE_SIGNALED_BIT then the fence -object is created in the signaled state. -Otherwise it is created in the unsignaled state. +object is created in the signaled state; otherwise it is created in the +unsignaled state. include::../validity/structs/VkFenceCreateInfo.txt[] @@ -107,8 +773,8 @@ include::../api/protos/vkDestroyFence.txt[] .Valid Usage **** - * pname:fence must: not be associated with any queue command that has not - yet completed execution on that queue + * All <> commands that refer + to pname:fence must: have completed execution * If sname:VkAllocationCallbacks were provided when pname:fence was created, a compatible set of callbacks must: be provided here * If no sname:VkAllocationCallbacks were provided when pname:fence was @@ -119,35 +785,48 @@ include::../validity/protos/vkDestroyFence.txt[] // refBegin vkGetFenceStatus Return the status of a fence -To query the status of a fence from the host, use the command +To query the status of a fence from the host, call: include::../api/protos/vkGetFenceStatus.txt[] * pname:device is the logical device that owns the fence. * pname:fence is the handle of the fence to query. -Upon success, fname:vkGetFenceStatus returns the status of the fence, which -is one of: +Upon success, fname:vkGetFenceStatus returns the status of the fence object, +with the following return codes: - * ename:VK_SUCCESS indicates that the fence is signaled. - * ename:VK_NOT_READY indicates that the fence is unsignaled. +.Fence Object Status Codes +[width="80%",options="header"] +|==== +| Status | Meaning +| ename:VK_SUCCESS | The fence specified by pname:fence is signaled. +| ename:VK_NOT_READY | The fence specified by pname:fence is unsignaled. +|==== + +If a <> command is pending +execution, then the value returned by this command may: immediately be out +of date. 
include::../validity/protos/vkGetFenceStatus.txt[] +[[synchronization-fences-unsignaling]] // refBegin vkResetFences Resets one or more fence objects -To reset the status of one or more fences to the unsignaled state, use the -command: +To set the state of fences to unsignaled from the host, call: include::../api/protos/vkResetFences.txt[] * pname:device is the logical device that owns the fences. * pname:fenceCount is the number of fences to reset. - * pname:pFences is a pointer to an array of pname:fenceCount fence handles - to reset. + * pname:pFences is a pointer to an array of fence handles to reset. -If a fence is already in the unsignaled state, then resetting it has no -effect. +When flink:vkResetFences is executed on the host, it defines a _fence +unsignal operation_ for each fence, which resets the fence to the unsignaled +state. + +If any member of pname:pFences is already in the unsignaled state when +flink:vkResetFences is executed, then flink:vkResetFences has no effect on +that fence. .Valid Usage **** @@ -159,24 +838,32 @@ effect. include::../validity/protos/vkResetFences.txt[] [[synchronization-fences-signaling]] -Fences can: be signaled by including them in a <> command, defining a queue operation to signal that fence. -This _fence signal operation_ defines the first half of a memory dependency, -guaranteeing that all memory accesses defined by the queue submission are -made available, and that queue operations described by that submission have -completed execution. -This half of the memory dependency does not include host availability of -memory accesses. -The second half of the dependency can: be defined by flink:vkWaitForFences. +When a fence is submitted to a queue as part of a +<> command, it defines a memory +dependency on the batches that were submitted as part of that command, and +defines a _fence signal operation_ which sets the fence to the signaled +state. 
-Fence signal operations for flink:vkQueueSubmit additionally include all -queue operations previously submitted via flink:vkQueueSubmit in their half -of a memory dependency. +The first <> +includes every batch submitted in the same <> command. +Fence signal operations that are defined by flink:vkQueueSubmit additionally +include all previous queue submissions to the same queue via +flink:vkQueueSubmit in the first synchronization scope. + +The second <> +only includes the fence signal operation. + +The first <> +includes all memory access performed by the device. + +The second <> is +empty. // refBegin vkWaitForFences Wait for one or more fences to become signaled -To cause the host to wait until any one or all of a group of fences is -signaled, use the command: +To wait for one or more fences to enter the signaled state on the host, +call: include::../api/protos/vkWaitForFences.txt[] @@ -212,12 +899,25 @@ fname:vkWaitForFences returns ename:VK_TIMEOUT. If the condition is satisfied before pname:timeout nanoseconds has expired, fname:vkWaitForFences returns ename:VK_SUCCESS. -[[synchronization-fences-devicewrites]] -fname:vkWaitForFences defines the second half of a memory dependency with -the host, for each fence being waited on. -The memory dependency defined by signaling a fence and waiting on the host -does not guarantee that the results of memory accesses will be visible to -the host, or that the memory is available. +include::../validity/protos/vkWaitForFences.txt[] + +[[synchronization-fences-waiting]] +An execution dependency is defined by waiting for a fence to become +signaled, either via flink:vkWaitForFences or by polling on +flink:vkGetFenceStatus. + +The first <> +includes only the fence signal operation. + +The second <> +includes the host operations of flink:vkWaitForFences or +flink:vkGetFenceStatus indicating that the fence has become signaled. 
+ +.Note +[NOTE] +==== +Signaling a fence and waiting on the host does not guarantee that the +results of memory accesses will be visible to the host. To provide that guarantee, the application must: insert a memory barrier between the device writes and the end of the submission that will signal the fence, with pname:dstAccessMask having the ename:VK_ACCESS_HOST_READ_BIT bit @@ -229,8 +929,7 @@ ename:VK_MEMORY_PROPERTY_HOST_COHERENT_BIT set, then fname:vkInvalidateMappedMemoryRanges must: be called after the fence is signaled in order to ensure the writes are visible to the host, as described in <>. - -include::../validity/protos/vkWaitForFences.txt[] +==== [[synchronization-semaphores]] @@ -238,9 +937,14 @@ include::../validity/protos/vkWaitForFences.txt[] // refBegin VkSemaphore Opaque handle to a semaphore object -Semaphores are used to coordinate queue operations both within a queue and -between different queues. -A semaphore's status is always either _signaled_ or _unsignaled_. +Semaphores are a synchronization primitive that can: be used to insert a +dependency between batches submitted to queues. +Semaphores have two states - signaled and unsignaled. +The state of a semaphore can: be signaled after execution of a batch of +commands is completed. +A batch can: wait for a semaphore to become signaled before it begins +execution, and the semaphore is also unsignaled before the batch begins +execution. Semaphores are represented by sname:VkSemaphore handles: @@ -250,18 +954,20 @@ include::../api/handles/VkSemaphore.txt[] // refBegin vkCreateSemaphore Create a new queue semaphore object -To create a new semaphore object, use the command +To create a semaphore, call: include::../api/protos/vkCreateSemaphore.txt[] * pname:device is the logical device that creates the semaphore. - * pname:pCreateInfo points to a slink:VkSemaphoreCreateInfo structure - specifying the state of the semaphore object. 
+ * pname:pCreateInfo is a pointer to an instance of the + sname:VkSemaphoreCreateInfo structure which contains information about + how the semaphore is to be created. * pname:pAllocator controls host memory allocation as described in the <> chapter. * pname:pSemaphore points to a handle in which the resulting semaphore object is returned. - The semaphore is created in the unsignaled state. + +When created, the semaphore is in the unsignaled state. include::../validity/protos/vkCreateSemaphore.txt[] @@ -290,8 +996,8 @@ include::../api/protos/vkDestroySemaphore.txt[] .Valid Usage **** - * pname:semaphore must: not be associated with any queue command that has - not yet completed execution on that queue + * All submitted batches that refer to pname:semaphore must: have completed + execution * If sname:VkAllocationCallbacks were provided when pname:semaphore was created, a compatible set of callbacks must: be provided here * If no sname:VkAllocationCallbacks were provided when pname:semaphore was @@ -300,50 +1006,71 @@ include::../api/protos/vkDestroySemaphore.txt[] include::../validity/protos/vkDestroySemaphore.txt[] -[[synchronization-semaphores-signaling]] -Semaphores can: be signaled by including them in a batch as part of a -<> command, defining a queue -operation to signal that semaphore. -This _semaphore signal operation_ defines the first half of a memory -dependency, guaranteeing that all memory accesses defined by the submitted -queue operations in the batch are made available, and that those queue -operations have completed execution. -Semaphore signal operations for flink:vkQueueSubmit additionally include all -queue operations previously submitted via flink:vkQueueSubmit in their half -of a memory dependency, and all batches that are stored at a lower index in -the same pname:pSubmits array. 
+[[synchronization-semaphores-signaling]] +=== Semaphore Signaling + +When a batch is submitted to a queue via a <>, and it includes semaphores to be signaled, it defines a memory +dependency on the batch, and defines _semaphore signal operations_ which set +the semaphores to the signaled state. + +The first <> +includes every command submitted in the same batch. +Semaphore signal operations that are defined by flink:vkQueueSubmit +additionally include all batches previously submitted to the same queue via +flink:vkQueueSubmit, including batches that are submitted in the same +<> command, but at a lower index +within the array of batches. + +The second <> +includes only the semaphore signal operation. + +The first <> +includes all memory access performed by the device. + +The second <> is +empty. + [[synchronization-semaphores-waiting]] -Signaling of semaphores can: be waited on by similarly including them in a -batch, defining a queue operation to wait for a signal. -A semaphore wait operation defines the second half of a memory dependency -for the semaphores being waited on. -This half of the memory dependency guarantees that the first half has -completed execution, and also guarantees that all available memory accesses -are made visible to the queue operations in the batch. +=== Semaphore Waiting & Unsignaling -Semaphore wait operations for flink:vkQueueSubmit additionally include all -queue operations subsequently submitted via flink:vkQueueSubmit in their -half of a memory dependency, and all batches that are stored at a higher -index in the same pname:pSubmits array. +When a batch is submitted to a queue via a <>, and it includes semaphores to be waited on, it defines a +memory dependency between prior semaphore signal operations and the batch, +and defines _semaphore unsignal operations_ which set the semaphores to the +unsignaled state. 
-When queue execution reaches a semaphore wait operation, the queue will -stall execution of queue operations in the batch until each semaphore -becomes signaled. -Once all semaphores are signaled, the semaphores will be reset to the -unsignaled state, and subsequent queue operations will be permitted to -execute. +The first synchronization scope includes all semaphore signal operations +that operate on semaphores waited on in the same batch, and that +happen-before the wait completes. -Semaphore wait operations defined by flink:vkQueueSubmit only wait at -specific pipeline stages, rather than delaying all of each command buffer's -execution, with the pipeline stages determined by the corresponding element -of the pname:pWaitDstStageMask member of sname:VkSubmitInfo. -Execution of work by those stages in subsequent commands is stalled until -the corresponding semaphore reaches the signaled state. +The second <> +includes every command submitted in the same batch. +In the case of flink:vkQueueSubmit, the second synchronization scope is +limited to operations on the pipeline stages determined by the +<> specified +by the corresponding element of pname:pWaitDstStageMask. +Also, in the case of flink:vkQueueSubmit, the second synchronization scope +additionally includes all batches subsequently submitted to the same queue +via flink:vkQueueSubmit, including batches that are submitted in the same +<> command, but at a higher +index within the array of batches. -[NOTE] +The first <> is +empty. + +The second <> +includes all memory access performed by the device. + +The semaphore unsignal operation happens-after the first set of operations +in the execution dependency, and happens-before the second set of operations +in the execution dependency. 
+ +ifdef::VK_KHR_swapchain[] .Note +[NOTE] ==== A common scenario for using pname:pWaitDstStageMask with values other than ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT is when synchronizing a window @@ -384,11 +1111,8 @@ completes, then the semaphore wait stalls the ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage, then there is a dependency from that same stage to itself with the layout transition performed in between. - -(The primary use case for this example is with the presentation extensions, -thus the etext:VK_IMAGE_LAYOUT_PRESENT_SRC_KHR token is used even though it -is not defined in the core Vulkan specification.) ==== +endif::VK_KHR_swapchain[] [[synchronization-events]] @@ -396,15 +1120,16 @@ is not defined in the core Vulkan specification.) // refBegin VkEvent Opaque handle to a event object -Events represent a fine-grained synchronization primitive that can: be used -to gauge progress through a sequence of commands executed on a queue by -Vulkan. -An event is initially in the unsignaled state. -It can: be signaled by a device, using commands inserted into the command -buffer, or by the host. -It can: also be reset to the unsignaled state by a device or the host. -The host can: query the state of an event. -A device can: wait for one or more events to become signaled. +Events are a synchronization primitive that can: be used to insert a +fine-grained dependency between commands submitted to the same queue, or +between the host and a queue. +Events have two states - signaled and unsignaled. +An application can: signal an event, or unsignal it, on either the host or +the device. +A device can: wait for an event to become signaled before executing further +operations. +No command exists to wait for an event to become signaled on the host, but +the current state of an event can: be queried. 
Events are represented by sname:VkEvent handles: @@ -437,6 +1162,8 @@ The sname:VkEventCreateInfo structure is defined as: include::../api/structs/VkEventCreateInfo.txt[] + * pname:sType is the type of this structure. + * pname:pNext is `NULL` or a pointer to an extension-specific structure. * pname:flags is reserved for future use. include::../validity/structs/VkEventCreateInfo.txt[] @@ -496,6 +1223,7 @@ state has no effect. include::../validity/protos/vkGetEventStatus.txt[] +[[synchronization-events-signaling-host]] // refBegin vkSetEvent Set an event to signaled state To set the state of an event to signaled from the host, call: @@ -505,8 +1233,16 @@ include::../api/protos/vkSetEvent.txt[] * pname:device is the logical device that owns the event. * pname:event is the event to set. +When flink:vkSetEvent is executed on the host, it defines an _event signal +operation_ which sets the event to the signaled state. + +If pname:event is already in the signaled state when flink:vkSetEvent is +executed, then flink:vkSetEvent has no effect, and no event signal operation +occurs. + include::../validity/protos/vkSetEvent.txt[] +[[synchronization-events-unsignaling-host]] // refBegin vkResetEvent Reset an event to non-signaled state To set the state of an event to unsignaled from the host, call: @@ -516,6 +1252,13 @@ include::../api/protos/vkResetEvent.txt[] * pname:device is the logical device that owns the event. * pname:event is the event to reset. +When flink:vkResetEvent is executed on the host, it defines an _event +unsignal operation_ which resets the event to the unsignaled state. + +If pname:event is already in the unsignaled state when flink:vkResetEvent is +executed, then flink:vkResetEvent has no effect, and no event unsignal +operation occurs. 
+ .Valid Usage **** * pname:event must: not be waited on by a fname:vkCmdWaitEvents command @@ -524,10 +1267,13 @@ include::../api/protos/vkResetEvent.txt[] include::../validity/protos/vkResetEvent.txt[] -// refBegin vkCmdSetEvent Set an event object to signaled state The state of an event can: also be updated on the device by commands inserted in command buffers. + +[[synchronization-events-signaling-device]] +// refBegin vkCmdSetEvent Set an event object to signaled state + To set the state of an event to signaled from a device, call: include::../api/protos/vkCmdSetEvent.txt[] @@ -535,8 +1281,26 @@ include::../api/protos/vkCmdSetEvent.txt[] * pname:commandBuffer is the command buffer into which the command is recorded. * pname:event is the event that will be signaled. - * pname:stageMask specifies the pipeline stage at which the state of - pname:event is updated as described below. + * pname:stageMask specifies the <> used to determine when the pname:event is signaled. + +When flink:vkCmdSetEvent is submitted to a queue, it defines an execution +dependency on commands that were submitted before it, and defines an event +signal operation which sets the event to the signaled state. + +The first <> +includes every command previously submitted to the same queue, including +those in the same command buffer and batch. +The synchronization scope is limited to operations on the pipeline stages +determined by the <> specified by pname:stageMask. + +The second <> +includes only the event signal operation. + +If pname:event is already in the signaled state when flink:vkCmdSetEvent is +executed on the device, then flink:vkCmdSetEvent has no effect, no event +signal operation occurs, and no execution dependency is generated. 
.Valid Usage **** @@ -551,6 +1315,7 @@ include::../api/protos/vkCmdSetEvent.txt[] include::../validity/protos/vkCmdSetEvent.txt[] +[[synchronization-events-unsignaling-device]] // refBegin vkCmdResetEvent Reset an event object to non-signaled state To set the state of an event to unsignaled from a device, call: @@ -559,9 +1324,27 @@ include::../api/protos/vkCmdResetEvent.txt[] * pname:commandBuffer is the command buffer into which the command is recorded. - * pname:event is the event that will be reset. - * pname:stageMask specifies the pipeline stage at which the state of - pname:event is updated as described below. + * pname:event is the event that will be unsignaled. + * pname:stageMask specifies the <> used to determine when the pname:event is unsignaled. + +When flink:vkCmdResetEvent is submitted to a queue, it defines an execution +dependency on commands that were submitted before it, and defines an event +unsignal operation which resets the event to the unsignaled state. + +The first <> +includes every command previously submitted to the same queue, including +those in the same command buffer and batch. +The synchronization scope is limited to operations on the pipeline stages +determined by the <> specified by pname:stageMask. + +The second <> +includes only the event unsignal operation. + +If pname:event is already in the unsignaled state when flink:vkCmdResetEvent +is executed on the device, then flink:vkCmdResetEvent has no effect, no +event unsignal operation occurs, and no execution dependency is generated. .Valid Usage **** @@ -578,68 +1361,98 @@ include::../api/protos/vkCmdResetEvent.txt[] include::../validity/protos/vkCmdResetEvent.txt[] -For both fname:vkCmdSetEvent and fname:vkCmdResetEvent, the status of -pname:event is updated once the pipeline stages specified by pname:stageMask -(see <>) have completed executing -prior commands. -The command modifying the event is passed through the pipeline bound to the -command buffer at time of execution. 
- // refBegin vkCmdWaitEvents Wait for one or more events and insert a set of memory To wait for one or more events to enter the signaled state on a device, call: +[[synchronization-events-waiting-device]] include::../api/protos/vkCmdWaitEvents.txt[] * pname:commandBuffer is the command buffer into which the command is recorded. * pname:eventCount is the length of the pname:pEvents array. * pname:pEvents is an array of event object handles to wait on. - * pname:srcStageMask (see <>) is the - bitwise OR of the pipeline stages used to signal the event object - handles in pname:pEvents. - * pname:dstStageMask is the pipeline stages at which the wait will occur. - * pname:pMemoryBarriers is a pointer to an array of - pname:memoryBarrierCount sname:VkMemoryBarrier structures. + * pname:srcStageMask is the <<synchronization-pipeline-stages-masks, source stage mask>>. + * pname:dstStageMask is the <<synchronization-pipeline-stages-masks, destination stage mask>>. + * pname:memoryBarrierCount is the length of the pname:pMemoryBarriers + array. + * pname:pMemoryBarriers is a pointer to an array of slink:VkMemoryBarrier + structures. + * pname:bufferMemoryBarrierCount is the length of the + pname:pBufferMemoryBarriers array. * pname:pBufferMemoryBarriers is a pointer to an array of - pname:bufferMemoryBarrierCount sname:VkBufferMemoryBarrier structures. + slink:VkBufferMemoryBarrier structures. + * pname:imageMemoryBarrierCount is the length of the + pname:pImageMemoryBarriers array. * pname:pImageMemoryBarriers is a pointer to an array of - pname:imageMemoryBarrierCount sname:VkImageMemoryBarrier structures. - See <> for more details about memory - barriers. -fname:vkCmdWaitEvents waits for events set by either fname:vkSetEvent or -fname:vkCmdSetEvent to become signaled. -Logically, it has three phases: - .
Wait at the pipeline stages specified by pname:dstStageMask (see - <>) until the pname:eventCount - event objects specified by pname:pEvents become signaled. - Implementations may: wait for each event object to become signaled in - sequence (starting with the first event object in pname:pEvents, and - ending with the last), or wait for all of the event objects to become - signaled at the same time. - . Execute the memory barriers specified by pname:pMemoryBarriers, - pname:pBufferMemoryBarriers and pname:pImageMemoryBarriers (see - <>). - . Resume execution of pipeline stages specified by pname:dstStageMask +The first synchronization scope only includes event signal operations that +operate on members of pname:pEvents, and the operations that happened-before +the event signal operations. +Event signal operations performed by flink:vkCmdSetEvent that were +previously submitted to the same queue are included in the first +synchronization scope, if the <<synchronization-pipeline-stages-order, logically latest>> pipeline stage in their pname:stageMask parameter is +<<synchronization-pipeline-stages-order, logically earlier>> than or equal +to the <<synchronization-pipeline-stages-order, logically latest>> pipeline +stage in pname:srcStageMask. +Event signal operations performed by flink:vkSetEvent are only included in +the first synchronization scope if ename:VK_PIPELINE_STAGE_HOST_BIT is +included in pname:srcStageMask. -Implementations may: not execute commands in a pipelined manner, so -fname:vkCmdWaitEvents may: not observe the results of a subsequent -fname:vkCmdSetEvent or fname:vkCmdResetEvent command, even if the stages in -pname:dstStageMask occur after the stages in pname:srcStageMask. +The second <<synchronization-dependencies-scopes, synchronization scope>> +includes commands subsequently submitted to the same queue, including those +in the same command buffer and batch. +The second synchronization scope is limited to operations on the pipeline +stages determined by the <<synchronization-pipeline-stages-masks, destination stage mask>> specified by pname:dstStageMask. -Commands that update the state of events in different pipeline stages may: -execute out of order, unless the ordering is enforced by execution -dependencies.
+The first <<synchronization-dependencies-access-scopes, access scope>> is +limited to access in the pipeline stages determined by the +<<synchronization-pipeline-stages-masks, source stage mask>> specified by +pname:srcStageMask. +Within that, the first access scope only includes the first access scopes +defined by elements of the pname:pMemoryBarriers, +pname:pBufferMemoryBarriers and pname:pImageMemoryBarriers arrays, which +each define a set of <<synchronization-memory-barriers, memory barriers>>. +If no memory barriers are specified, then the first access scope includes no +accesses. + +The second <<synchronization-dependencies-access-scopes, access scope>> is +limited to access in the pipeline stages determined by the +<<synchronization-pipeline-stages-masks, destination stage mask>> specified +by pname:dstStageMask. +Within that, the second access scope only includes the second access scopes +defined by elements of the pname:pMemoryBarriers, +pname:pBufferMemoryBarriers and pname:pImageMemoryBarriers arrays, which +each define a set of <<synchronization-memory-barriers, memory barriers>>. +If no memory barriers are specified, then the second access scope includes +no accesses. + +.Note +[NOTE] +==== +flink:vkCmdWaitEvents is used with flink:vkCmdSetEvent to define a memory +dependency between two sets of action commands, roughly in the same way as +pipeline barriers, but split into two commands such that work between the +two may: execute unhindered. +==== [NOTE] .Note ==== Applications should: be careful to avoid race conditions when using events. -For example, an event should: only be reset if no fname:vkCmdWaitEvents -command is executing that waits upon that event. +There is no direct ordering guarantee between a flink:vkCmdResetEvent +command and a flink:vkCmdWaitEvents command submitted after it, so some +other execution dependency must: be included between these commands (e.g. a +semaphore). ==== .Valid Usage @@ -670,247 +1483,15 @@ command is executing that waits upon that event. include::../validity/protos/vkCmdWaitEvents.txt[] -An act of setting or resetting an event in one queue may: not affect or be -visible to other queues. -For cross-queue synchronization, semaphores can: be used.
- - -[[synchronization-execution-and-memory-dependencies]] -== Execution And Memory Dependencies - -Synchronization commands introduce explicit execution and memory -dependencies between two sets of action commands, where the second set of -commands depends on the first set of commands. -The two sets can: be: - - * First set: commands before a flink:vkCmdSetEvent command. -+ -Second set: commands after a flink:vkCmdWaitEvents command in the same -queue, using the same event. - - * First set: commands in a lower numbered subpass (or before a render pass - instance). -+ -Second set: commands in a higher numbered subpass (or after a render pass -instance), where there is a <> between the -two subpasses (or between a subpass and ename:VK_SUBPASS_EXTERNAL). - - * First set: commands before a - <>. -+ -Second set: commands after that pipeline barrier in the same queue (possibly - limited to within the same subpass). - -An _execution dependency_ is a single dependency between a set of source and -destination pipeline stages, which guarantees that all work performed by the -set of pipeline stages included in pname:srcStageMask (see -<>) of the first -set of commands completes before any work performed by the set of pipeline -stages included in pname:dstStageMask of the second set of commands begins. - -An _execution dependency chain_ from a set of source pipeline stages [eq]#A# -to a set of destination pipeline stages [eq]#B# is a sequence of execution -dependencies submitted to a queue in order between a first set of commands -and a second set of commands, satisfying the following conditions: - - * the first dependency includes [eq]#A# or - ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT or - ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT in the pname:srcStageMask. - And, - * the final dependency includes [eq]#B# or - ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT or - ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT in the pname:dstStageMask. 
- And, - * for each dependency in the sequence (except the first) at least one of - the following conditions is true: - ** pname:srcStageMask of the current dependency includes at least one bit - [eq]#C# that is present in the pname:dstStageMask of the previous - dependency. - Or, - ** pname:srcStageMask of the current dependency includes - ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT or - ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT. - Or, - ** pname:dstStageMask of the previous dependency includes - ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT or - ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT. - Or, - ** pname:srcStageMask of the current dependency includes - ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, and pname:dstStageMask of the - previous dependency includes at least one graphics pipeline stage. - Or, - ** pname:dstStageMask of the previous dependency includes - ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, and pname:srcStageMask of the - current dependency includes at least one graphics pipeline stage. - * for each dependency in the sequence (except the first), at least one of - the following conditions is true: - ** the current dependency is a fname:vkCmdSetEvent/fname:vkCmdWaitEvents - pair (where the fname:vkCmdWaitEvents may: be inside or outside a - render pass instance), or a fname:vkCmdPipelineBarrier outside of a - render pass instance, or a subpass dependency with pname:srcSubpass - equal to ename:VK_SUBPASS_EXTERNAL for a render pass instance that - begins with a fname:vkCmdBeginRenderPass command, and the previous - dependency is any of: - *** a fname:vkCmdSetEvent/fname:vkCmdWaitEvents pair or a - fname:vkCmdPipelineBarrier, either one outside of a render pass - instance, that precedes the current dependency in the queue execution - order. 
- Or, - *** a subpass dependency, with pname:dstSubpass equal to - ename:VK_SUBPASS_EXTERNAL, for a renderpass instance that was ended - with a fname:vkCmdEndRenderPass command that precedes the current - dependency in the queue execution order. - ** the current dependency is a subpass dependency for a render pass - instance, and the previous dependency is any of: - *** another dependency for the same render pass instance, with a - pname:dstSubpass equal to the pname:srcSubpass of the current - dependency. - Or, - *** a fname:vkCmdPipelineBarrier of the same render pass instance, - recorded for the subpass indicated by the pname:srcSubpass of the - current dependency. - Or, - *** a fname:vkCmdSetEvent/fname:vkCmdWaitEvents pair, where the - fname:vkCmdWaitEvents is inside the same render pass instance, - recorded for the subpass indicated by the pname:srcSubpass of the - current dependency. - ** the current dependency is a fname:vkCmdPipelineBarrier inside a subpass - of a render pass instance, and the previous dependency is any of: - *** a subpass dependency for the same render pass instance, with a - pname:dstSubpass equal to the subpass of the - fname:vkCmdPipelineBarrier. - Or, - *** a fname:vkCmdPipelineBarrier of the same render pass instance, - recorded for the same subpass, before the current dependency. - Or, - *** a fname:vkCmdSetEvent/fname:vkCmdWaitEvents pair, where the - fname:vkCmdWaitEvents is inside the same render pass instance, - recorded for the same subpass, before the current dependency. - -A pair of consecutive execution dependencies in an execution dependency -chain accomplishes a dependency between the stages [eq]#A# and [eq]#B# via -intermediate stages [eq]#C#, even if no work is executed between them that -uses the pipeline stages included in [eq]#C#. 
- -An execution dependency chain guarantees that the work performed by the -pipeline stages [eq]#A# in the first set of commands completes before the -work performed by pipeline stages [eq]#B# in the second set of commands -begins. - -A command [eq]#C~1~# is said to _happen-before_ an execution dependency -[eq]#D~2~# for a pipeline stage [eq]#S# if all the following conditions are -true: - - * [eq]#C~1~# is in the first set of commands for an execution dependency - [eq]#D~1~# that includes [eq]#S# in its pname:srcStageMask. - And, - * there exists an execution dependency chain that includes [eq]#D~1~# and - [eq]#D~2~#, where [eq]#D~2~# follows [eq]#D~1~# in the execution - dependency sequence. - -Similarly, a command [eq]#C~2~# is said to _happen-after_ an execution -dependency [eq]#D~1~# for a pipeline stage [eq]#S# if all the following -conditions are true: - - * [eq]#C~2~# is in the second set of commands for an execution dependency - [eq]#D~2~# that includes [eq]#S# in its pname:dstStageMask. - And, - * there exists an execution dependency chain that includes [eq]#D~1~# and - [eq]#D~2~#, where [eq]#D~2~# follows [eq]#D~1~# in the execution - dependency sequence. - - -An execution dependency is _by-region_ if its pname:dependencyFlags -parameter includes ename:VK_DEPENDENCY_BY_REGION_BIT. -Such a barrier describes a per-region (x,y,layer) dependency. -That is, for each region, the implementation must: ensure that the source -stages for the first set of commands complete execution before any -destination stages begin execution in the second set of commands for the -same region. -Since fragment shader invocations are not specified to run in any particular -groupings, the size of a region is implementation-dependent, not known to -the application, and must: be assumed to be no larger than a single pixel. 
-If pname:dependencyFlags does not include ename:VK_DEPENDENCY_BY_REGION_BIT, -it describes a global dependency, that is for all pixel regions, the source -stages must: have completed for preceding commands before any destination -stages starts for subsequent commands. - -[[synchronization-execution-and-memory-dependencies-available-and-visible]] -_Memory dependencies_ are coupled to execution dependencies, and synchronize -accesses to memory between two sets of commands. -They operate according to two ``halves'' of a dependency to synchronize two -sets of commands, the commands that happen-before the execution dependency -for the pname:srcStageMask vs the commands that happen-after the execution -dependency for the pname:dstStageMask, as described above. -The first half of the dependency makes memory accesses using the set of -access types in pname:srcAccessMask performed in pipeline stages in -pname:srcStageMask by the first set of commands complete and writes be -_available_ for subsequent commands. -The second half of the dependency makes any available writes from previous -commands _visible_ to pipeline stages in pname:dstStageMask using the set of -access types in pname:dstAccessMask for the second set of commands, if those -writes have been made available with the first half of the same or a -previous dependency. -The two halves of a memory dependency can: either be expressed as part of a -single command, or can: be part of separate barriers as long as there is an -execution dependency chain between them. -The application must: use memory dependencies to make writes visible before -subsequent reads can: rely on them, and before subsequent writes can: -overwrite them. -Failure to do so causes the result of the reads to be undefined, and the -order of writes to be undefined. - -[[synchronization-execution-and-memory-dependencies-types]] -<> apply to -all resources owned by the device. 
-<> and -<> apply to the -buffer range(s) or image subresource(s) included in the command. -For accesses to a byte of a buffer or image subresource of an image to be -synchronized between two sets of commands, the byte or image subresource -must: be included in both the first and second halves of the dependencies -described above, but need not be included in each step of the execution -dependency chain between them. - -An execution dependency chain is _by-region_ if all stages in all -dependencies in the chain are framebuffer-space pipeline stages, and if the -ename:VK_DEPENDENCY_BY_REGION_BIT bit is included in all dependencies in the -chain. -Otherwise, the execution dependency chain is not by-region. -The two halves of a memory dependency form a by-region dependency if *all* -execution dependency chains between them are by-region. -In other words, if there is any execution dependency between two sets of -commands that is not by-region, then the memory dependency is not by-region. - -When an image memory barrier includes a layout transition, the barrier first -makes writes via pname:srcStageMask and pname:srcAccessMask available, then -performs the layout transition, then makes the contents of the image -subresource(s) in the new layout visible to memory accesses in -pname:dstStageMask and pname:dstAccessMask, as if there is an execution and -memory dependency between the source masks and the transition, as well as -between the transition and the destination masks. -Any writes that have previously been made available are included in the -layout transition, but any previous writes that have not been made available -may: become lost or corrupt the image. - -All dependencies must: include at least one bit in each of the -pname:srcStageMask and pname:dstStageMask. - -Memory dependencies are used to solve data hazards, e.g. to ensure that -write operations are visible to subsequent read operations (read-after-write -hazard), as well as write-after-write hazards. 
-Write-after-read and read-after-read hazards only require execution -dependencies to synchronize. - [[synchronization-pipeline-barriers]] == Pipeline Barriers -A _pipeline barrier_ inserts an execution dependency and a set of memory -dependencies between a set of commands earlier in the command buffer and a -set of commands later in the command buffer. +flink:vkCmdPipelineBarrier is a synchronization command that inserts a +dependency between commands submitted to the same queue, or between commands +in the same subpass. -// refBegin vkCmdPipelineBarrier Insert a set of execution and memory barriers +// refBegin vkCmdPipelineBarrier Insert a memory dependency To record a pipeline barrier, call: @@ -918,21 +1499,20 @@ include::../api/protos/vkCmdPipelineBarrier.txt[] * pname:commandBuffer is the command buffer into which the command is recorded. - * pname:srcStageMask is a bitmask of elink:VkPipelineStageFlagBits - specifying a set of source pipeline stages (see - <>). - * pname:dstStageMask is a bitmask specifying a set of destination pipeline - stages. -+ -The pipeline barrier specifies an execution dependency such that all work -performed by the set of pipeline stages included in pname:srcStageMask of -the first set of commands completes before any work performed by the set of -pipeline stages included in pname:dstStageMask of the second set of commands -begins. -+ + * pname:srcStageMask defines a <>. + * pname:dstStageMask defines a <>. * pname:dependencyFlags is a bitmask of elink:VkDependencyFlagBits. - The execution dependency is by-region if the mask includes - ename:VK_DEPENDENCY_BY_REGION_BIT. + The bits that can: be included in pname:dependencyFlags are: ++ +-- +// refBegin VkDependencyFlagBits Bitmask specifying how execution and memory dependencies are formed +include::../api/enums/VkDependencyFlagBits.txt[] +-- + ** ename:VK_DEPENDENCY_BY_REGION_BIT signifies that dependencies will be + <>. 
+ * pname:memoryBarrierCount is the length of the pname:pMemoryBarriers array. * pname:pMemoryBarriers is a pointer to an array of slink:VkMemoryBarrier @@ -946,20 +1526,61 @@ begins. * pname:pImageMemoryBarriers is a pointer to an array of slink:VkImageMemoryBarrier structures. -Each element of the pname:pMemoryBarriers, pname:pBufferMemoryBarriers and -pname:pImageMemoryBarriers arrays specifies two halves of a memory -dependency, as defined above. -Specifics of each type of memory barrier and the memory access types are -defined further in <>. +When flink:vkCmdPipelineBarrier is submitted to a queue, it defines a memory +dependency between commands that were submitted before it, and those +submitted after it. -If fname:vkCmdPipelineBarrier is called outside a render pass instance, then -the first set of commands is all prior commands submitted to the queue and -recorded in the command buffer and the second set of commands is all -subsequent commands recorded in the command buffer and submitted to the -queue. -If fname:vkCmdPipelineBarrier is called inside a render pass instance, then -the first set of commands is all prior commands in the same subpass and the -second set of commands is all subsequent commands in the same subpass. +If flink:vkCmdPipelineBarrier was recorded outside a render pass instance, +the first <> +includes every command submitted to the same queue before it, including +those in the same command buffer and batch. +If flink:vkCmdPipelineBarrier was recorded inside a render pass instance, +the first synchronization scope includes only commands submitted before it +within the same subpass. +In either case, the first synchronization scope is limited to operations on +the pipeline stages determined by the +<> specified by +pname:srcStageMask. 
+ +If flink:vkCmdPipelineBarrier was recorded outside a render pass instance, +the second <> +includes every command submitted to the same queue after it, including those +in the same command buffer and batch. +If flink:vkCmdPipelineBarrier was recorded inside a render pass instance, +the second synchronization scope includes only commands submitted after it +within the same subpass. +In either case, the second synchronization scope is limited to operations on +the pipeline stages determined by the +<> specified +by pname:dstStageMask. + +The first <> is +limited to access in the pipeline stages determined by the +<> specified by +pname:srcStageMask. +Within that, the first access scope only includes the first access scopes +defined by elements of the pname:pMemoryBarriers, +pname:pBufferMemoryBarriers and pname:pImageMemoryBarriers arrays, which +each define a set of <>. +If no memory barriers are specified, then the first access scope includes no +accesses. + +The second <> is +limited to access in the pipeline stages determined by the +<> specified +by pname:dstStageMask. +Within that, the second access scope only includes the second access scopes +defined by elements of the pname:pMemoryBarriers, +pname:pBufferMemoryBarriers and pname:pImageMemoryBarriers arrays, which +each define a set of <>. +If no memory barriers are specified, then the second access scope includes +no accesses. + +If pname:dependencyFlags includes ename:VK_DEPENDENCY_BY_REGION_BIT, then +any dependency between <> pipeline stages is +<> - otherwise it is +<>. .Valid Usage **** @@ -1064,170 +1685,28 @@ the pname:srcQueueFamilyIndex and pname:dstQueueFamilyIndex must: be ename:VK_QUEUE_FAMILY_IGNORED. 
-[[synchronization-pipeline-stage-flags]] -=== Pipeline Stage Flags - -// refBegin VkPipelineStageFlagBits Bitmask specifying pipeline stages - -Several of the event commands, fname:vkCmdPipelineBarrier, and -sname:VkSubpassDependency depend on being able to specify where in the -logical pipeline events can: be signaled, or the source and destination of -an execution dependency. -These pipeline stages are specified using a bitmask: - -include::../api/enums/VkPipelineStageFlagBits.txt[] - -The meaning of each bit is: - - * ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: Stage of the pipeline where - commands are initially received by the queue. - * ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT: Stage of the pipeline where - Draw/DispatchIndirect data structures are consumed. - * ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT: Stage of the pipeline where - vertex and index buffers are consumed. - * ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT: Vertex shader stage. - * ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT: Tessellation - control shader stage. - * ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT: Tessellation - evaluation shader stage. - * ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT: Geometry shader stage. - * ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT: Fragment shader stage. - * ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT: Stage of the pipeline - where early fragment tests (depth and stencil tests before fragment - shading) are performed. - * ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT: Stage of the pipeline - where late fragment tests (depth and stencil tests after fragment - shading) are performed. - * ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT: Stage of the - pipeline after blending where the final color values are output from the - pipeline. - This stage also includes resolve operations that occur at the end of a - subpass. - Note that this does not necessarily indicate that the values have been - committed to memory. 
- * [[synchronization-transfer]]ename:VK_PIPELINE_STAGE_TRANSFER_BIT: - Execution of copy commands. - This includes the operations resulting from all transfer commands. - The set of transfer commands comprises fname:vkCmdCopyBuffer, - fname:vkCmdCopyImage, fname:vkCmdBlitImage, - fname:vkCmdCopyBufferToImage, fname:vkCmdCopyImageToBuffer, - fname:vkCmdUpdateBuffer, fname:vkCmdFillBuffer, - fname:vkCmdClearColorImage, fname:vkCmdClearDepthStencilImage, - fname:vkCmdResolveImage, and fname:vkCmdCopyQueryPoolResults. - * ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT: Execution of a compute - shader. - * ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT: Final stage in the pipeline - where commands complete execution. - * ename:VK_PIPELINE_STAGE_HOST_BIT: A pseudo-stage indicating execution on - the host of reads/writes of device memory. - * ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT: Execution of all graphics - pipeline stages. - * ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT: Execution of all stages - supported on the queue. - -[NOTE] -.Note -==== -The ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT and -ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT differ from -ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in that they correspond to all -(or all graphics) stages, rather than to a specific stage at the end of the -pipeline. -An execution dependency with only ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT -in pname:dstStageMask will not delay subsequent commands, while including -either of the other two bits will. -Similarly, when defining a memory dependency, if the stage mask(s) refer to -all stages, then the indicated access types from all stages will be made -available and/or visible, but using only -ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT would not make any accesses -available and/or visible because this stage does not access memory. 
-The ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT is useful for accomplishing -memory barriers and layout transitions when the next accesses will be done -in a different queue or by a presentation engine; in these cases subsequent -commands in the same queue do not need to wait, but the barrier or -transition must: complete before semaphores associated with the batch -signal. -==== - -// refEnd VkPipelineStageFlagBits - -[NOTE] -.Note -==== -If an implementation is unable to update the state of an event at any -specific stage of the pipeline, it may: instead update the event at any -logically later stage. -For example, if an implementation is unable to signal an event immediately -after vertex shader execution is complete, it may: instead signal the event -after color attachment output has completed. -In the limit, an event may: be signaled after all graphics stages complete. -If an implementation is unable to wait on an event at any specific stage of -the pipeline, it may: instead wait on it at any logically earlier stage. - -Similarly, if an implementation is unable to implement an execution -dependency at specific stages of the pipeline, it may: implement the -dependency in a way where additional source pipeline stages complete and/or -where additional destination pipeline stages' execution is blocked to -satisfy the dependency. - -If an implementation makes such a substitution, it must: not affect the -semantics of execution or memory dependencies or image and buffer memory -barriers. -==== - -Certain pipeline stages are only available on queues that support a -particular set of operations. -The following table lists, for each pipeline stage flag, which queue -capability flag must: be supported by the queue. -When multiple flags are enumerated in the second column of the table, it -means that the pipeline stage is supported on the queue if it supports any -of the listed capability flags. -For further details on queue capabilities see -<> -and <>. 
- -.Supported pipeline stage flags -[width="100%",cols="69%,31%",options="header",align="center"] -|==== -|Pipeline stage flag | Required queue capability flag -|ename:VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | None -|ename:VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT -|ename:VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | ename:VK_QUEUE_COMPUTE_BIT -|ename:VK_PIPELINE_STAGE_TRANSFER_BIT | ename:VK_QUEUE_GRAPHICS_BIT, ename:VK_QUEUE_COMPUTE_BIT or ename:VK_QUEUE_TRANSFER_BIT -|ename:VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | None -|ename:VK_PIPELINE_STAGE_HOST_BIT | None -|ename:VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_PIPELINE_STAGE_ALL_COMMANDS_BIT | None -|==== - - [[synchronization-memory-barriers]] -=== Memory Barriers +== Memory Barriers -_Memory barriers_ express the two halves of a memory dependency between an -earlier set of memory accesses against a later set of memory accesses. -Vulkan provides three types of memory barriers: global memory, buffer -memory, and image memory. +_Memory barriers_ are used to explicitly control access to buffer and image +subresource ranges. +Memory barriers are used to <>, +<>, and +define <>. 
+They explicitly define the <> +and buffer and image subresource ranges that are included in the +<> of a memory +dependency that is created by a synchronization command that includes them. - -[[synchronization-global-memory-barrier]] +[[synchronization-global-memory-barriers]] === Global Memory Barriers -The global memory barrier type is specified with an instance of the -sname:VkMemoryBarrier structure. -This type of barrier applies to memory accesses involving all memory objects +Global memory barriers apply to memory accesses involving all memory objects that exist at the time of its execution. -// refBegin VkMemoryBarrier Structure specifying a memory barrier +// refBegin VkMemoryBarrier Structure specifying a global memory barrier The sname:VkMemoryBarrier structure is defined as: @@ -1235,169 +1714,36 @@ include::../api/structs/VkMemoryBarrier.txt[] * pname:sType is the type of this structure. * pname:pNext is `NULL` or a pointer to an extension-specific structure. - * pname:srcAccessMask is a bitmask of the classes of memory accesses - performed by the first set of commands that will participate in the - dependency. - * pname:dstAccessMask is a bitmask of the classes of memory accesses - performed by the second set of commands that will participate in the - dependency. + * pname:srcAccessMask defines a <>. + * pname:dstAccessMask defines a <>. -pname:srcAccessMask and pname:dstAccessMask, along with pname:srcStageMask -and pname:dstStageMask from flink:vkCmdPipelineBarrier, define the two -halves of a memory dependency and an execution dependency. -Memory accesses using the set of access types in pname:srcAccessMask -performed in pipeline stages in pname:srcStageMask by the first set of -commands must: complete and be available to later commands. 
-The side effects of the first set of commands will be visible to memory -accesses using the set of access types in pname:dstAccessMask performed in -pipeline stages in pname:dstStageMask by the second set of commands. -If the barrier is by-region, these requirements only apply to invocations -within the same framebuffer-space region, for pipeline stages that perform -framebuffer-space work. -The execution dependency guarantees that execution of work by the -destination stages of the second set of commands will not begin until -execution of work by the source stages of the first set of commands has -completed. +The first <> is +limited to access types in the <> specified by pname:srcAccessMask. -A common type of memory dependency is to avoid a read-after-write hazard. -In this case, the source access mask and stages will include writes from a -particular stage, and the destination access mask and stages will indicate -how those writes will be read in subsequent commands. -However, barriers can: also express write-after-read dependencies and -write-after-write dependencies, and are even useful to express -read-after-read dependencies across an image layout change. - -// refBegin VkAccessFlagBits Bitmask specifying classes of memory access the will participate in a memory barrier dependency - -Bits which can: be set in slink:VkMemoryBarrier::pname:srcAccessMask and -slink:VkMemoryBarrier::pname:dstAccessMask include: - -[[synchronization-access-flags]] -include::../api/enums/VkAccessFlagBits.txt[] - - * ename:VK_ACCESS_INDIRECT_COMMAND_READ_BIT indicates that the access is - an indirect command structure read as part of an indirect drawing - command. - * ename:VK_ACCESS_INDEX_READ_BIT indicates that the access is an index - buffer read. - * ename:VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT indicates that the access is a - read via the vertex input bindings. 
- * ename:VK_ACCESS_UNIFORM_READ_BIT indicates that the access is a read via - a uniform buffer or dynamic uniform buffer descriptor. - * ename:VK_ACCESS_INPUT_ATTACHMENT_READ_BIT indicates that the access is a - read via an input attachment descriptor. - * ename:VK_ACCESS_SHADER_READ_BIT indicates that the access is a read from - a shader via any other descriptor type. - * ename:VK_ACCESS_SHADER_WRITE_BIT indicates that the access is a write or - atomic from a shader via the same descriptor types as in - ename:VK_ACCESS_SHADER_READ_BIT. - * ename:VK_ACCESS_COLOR_ATTACHMENT_READ_BIT indicates that the access is a - read via a color attachment. - * ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT indicates that the access is - a write via a color or resolve attachment. - * ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT indicates that the - access is a read via a depth/stencil attachment. - * ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT indicates that the - access is a write via a depth/stencil attachment. - * ename:VK_ACCESS_TRANSFER_READ_BIT indicates that the access is a read - from a transfer (copy, blit, resolve, etc.) operation. - For the complete set of transfer operations, see - <>. - * ename:VK_ACCESS_TRANSFER_WRITE_BIT indicates that the access is a write - from a transfer (copy, blit, resolve, etc.) operation. - For the complete set of transfer operations, see - <>. - * ename:VK_ACCESS_HOST_READ_BIT indicates that the access is a read via - the host. - * ename:VK_ACCESS_HOST_WRITE_BIT indicates that the access is a write via - the host. - * ename:VK_ACCESS_MEMORY_READ_BIT indicates that the access is a read via - a non-specific unit attached to the memory. - This unit may: be external to the Vulkan device or otherwise not part of - the core Vulkan pipeline. - When included in pname:dstAccessMask, all writes using access types in - pname:srcAccessMask performed by pipeline stages in pname:srcStageMask - must: be visible in memory. 
- * ename:VK_ACCESS_MEMORY_WRITE_BIT indicates that the access is a write - via a non-specific unit attached to the memory. - This unit may: be external to the Vulkan device or otherwise not part of - the core Vulkan pipeline. - When included in pname:srcAccessMask, all access types in - pname:dstAccessMask from pipeline stages in pname:dstStageMask will - observe the side effects of commands that executed before the barrier. - When included in pname:dstAccessMask all writes using access types in - pname:srcAccessMask performed by pipeline stages in pname:srcStageMask - must: be visible in memory. - -Color attachment reads and writes are automatically (without memory or -execution dependencies) coherent and ordered against themselves and each -other for a given sample within a subpass of a render pass instance, -executing in <>. -Similarly, depth/stencil attachment reads and writes are automatically -coherent and ordered against themselves and each other in the same -circumstances. - -Shader reads and/or writes through two variables (in the same or different -shader invocations) decorated with code:Coherent and which use the same -image view or buffer view are automatically coherent with each other, but -require execution dependencies if a specific order is desired. -Similarly, shader atomic operations are coherent with each other and with -code:Coherent variables. -Non-code:Coherent shader memory accesses require memory dependencies for -writes to be available and reads to be visible. - -Certain memory access types are only supported on queues that support a -particular set of operations. -The following table lists, for each access flag, which queue capability flag -must: be supported by the queue. -When multiple flags are enumerated in the second column of the table it -means that the access type is supported on the queue if it supports any of -the listed capability flags. -For further details on queue capabilities see -<> -and <>. 
- -.Supported access flags -[width="100%",cols="67%,33%",options="header",align="center"] -|==== -|Access flag | Required queue capability flag -|ename:VK_ACCESS_INDIRECT_COMMAND_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT -|ename:VK_ACCESS_INDEX_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_UNIFORM_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT -|ename:VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_SHADER_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT -|ename:VK_ACCESS_SHADER_WRITE_BIT | ename:VK_QUEUE_GRAPHICS_BIT or ename:VK_QUEUE_COMPUTE_BIT -|ename:VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ename:VK_QUEUE_GRAPHICS_BIT -|ename:VK_ACCESS_TRANSFER_READ_BIT | ename:VK_QUEUE_GRAPHICS_BIT, ename:VK_QUEUE_COMPUTE_BIT or ename:VK_QUEUE_TRANSFER_BIT -|ename:VK_ACCESS_TRANSFER_WRITE_BIT | ename:VK_QUEUE_GRAPHICS_BIT, ename:VK_QUEUE_COMPUTE_BIT or ename:VK_QUEUE_TRANSFER_BIT -|ename:VK_ACCESS_HOST_READ_BIT | None -|ename:VK_ACCESS_HOST_WRITE_BIT | None -|ename:VK_ACCESS_MEMORY_READ_BIT | None -|ename:VK_ACCESS_MEMORY_WRITE_BIT | None -|==== +The second <> is +limited to access types in the <> specified by pname:dstAccessMask. include::../validity/structs/VkMemoryBarrier.txt[] -[[synchronization-buffer-memory-barrier]] + +[[synchronization-buffer-memory-barriers]] === Buffer Memory Barriers -The buffer memory barrier type is specified with an instance of the -sname:VkBufferMemoryBarrier structure. -This type of barrier only applies to memory accesses involving a specific -range of the specified buffer object. 
-That is, a memory dependency formed from a buffer memory barrier is -<> to the -specified range of the buffer. -It is also used to transfer ownership of a buffer range from one queue -family to another, as described in the <> section. +Buffer memory barriers only apply to memory accesses involving a specific +buffer range. +That is, a memory dependency formed from a buffer memory barrier is +<> to access via the +specified buffer range. +Buffer memory barriers can: also be used to define a +<> for the +specified buffer range. -// refBegin VkBufferMemoryBarrier Structure specifying the parameters of a buffer memory barrier +// refBegin VkBufferMemoryBarrier Structure specifying a buffer memory barrier The sname:VkBufferMemoryBarrier structure is defined as: @@ -1405,18 +1751,14 @@ include::../api/structs/VkBufferMemoryBarrier.txt[] * pname:sType is the type of this structure. * pname:pNext is `NULL` or a pointer to an extension-specific structure. - * pname:srcAccessMask is a bitmask of the classes of memory accesses - performed by the first set of commands that will participate in the - dependency. - * pname:dstAccessMask is a bitmask of the classes of memory accesses - performed by the second set of commands that will participate in the - dependency. - * pname:srcQueueFamilyIndex is the queue family that is relinquishing - ownership of the range of pname:buffer to another queue, or - ename:VK_QUEUE_FAMILY_IGNORED if there is no transfer of ownership. - * pname:dstQueueFamilyIndex is the queue family that is acquiring - ownership of the range of pname:buffer from another queue, or - ename:VK_QUEUE_FAMILY_IGNORED if there is no transfer of ownership. + * pname:srcAccessMask defines a <>. + * pname:dstAccessMask defines a <>. + * pname:srcQueueFamilyIndex is the source queue family for a + <>. + * pname:dstQueueFamilyIndex is the destination queue family for a + <>. * pname:buffer is a handle to the buffer whose backing memory is affected by the barrier.
* pname:offset is an offset in bytes into the backing memory for @@ -1426,6 +1768,28 @@ include::../api/structs/VkBufferMemoryBarrier.txt[] pname:buffer, or ename:VK_WHOLE_SIZE to use the range from pname:offset to the end of the buffer. +The first <> is +limited to access to the memory backing the specified buffer range, via +access types in the <> +specified by pname:srcAccessMask. + +The second <> is +limited to access to the memory backing the specified buffer range, via +access types in the <> specified by pname:dstAccessMask. + +If pname:srcQueueFamilyIndex is not equal to pname:dstQueueFamilyIndex, and +pname:srcQueueFamilyIndex is equal to the current queue family, then the +memory barrier defines a <> for the specified buffer range, and the second +access scope includes no access, as if pname:dstAccessMask was `0`. + +If pname:dstQueueFamilyIndex is not equal to pname:srcQueueFamilyIndex, and +pname:dstQueueFamilyIndex is equal to the current queue family, then the +memory barrier defines a <> for the specified buffer range, and the first +access scope includes no access, as if pname:srcAccessMask was `0`. + .Valid Usage **** * pname:offset must: be less than the size of pname:buffer @@ -1451,20 +1815,18 @@ include::../api/structs/VkBufferMemoryBarrier.txt[] include::../validity/structs/VkBufferMemoryBarrier.txt[] -[[synchronization-image-memory-barrier]] +[[synchronization-image-memory-barriers]] === Image Memory Barriers -The image memory barrier type is specified with an instance of the -sname:VkImageMemoryBarrier structure. -This type of barrier only applies to memory accesses involving a specific -image subresource range of the specified image object. +Image memory barriers only apply to memory accesses involving a specific +image subresource range. That is, a memory dependency formed from an image memory barrier is -<> to the -specified image subresources of the image. 
-It is also used to perform a layout transition for an image subresource -range, or to transfer ownership of an image subresource range from one queue -family to another as described in the <> -section. +<> to access via the +specified image subresource range. +Image memory barriers can: also be used to define +<> or a +<> for the +specified image subresource range. // refBegin VkImageMemoryBarrier Structure specifying the parameters of an image memory barrier @@ -1474,21 +1836,18 @@ include::../api/structs/VkImageMemoryBarrier.txt[] * pname:sType is the type of this structure. * pname:pNext is `NULL` or a pointer to an extension-specific structure. - * pname:srcAccessMask is a bitmask of the classes of memory accesses - performed by the first set of commands that will participate in the - dependency. - * pname:dstAccessMask is a bitmask of the classes of memory accesses - performed by the second set of commands that will participate in the - dependency. - * pname:oldLayout describes the current layout of the image - subresource(s). - * pname:newLayout describes the new layout of the image subresource(s). - * pname:srcQueueFamilyIndex is the queue family that is relinquishing - ownership of the image subresource(s) to another queue, or - ename:VK_QUEUE_FAMILY_IGNORED if there is no transfer of ownership). - * pname:dstQueueFamilyIndex is the queue family that is acquiring - ownership of the image subresource(s) from another queue, or - ename:VK_QUEUE_FAMILY_IGNORED if there is no transfer of ownership). + * pname:srcAccessMask defines a <>. + * pname:dstAccessMask defines a <>. + * pname:oldLayout is the old layout in an + <>. + * pname:newLayout is the new layout in an + <>. + * pname:srcQueueFamilyIndex is the source queue family for a + <>. + * pname:dstQueueFamilyIndex is the destination queue family for a + <>. * pname:image is a handle to the image whose backing memory is affected by the barrier.
* pname:subresourceRange describes an area of the backing memory for @@ -1496,30 +1855,38 @@ include::../api/structs/VkImageMemoryBarrier.txt[] sname:VkImageSubresourceRange), as well as the set of image subresources whose image layouts are modified. -If pname:oldLayout differs from pname:newLayout, a layout transition occurs -as part of the image memory barrier, affecting the data contained in the -region of the image defined by the pname:subresourceRange. -If pname:oldLayout is ename:VK_IMAGE_LAYOUT_UNDEFINED, then the data is -undefined after the layout transition. -This may: allow a more efficient transition, since the data may: be -discarded. -The layout transition must: occur after all operations using the old layout -are completed and before all operations using the new layout are started. -This is achieved by ensuring that there is a memory dependency between -previous accesses and the layout transition, as well as between the layout -transition and subsequent accesses, where the layout transition occurs -between the two halves of a memory dependency in an image memory barrier. +The first <<synchronization-dependencies-access-scopes, access scope>> is +limited to access to the memory backing the specified image subresource +range, via access types in the <<synchronization-access-masks, source access mask>> specified by pname:srcAccessMask. -Layout transitions that are performed via image memory barriers are -automatically ordered against other layout transitions, including those that -occur as part of a render pass instance. +The second <<synchronization-dependencies-access-scopes, access scope>> is +limited to access to the memory backing the specified image subresource +range, via access types in the <<synchronization-access-masks, destination access mask>> specified by pname:dstAccessMask. -[NOTE] -.Note -==== -See <<resources-image-layouts>> for details on available image layouts and -their usages.
-==== +If pname:srcQueueFamilyIndex is not equal to pname:dstQueueFamilyIndex, and +pname:srcQueueFamilyIndex is equal to the current queue family, then the +memory barrier defines a <<synchronization-queue-transfers-release, queue family release operation>> for the specified image subresource range, and +the second access scope includes no access, as if pname:dstAccessMask was +`0`. + +If pname:dstQueueFamilyIndex is not equal to pname:srcQueueFamilyIndex, and +pname:dstQueueFamilyIndex is equal to the current queue family, then the +memory barrier defines a <<synchronization-queue-transfers-acquire, queue family acquire operation>> for the specified image subresource range, and +the first access scope includes no access, as if pname:srcAccessMask was +`0`. + +If pname:oldLayout is not equal to pname:newLayout, then the memory barrier +defines an <<synchronization-image-layout-transitions, image layout transition>> for the specified image subresource range. +Layout transitions that are performed via image memory barriers +automatically happen-after layout transitions previously submitted to the +same queue, and automatically happen-before layout transitions subsequently +submitted to the same queue; this includes layout transitions that occur as +part of a render pass instance, in both cases. .Valid Usage **** @@ -1572,8 +1939,117 @@ their usages. include::../validity/structs/VkImageMemoryBarrier.txt[] -[[synchronization-waitidle]] -=== Wait Idle Operations +[[synchronization-queue-transfers]] +=== Queue Family Ownership Transfer + +Resources created with a elink:VkSharingMode of +ename:VK_SHARING_MODE_EXCLUSIVE must: have their ownership explicitly +transferred from one queue family to another in order to access their +content in a well-defined manner on a queue in a different queue family. +If memory dependencies are correctly expressed between uses of such a +resource between two queues in different families, but no ownership transfer +is defined, the contents of that resource are undefined for any read +accesses performed by the second queue family.
+ +.Note +[NOTE] +==== +If an application does not need the contents of a resource to remain valid +when transferring from one queue family to another, then the ownership +transfer should: be skipped. +==== + +A queue family ownership transfer consists of two distinct parts: + + 1. Release exclusive ownership from the source queue family + 2. Acquire exclusive ownership for the destination queue family + +An application must: ensure that these operations occur in the correct order +by defining an execution dependency between them, e.g. using a semaphore. + +[[synchronization-queue-transfers-release]] A _release operation_ is used to +release exclusive ownership of a range of a buffer or image subresource +range. +A release operation is defined by executing a +<<synchronization-buffer-memory-barriers, buffer memory barrier>> (for a +buffer range) or an <<synchronization-image-memory-barriers, image memory barrier>> (for an image subresource range), on a queue from the source queue +family. +The pname:srcQueueFamilyIndex parameter of the barrier must: be set to the +source queue family index, and the pname:dstQueueFamilyIndex parameter to +the destination queue family index. +pname:dstStageMask is ignored for such a barrier, such that no visibility +operation is executed - the value of this mask does not affect the validity +of the barrier. +The release operation happens-after the availability operation. + +[[synchronization-queue-transfers-acquire]] An _acquire operation_ is used +to acquire exclusive ownership of a range of a buffer or image subresource +range. +An acquire operation is defined by executing a +<<synchronization-buffer-memory-barriers, buffer memory barrier>> (for a +buffer range) or an <<synchronization-image-memory-barriers, image memory barrier>> (for an image subresource range), on a queue from the destination +queue family. +The pname:srcQueueFamilyIndex parameter of the barrier must: be set to the +source queue family index, and the pname:dstQueueFamilyIndex parameter to +the destination queue family index. +pname:srcStageMask is ignored for such a barrier, such that no availability +operation is executed - the value of this mask does not affect the validity +of the barrier.
+The acquire operation happens-before the visibility operation. + +.Note +[NOTE] +==== +Whilst it is not invalid to provide destination or source access masks for +memory barriers used for release or acquire operations, respectively, they +have no practical effect. +Access after a release operation has undefined results, and so visibility +for those accesses has no practical effect. +Similarly, write access before an acquire operation will produce undefined +results for future access, so availability of those writes has no practical +use. +In an earlier version of the specification, these were required to match on +both sides - but this was subsequently relaxed. +It is now recommended that these masks are simply set to 0. +==== + +If the transfer is via an image memory barrier, and an +<<synchronization-image-layout-transitions, image layout transition>> is +desired, then the values of pname:oldLayout and pname:newLayout in the +release memory barrier must: be equal to the values of pname:oldLayout and +pname:newLayout in the acquire memory barrier. +Although the image layout transition is submitted twice, it will only be +executed once. +A layout transition specified in this way happens-after the release +operation and happens-before the acquire operation. + +If the values of pname:srcQueueFamilyIndex and pname:dstQueueFamilyIndex are +equal, no ownership transfer is performed, and the barrier operates as if +they were both set to ename:VK_QUEUE_FAMILY_IGNORED. + +Queue family ownership transfers may: perform read and write accesses on all +memory bound to the image subresource or buffer range, so applications must: +ensure that all memory writes have been made +<<synchronization-dependencies-available-and-visible, available>> before a +queue family ownership transfer is executed. +Available memory is automatically made visible to queue family release and +acquire operations, and writes performed by those operations are +automatically made available.
+ +Once a queue family has acquired ownership of a buffer range or image +subresource range of an ename:VK_SHARING_MODE_EXCLUSIVE resource, its +contents are undefined to other queue families unless ownership is +transferred. +The contents of any portion of another resource which aliases memory that is +bound to the transferred buffer or image subresource range are undefined +after a release or acquire operation. + + +[[synchronization-wait-idle]] +== Wait Idle Operations // refBegin vkQueueWaitIdle Wait for a queue to become idle @@ -1604,13 +2080,24 @@ all queues owned by pname:device. include::../validity/protos/vkDeviceWaitIdle.txt[] -[[synchronization-implicit-ordering-hostwrites]] +[[synchronization-submission-host-writes]] == Host Write Ordering Guarantees -When submitting batches of command buffers to a queue via -flink:vkQueueSubmit, it is guaranteed that: +When batches of command buffers are submitted to a queue via +flink:vkQueueSubmit, it defines a memory dependency with prior host +operations, and execution of command buffers submitted to the queue. - * Host writes to mappable device memory that occurred before the call to - fname:vkQueueSubmit are visible to the queue operation resulting from - that submission, if the device memory is coherent or if the memory range - was flushed with flink:vkFlushMappedMemoryRanges. +The first <<synchronization-dependencies-scopes, synchronization scope>> is +defined by the host execution model, but includes execution of +flink:vkQueueSubmit on the host and anything that happened-before it. + +The second <<synchronization-dependencies-scopes, synchronization scope>> +includes every command submitted in the same <<devsandqueues-submission, queue submission>> command, and all future submissions to the same queue. + +The first <<synchronization-dependencies-access-scopes, access scope>> +includes all host writes to mappable device memory that are either coherent, +or have been flushed with flink:vkFlushMappedMemoryRanges. + +The second <<synchronization-dependencies-access-scopes, access scope>> +includes all memory access performed by the device.
diff --git a/doc/specs/vulkan/checkLinks.py b/doc/specs/vulkan/checkLinks.py index ffe1d046..5bd9219b 100755 --- a/doc/specs/vulkan/checkLinks.py +++ b/doc/specs/vulkan/checkLinks.py @@ -178,7 +178,7 @@ def checkLinks(infile, follow = False, included = False): return inPath = os.path.dirname(curFile) - fp = open(curFile, 'r') + fp = open(curFile, 'r', encoding='utf-8') for line in fp: curLine = curLine + 1 diff --git a/doc/specs/vulkan/genRef.py b/doc/specs/vulkan/genRef.py index ad3bdf74..2dff3ce6 100755 --- a/doc/specs/vulkan/genRef.py +++ b/doc/specs/vulkan/genRef.py @@ -220,7 +220,7 @@ def refPageTail(pageName, seeAlso, fp, auto = False): # file - list of strings making up the file, indexed by pi def emitPage(baseDir, specDir, pi, file): pageName = baseDir + '/' + pi.name + '.txt' - fp = open(pageName, 'w') + fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page global genDict @@ -269,7 +269,7 @@ def emitPage(baseDir, specDir, pi, file): # file - list of strings making up the file, indexed by pi def autoGenEnumsPage(baseDir, pi, file): pageName = baseDir + '/' + pi.name + '.txt' - fp = open(pageName, 'w') + fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page global genDict @@ -313,7 +313,7 @@ flagNamePat = re.compile('(?P\w+)Flags(?P[A-Z]*)') # flagName - Vk*Flags name def autoGenFlagsPage(baseDir, flagName): pageName = baseDir + '/' + flagName + '.txt' - fp = open(pageName, 'w') + fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page global genDict @@ -361,7 +361,7 @@ def autoGenFlagsPage(baseDir, flagName): # @@ interface in generator.py. 
def autoGenHandlePage(baseDir, handleName): pageName = baseDir + '/' + handleName + '.txt' - fp = open(pageName, 'w') + fp = open(pageName, 'w', encoding='utf-8') # Add a dictionary entry for this page global genDict @@ -509,7 +509,7 @@ def genSinglePageRef(baseDir): # Write head and body to the output file pageName = baseDir + '/apispec.txt' - fp = open(pageName, 'w') + fp = open(pageName, 'w', encoding='utf-8') print(head.getvalue(), file=fp, end='') print(body.getvalue(), file=fp, end='') diff --git a/doc/specs/vulkan/genspec.py b/doc/specs/vulkan/genspec.py index c27a7852..47dda25b 100644 --- a/doc/specs/vulkan/genspec.py +++ b/doc/specs/vulkan/genspec.py @@ -57,7 +57,8 @@ allExtensions = KHRextensions + [ 'VK_NV_external_memory_capabilities', 'VK_NV_external_memory_win32', 'VK_NV_win32_keyed_mutex', - 'VK_NV_glsl_shader' + 'VK_NV_glsl_shader', + 'VK_NVX_device_generated_commands' ] # Return the Vulkan release number, used for tags diff --git a/doc/specs/vulkan/insertTags.py b/doc/specs/vulkan/insertTags.py index e7a6cdc7..e9ef863b 100755 --- a/doc/specs/vulkan/insertTags.py +++ b/doc/specs/vulkan/insertTags.py @@ -96,7 +96,7 @@ def insertTags(specFile, baseDir): pageName = baseDir + '/' + os.path.basename(specFile) logDiag('Creating output file', pageName) - fp = open(pageName, 'w') + fp = open(pageName, 'w', encoding='utf-8') fp.writelines(file) fp.close() diff --git a/doc/specs/vulkan/makeAllExts b/doc/specs/vulkan/makeAllExts index cef0e00e..1ee16daa 100755 --- a/doc/specs/vulkan/makeAllExts +++ b/doc/specs/vulkan/makeAllExts @@ -42,6 +42,7 @@ for ext in \ VK_AMD_shader_trinary_minmax \ VK_EXT_debug_marker \ VK_EXT_debug_report \ + VK_EXT_validation_flags \ VK_IMG_filter_cubic \ VK_NV_dedicated_allocation \ VK_NV_external_memory \ @@ -49,7 +50,7 @@ for ext in \ VK_NV_external_memory_win32 \ VK_NV_win32_keyed_mutex \ VK_NV_glsl_shader \ - VK_EXT_validation_flags \ + VK_NVX_device_generated_commands \ ; do extensions="$extensions $ext" done diff --git 
a/doc/specs/vulkan/reflib.py b/doc/specs/vulkan/reflib.py index 82c719ab..ed5b82b0 100644 --- a/doc/specs/vulkan/reflib.py +++ b/doc/specs/vulkan/reflib.py @@ -41,7 +41,7 @@ def setLogFile(setDiag, setWarn, filename): elif filename == '-': fp = sys.stdout else: - fp = open(filename, 'w') + fp = open(filename, 'w', encoding='utf-8') if setDiag: diagFile = fp @@ -306,7 +306,8 @@ def fixupRefs(pageMap, specFile, file): printPageInfo(pi, file) printPageInfo(embed, file) # If an embed is found, change the error to a warning - elif pi.include >= embed.begin and pi.include <= embed.end: + elif (pi.include != None and pi.include >= embed.begin and + pi.include <= embed.end): logDiag('fixupRefs: Found embed for:', name, 'inside:', embedName, 'in', specFile, 'at line', pi.include ) diff --git a/doc/specs/vulkan/style/writing.txt b/doc/specs/vulkan/style/writing.txt index ac12e0b7..d36ffcce 100644 --- a/doc/specs/vulkan/style/writing.txt +++ b/doc/specs/vulkan/style/writing.txt @@ -128,6 +128,10 @@ of a structure as a ``color space'' value. 3+h| Exceptions | mipmap | mip map | Exception for historical reasons | swapchain | swap chain | Exception due to heavy use in WSI extensions +| happen-before + + happen-after | happen before + + happen after | As used in concurrent languages such as + C++11, Java and OpenCL C. 
|==== ==== Words With "Pre-" Prefixes diff --git a/doc/specs/vulkan/vkspec.txt b/doc/specs/vulkan/vkspec.txt index 18d2a073..1e70a511 100644 --- a/doc/specs/vulkan/vkspec.txt +++ b/doc/specs/vulkan/vkspec.txt @@ -106,6 +106,11 @@ include::chapters/framebuffer.txt[] // Compute include::chapters/dispatch.txt[] +// Device Generated Commands +ifdef::VK_NVX_device_generated_commands[] +include::chapters/VK_NVX_device_generated_commands/generatedcommands.txt[] +endif::VK_NVX_device_generated_commands[] + // Sparse include::chapters/sparsemem.txt[] diff --git a/src/ext_loader/vulkan_ext.c b/src/ext_loader/vulkan_ext.c index 2d99f2e0..24a71ee9 100644 --- a/src/ext_loader/vulkan_ext.c +++ b/src/ext_loader/vulkan_ext.c @@ -670,6 +670,133 @@ VkResult vkGetMemoryWin32HandleNV( #endif /* VK_USE_PLATFORM_WIN32_KHR */ #endif /* VK_NV_external_memory_win32 */ +#ifdef VK_NVX_device_generated_commands +static PFN_vkCmdProcessCommandsNVX pfn_vkCmdProcessCommandsNVX; +void vkCmdProcessCommandsNVX( + VkCommandBuffer commandBuffer, + const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo) +{ + pfn_vkCmdProcessCommandsNVX( + commandBuffer, + pProcessCommandsInfo + ); +} + +static PFN_vkCmdReserveSpaceForCommandsNVX pfn_vkCmdReserveSpaceForCommandsNVX; +void vkCmdReserveSpaceForCommandsNVX( + VkCommandBuffer commandBuffer, + const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo) +{ + pfn_vkCmdReserveSpaceForCommandsNVX( + commandBuffer, + pReserveSpaceInfo + ); +} + +static PFN_vkCreateIndirectCommandsLayoutNVX pfn_vkCreateIndirectCommandsLayoutNVX; +VkResult vkCreateIndirectCommandsLayoutNVX( + VkDevice device, + const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout) +{ + return pfn_vkCreateIndirectCommandsLayoutNVX( + device, + pCreateInfo, + pAllocator, + pIndirectCommandsLayout + ); +} + +static PFN_vkDestroyIndirectCommandsLayoutNVX pfn_vkDestroyIndirectCommandsLayoutNVX; 
+void vkDestroyIndirectCommandsLayoutNVX( + VkDevice device, + VkIndirectCommandsLayoutNVX indirectCommandsLayout, + const VkAllocationCallbacks* pAllocator) +{ + pfn_vkDestroyIndirectCommandsLayoutNVX( + device, + indirectCommandsLayout, + pAllocator + ); +} + +static PFN_vkCreateObjectTableNVX pfn_vkCreateObjectTableNVX; +VkResult vkCreateObjectTableNVX( + VkDevice device, + const VkObjectTableCreateInfoNVX* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkObjectTableNVX* pObjectTable) +{ + return pfn_vkCreateObjectTableNVX( + device, + pCreateInfo, + pAllocator, + pObjectTable + ); +} + +static PFN_vkDestroyObjectTableNVX pfn_vkDestroyObjectTableNVX; +void vkDestroyObjectTableNVX( + VkDevice device, + VkObjectTableNVX objectTable, + const VkAllocationCallbacks* pAllocator) +{ + pfn_vkDestroyObjectTableNVX( + device, + objectTable, + pAllocator + ); +} + +static PFN_vkRegisterObjectsNVX pfn_vkRegisterObjectsNVX; +VkResult vkRegisterObjectsNVX( + VkDevice device, + VkObjectTableNVX objectTable, + uint32_t objectCount, + const VkObjectTableEntryNVX* const* ppObjectTableEntries, + const uint32_t* pObjectIndices) +{ + return pfn_vkRegisterObjectsNVX( + device, + objectTable, + objectCount, + ppObjectTableEntries, + pObjectIndices + ); +} + +static PFN_vkUnregisterObjectsNVX pfn_vkUnregisterObjectsNVX; +VkResult vkUnregisterObjectsNVX( + VkDevice device, + VkObjectTableNVX objectTable, + uint32_t objectCount, + const VkObjectEntryTypeNVX* pObjectEntryTypes, + const uint32_t* pObjectIndices) +{ + return pfn_vkUnregisterObjectsNVX( + device, + objectTable, + objectCount, + pObjectEntryTypes, + pObjectIndices + ); +} + +static PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX pfn_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX; +void vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX( + VkPhysicalDevice physicalDevice, + VkDeviceGeneratedCommandsFeaturesNVX* pFeatures, + VkDeviceGeneratedCommandsLimitsNVX* pLimits) +{ + 
pfn_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX( + physicalDevice, + pFeatures, + pLimits + ); +} + +#endif /* VK_NVX_device_generated_commands */ void vkExtInitInstance(VkInstance instance) { @@ -758,6 +885,17 @@ void vkExtInitInstance(VkInstance instance) pfn_vkGetMemoryWin32HandleNV = (PFN_vkGetMemoryWin32HandleNV)vkGetInstanceProcAddr(instance, "vkGetMemoryWin32HandleNV"); #endif /* VK_USE_PLATFORM_WIN32_KHR */ #endif /* VK_NV_external_memory_win32 */ +#ifdef VK_NVX_device_generated_commands + pfn_vkCmdProcessCommandsNVX = (PFN_vkCmdProcessCommandsNVX)vkGetInstanceProcAddr(instance, "vkCmdProcessCommandsNVX"); + pfn_vkCmdReserveSpaceForCommandsNVX = (PFN_vkCmdReserveSpaceForCommandsNVX)vkGetInstanceProcAddr(instance, "vkCmdReserveSpaceForCommandsNVX"); + pfn_vkCreateIndirectCommandsLayoutNVX = (PFN_vkCreateIndirectCommandsLayoutNVX)vkGetInstanceProcAddr(instance, "vkCreateIndirectCommandsLayoutNVX"); + pfn_vkDestroyIndirectCommandsLayoutNVX = (PFN_vkDestroyIndirectCommandsLayoutNVX)vkGetInstanceProcAddr(instance, "vkDestroyIndirectCommandsLayoutNVX"); + pfn_vkCreateObjectTableNVX = (PFN_vkCreateObjectTableNVX)vkGetInstanceProcAddr(instance, "vkCreateObjectTableNVX"); + pfn_vkDestroyObjectTableNVX = (PFN_vkDestroyObjectTableNVX)vkGetInstanceProcAddr(instance, "vkDestroyObjectTableNVX"); + pfn_vkRegisterObjectsNVX = (PFN_vkRegisterObjectsNVX)vkGetInstanceProcAddr(instance, "vkRegisterObjectsNVX"); + pfn_vkUnregisterObjectsNVX = (PFN_vkUnregisterObjectsNVX)vkGetInstanceProcAddr(instance, "vkUnregisterObjectsNVX"); + pfn_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX = (PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX"); +#endif /* VK_NVX_device_generated_commands */ } void vkExtInitDevice(VkDevice device) @@ -847,5 +985,16 @@ void vkExtInitDevice(VkDevice device) pfn_vkGetMemoryWin32HandleNV = (PFN_vkGetMemoryWin32HandleNV)vkGetDeviceProcAddr(device, 
"vkGetMemoryWin32HandleNV"); #endif /* VK_USE_PLATFORM_WIN32_KHR */ #endif /* VK_NV_external_memory_win32 */ +#ifdef VK_NVX_device_generated_commands + pfn_vkCmdProcessCommandsNVX = (PFN_vkCmdProcessCommandsNVX)vkGetDeviceProcAddr(device, "vkCmdProcessCommandsNVX"); + pfn_vkCmdReserveSpaceForCommandsNVX = (PFN_vkCmdReserveSpaceForCommandsNVX)vkGetDeviceProcAddr(device, "vkCmdReserveSpaceForCommandsNVX"); + pfn_vkCreateIndirectCommandsLayoutNVX = (PFN_vkCreateIndirectCommandsLayoutNVX)vkGetDeviceProcAddr(device, "vkCreateIndirectCommandsLayoutNVX"); + pfn_vkDestroyIndirectCommandsLayoutNVX = (PFN_vkDestroyIndirectCommandsLayoutNVX)vkGetDeviceProcAddr(device, "vkDestroyIndirectCommandsLayoutNVX"); + pfn_vkCreateObjectTableNVX = (PFN_vkCreateObjectTableNVX)vkGetDeviceProcAddr(device, "vkCreateObjectTableNVX"); + pfn_vkDestroyObjectTableNVX = (PFN_vkDestroyObjectTableNVX)vkGetDeviceProcAddr(device, "vkDestroyObjectTableNVX"); + pfn_vkRegisterObjectsNVX = (PFN_vkRegisterObjectsNVX)vkGetDeviceProcAddr(device, "vkRegisterObjectsNVX"); + pfn_vkUnregisterObjectsNVX = (PFN_vkUnregisterObjectsNVX)vkGetDeviceProcAddr(device, "vkUnregisterObjectsNVX"); + pfn_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX = (PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)vkGetDeviceProcAddr(device, "vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX"); +#endif /* VK_NVX_device_generated_commands */ } diff --git a/src/spec/docgenerator.py b/src/spec/docgenerator.py index 9249898b..45dca2dc 100644 --- a/src/spec/docgenerator.py +++ b/src/spec/docgenerator.py @@ -125,7 +125,7 @@ class DocOutputGenerator(OutputGenerator): # Create file filename = directory + '/' + basename + '.txt' self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w') + fp = open(filename, 'w', encoding='utf-8') # Asciidoc anchor write('// WARNING: DO NOT MODIFY! 
This file is automatically generated from the vk.xml registry', file=fp) diff --git a/src/spec/extensionStubSource.py b/src/spec/extensionStubSource.py index 8553d298..2e516fb7 100644 --- a/src/spec/extensionStubSource.py +++ b/src/spec/extensionStubSource.py @@ -109,14 +109,16 @@ class ExtensionStubSourceOutputGenerator(OutputGenerator): # # Multiple inclusion protection & C++ wrappers. - # Internal state - accumulators for function pointers and function pointer initializatoin + # Internal state - accumulators for function pointers and function + # pointer initialization self.pointers = []; self.pointerInitializersInstance = []; self.pointerInitializersDevice = []; # # Write header protection - self.outFileHeader = open(self.genOpts.directory + '/' + 'vulkan_ext.h', 'w') + filename = self.genOpts.directory + '/' + 'vulkan_ext.h' + self.outFileHeader = open(filename, 'w', encoding='utf-8') write('#ifndef VULKAN_EXT_H', file=self.outFileHeader) write('#define VULKAN_EXT_H', file=self.outFileHeader) diff --git a/src/spec/generator.py b/src/spec/generator.py index 043121cd..3c5cc0a1 100644 --- a/src/spec/generator.py +++ b/src/spec/generator.py @@ -318,7 +318,8 @@ class OutputGenerator: # Open specified output file. Not done in constructor since a # Generator can be used without writing to a file.
if (self.genOpts.filename != None): - self.outFile = open(self.genOpts.directory + '/' + self.genOpts.filename, 'w') + filename = self.genOpts.directory + '/' + self.genOpts.filename + self.outFile = open(filename, 'w', encoding='utf-8') else: self.outFile = sys.stdout def endFile(self): diff --git a/src/spec/genvk.py b/src/spec/genvk.py index 33e8055e..a03b84a8 100755 --- a/src/spec/genvk.py +++ b/src/spec/genvk.py @@ -302,16 +302,16 @@ if __name__ == '__main__': if (args.dump): write('* Dumping registry to regdump.txt', file=sys.stderr) - reg.dumpReg(filehandle = open('regdump.txt','w')) + reg.dumpReg(filehandle = open('regdump.txt', 'w', encoding='utf-8')) # create error/warning & diagnostic files if (args.errfile): - errWarn = open(args.errfile, 'w') + errWarn = open(args.errfile, 'w', encoding='utf-8') else: errWarn = sys.stderr if (args.diagfile): - diag = open(args.diagfile, 'w') + diag = open(args.diagfile, 'w', encoding='utf-8') else: diag = None diff --git a/src/spec/hostsyncgenerator.py b/src/spec/hostsyncgenerator.py index ea412d5d..7ac72484 100644 --- a/src/spec/hostsyncgenerator.py +++ b/src/spec/hostsyncgenerator.py @@ -55,7 +55,7 @@ class HostSynchronizationOutputGenerator(OutputGenerator): # Create file filename = self.genOpts.directory + '/' + 'parameters.txt' self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w') + fp = open(filename, 'w', encoding='utf-8') # Host Synchronization write('// WARNING: DO NOT MODIFY! This file is automatically generated from the vk.xml registry', file=fp) @@ -69,7 +69,7 @@ class HostSynchronizationOutputGenerator(OutputGenerator): # Create file filename = self.genOpts.directory + '/' + '/parameterlists.txt' self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w') + fp = open(filename, 'w', encoding='utf-8') # Host Synchronization write('// WARNING: DO NOT MODIFY! 
This file is automatically generated from the vk.xml registry', file=fp) @@ -83,7 +83,7 @@ class HostSynchronizationOutputGenerator(OutputGenerator): # Create file filename = self.genOpts.directory + '/' + '/implicit.txt' self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w') + fp = open(filename, 'w', encoding='utf-8') # Host Synchronization write('// WARNING: DO NOT MODIFY! This file is automatically generated from the vk.xml registry', file=fp) diff --git a/src/spec/realign.py b/src/spec/realign.py index 2e697217..54347e9e 100755 --- a/src/spec/realign.py +++ b/src/spec/realign.py @@ -40,11 +40,11 @@ def realignXML(fp): match = regexp[i].match(line) if (match): if (not emitted): - #print("# While processing line: " + line, end='') + #print('# While processing line: ' + line, end='') emitted = True - #print("# matched expression: " + patterns[i][0]) - #print("# clause 1 = " + match.group(1)) - #print("# clause 2 = " + match.group(2)) + #print('# matched expression: ' + patterns[i][0]) + #print('# clause 1 = ' + match.group(1)) + #print('# clause 2 = ' + match.group(2)) line = match.group(1).ljust(column[i]) + match.group(2) if (emitted): print(line) @@ -53,6 +53,6 @@ def realignXML(fp): if __name__ == '__main__': if (len(sys.argv) > 1): - realignXML(open(sys.argv[1],"r")) + realignXML(open(sys.argv[1], 'r', encoding='utf-8')) else: realignXML(sys.stdin) diff --git a/src/spec/validitygenerator.py b/src/spec/validitygenerator.py index d26695f4..9ae2fdfb 100644 --- a/src/spec/validitygenerator.py +++ b/src/spec/validitygenerator.py @@ -82,7 +82,7 @@ class ValidityOutputGenerator(OutputGenerator): filename = directory + '/' + basename + '.txt' self.logMsg('diag', '# Generating include file:', filename) - fp = open(filename, 'w') + fp = open(filename, 'w', encoding='utf-8') # Asciidoc anchor write('// WARNING: DO NOT MODIFY! 
This file is automatically generated from the vk.xml registry', file=fp) @@ -861,7 +861,7 @@ class ValidityOutputGenerator(OutputGenerator): def makeThreadSafetyBlock(self, cmd, paramtext): """Generate C function pointer typedef for Element""" paramdecl = '' - + # Find and add any parameters that are thread unsafe explicitexternsyncparams = cmd.findall(paramtext + "[@externsync]") if (explicitexternsyncparams is not None): @@ -888,7 +888,7 @@ class ValidityOutputGenerator(OutputGenerator): paramdecl += '* ' paramdecl += 'Host access to the sname:VkCommandPool that pname:commandBuffer was allocated from must: be externally synchronized' paramdecl += '\n' - + # Find and add any "implicit" parameters that are thread unsafe implicitexternsyncparams = cmd.find('implicitexternsyncparams') if (implicitexternsyncparams is not None): diff --git a/src/spec/vk.xml b/src/spec/vk.xml index ed30fcda..350eb965 100644 --- a/src/spec/vk.xml +++ b/src/spec/vk.xml @@ -104,7 +104,7 @@ maintained in the master branch of the Khronos Vulkan GitHub project. // Vulkan 1.0 version number #define VK_API_VERSION_1_0 VK_MAKE_VERSION(1, 0, 0) // Version of this file -#define VK_HEADER_VERSION 34 +#define VK_HEADER_VERSION 35 #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; @@ -195,6 +195,10 @@ maintained in the master branch of the Khronos Vulkan GitHub project. typedef VkFlags VkDescriptorPoolCreateFlags; typedef VkFlags VkDescriptorPoolResetFlags; typedef VkFlags VkDependencyFlags; + + typedef VkFlags VkIndirectCommandsLayoutUsageFlagsNVX; + typedef VkFlags VkObjectEntryUsageFlagsNVX; + typedef VkFlags VkCompositeAlphaFlagsKHR; typedef VkFlags VkDisplayPlaneAlphaFlagsKHR; @@ -239,6 +243,8 @@ maintained in the master branch of the Khronos Vulkan GitHub project. 
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) + VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX) + VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) @@ -334,6 +340,10 @@ maintained in the master branch of the Khronos Vulkan GitHub project. + + + + @@ -1444,21 +1454,21 @@ maintained in the master branch of the Khronos Vulkan GitHub project. VkStructureType sType const void* pNext VkAndroidSurfaceCreateFlagsKHR flags - ANativeWindow* window + ANativeWindow* window VkStructureType sType const void* pNext VkMirSurfaceCreateFlagsKHR flags - MirConnection* connection - MirSurface* mirSurface + MirConnection* connection + MirSurface* mirSurface VkStructureType sType const void* pNext VkWaylandSurfaceCreateFlagsKHR flags - struct wl_display* display - struct wl_surface* surface + struct wl_display* display + struct wl_surface* surface VkStructureType sType @@ -1471,14 +1481,14 @@ maintained in the master branch of the Khronos Vulkan GitHub project. VkStructureType sType const void* pNext VkXlibSurfaceCreateFlagsKHR flags - Display* dpy + Display* dpy Window window VkStructureType sType const void* pNext VkXcbSurfaceCreateFlagsKHR flags - xcb_connection_t* connection + xcb_connection_t* connection xcb_window_t window @@ -1571,33 +1581,33 @@ maintained in the master branch of the Khronos Vulkan GitHub project. 
VkImage image VkBuffer buffer - + VkImageFormatProperties imageFormatProperties - VkExternalMemoryFeatureFlagsNV externalMemoryFeatures - VkExternalMemoryHandleTypeFlagsNV exportFromImportedHandleTypes - VkExternalMemoryHandleTypeFlagsNV compatibleHandleTypes + VkExternalMemoryFeatureFlagsNV externalMemoryFeatures + VkExternalMemoryHandleTypeFlagsNV exportFromImportedHandleTypes + VkExternalMemoryHandleTypeFlagsNV compatibleHandleTypes VkStructureType sType const void* pNext - VkExternalMemoryHandleTypeFlagsNV handleTypes + VkExternalMemoryHandleTypeFlagsNV handleTypes VkStructureType sType const void* pNext - VkExternalMemoryHandleTypeFlagsNV handleTypes + VkExternalMemoryHandleTypeFlagsNV handleTypes VkStructureType sType const void* pNext VkExternalMemoryHandleTypeFlagsNV handleType - HANDLE handle + HANDLE handle VkStructureType sType const void* pNext - const SECURITY_ATTRIBUTES* pAttributes - DWORD dwAccess + const SECURITY_ATTRIBUTES* pAttributes + DWORD dwAccess VkStructureType sType @@ -1610,6 +1620,106 @@ maintained in the master branch of the Khronos Vulkan GitHub project. 
const VkDeviceMemory* pReleaseSyncs const uint64_t* pReleaseKeys + + + VkStructureType sType + const void* pNext + VkBool32 computeBindingPointSupport + + + VkStructureType sType + const void* pNext + uint32_t maxIndirectCommandsLayoutTokenCount + uint32_t maxObjectEntryCounts + uint32_t minSequenceCountBufferOffsetAlignment + uint32_t minSequenceIndexBufferOffsetAlignment + uint32_t minCommandsTokenBufferOffsetAlignment + + + VkIndirectCommandsTokenTypeNVX tokenType + VkBuffer buffer + VkDeviceSize offset + + + VkIndirectCommandsTokenTypeNVX tokenType + uint32_t bindingUnit + uint32_t dynamicCount + uint32_t divisor + + + VkStructureType sType + const void* pNext + VkPipelineBindPoint pipelineBindPoint + VkIndirectCommandsLayoutUsageFlagsNVX flags + uint32_t tokenCount + const VkIndirectCommandsLayoutTokenNVX* pTokens + + + VkStructureType sType + const void* pNext + VkObjectTableNVX objectTable + VkIndirectCommandsLayoutNVX indirectCommandsLayout + uint32_t indirectCommandsTokenCount + const VkIndirectCommandsTokenNVX* pIndirectCommandsTokens + uint32_t maxSequencesCount + VkCommandBuffer targetCommandBuffer + VkBuffer sequencesCountBuffer + VkDeviceSize sequencesCountOffset + VkBuffer sequencesIndexBuffer + VkDeviceSize sequencesIndexOffset + + + VkStructureType sType + const void* pNext + VkObjectTableNVX objectTable + VkIndirectCommandsLayoutNVX indirectCommandsLayout + uint32_t maxSequencesCount + + + VkStructureType sType + const void* pNext + uint32_t objectCount + const VkObjectEntryTypeNVX* pObjectEntryTypes + const uint32_t* pObjectEntryCounts + const VkObjectEntryUsageFlagsNVX* pObjectEntryUsageFlags + + uint32_t maxUniformBuffersPerDescriptor + uint32_t maxStorageBuffersPerDescriptor + uint32_t maxStorageImagesPerDescriptor + uint32_t maxSampledImagesPerDescriptor + uint32_t maxPipelineLayouts + + + VkObjectEntryTypeNVX type + VkObjectEntryUsageFlagsNVX flags + + + VkObjectEntryTypeNVX type + VkObjectEntryUsageFlagsNVX flags + VkPipeline pipeline + + +
VkObjectEntryTypeNVX type + VkObjectEntryUsageFlagsNVX flags + VkPipelineLayout pipelineLayout + VkDescriptorSet descriptorSet + + + VkObjectEntryTypeNVX type + VkObjectEntryUsageFlagsNVX flags + VkBuffer buffer + + + VkObjectEntryTypeNVX type + VkObjectEntryUsageFlagsNVX flags + VkBuffer buffer + + + VkObjectEntryTypeNVX type + VkObjectEntryUsageFlagsNVX flags + VkPipelineLayout pipelineLayout + VkShaderStageFlags stageFlags + @@ -2434,7 +2544,33 @@ maintained in the master branch of the Khronos Vulkan GitHub project. - + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3600,8 +3736,8 @@ maintained in the master branch of the Khronos Vulkan GitHub project. uint64_t object size_t location int32_t messageCode - const char* pLayerPrefix - const char* pMessage + const char* pLayerPrefix + const char* pMessage VkResult vkDebugMarkerSetObjectNameEXT @@ -3634,8 +3770,8 @@ maintained in the master branch of the Khronos Vulkan GitHub project. VkImageType type VkImageTiling tiling VkImageUsageFlags usage - VkImageCreateFlags flags - VkExternalMemoryHandleTypeFlagsNV externalHandleType + VkImageCreateFlags flags + VkExternalMemoryHandleTypeFlagsNV externalHandleType VkExternalImageFormatPropertiesNV* pExternalImageFormatProperties @@ -3665,6 +3801,64 @@ maintained in the master branch of the Khronos Vulkan GitHub project. 
uint32_t maxDrawCount uint32_t stride + + void vkCmdProcessCommandsNVX + VkCommandBuffer commandBuffer + const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo + + + void vkCmdReserveSpaceForCommandsNVX + VkCommandBuffer commandBuffer + const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo + + + VkResult vkCreateIndirectCommandsLayoutNVX + VkDevice device + const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo + const VkAllocationCallbacks* pAllocator + VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout + + + void vkDestroyIndirectCommandsLayoutNVX + VkDevice device + VkIndirectCommandsLayoutNVX indirectCommandsLayout + const VkAllocationCallbacks* pAllocator + + + VkResult vkCreateObjectTableNVX + VkDevice device + const VkObjectTableCreateInfoNVX* pCreateInfo + const VkAllocationCallbacks* pAllocator + VkObjectTableNVX* pObjectTable + + + void vkDestroyObjectTableNVX + VkDevice device + VkObjectTableNVX objectTable + const VkAllocationCallbacks* pAllocator + + + VkResult vkRegisterObjectsNVX + VkDevice device + VkObjectTableNVX objectTable + uint32_t objectCount + const VkObjectTableEntryNVX* const* ppObjectTableEntries + const uint32_t* pObjectIndices + + + VkResult vkUnregisterObjectsNVX + VkDevice device + VkObjectTableNVX objectTable + uint32_t objectCount + const VkObjectEntryTypeNVX* pObjectEntryTypes + const uint32_t* pObjectIndices + + + void vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX + VkPhysicalDevice physicalDevice + VkDeviceGeneratedCommandsFeaturesNVX* pFeatures + VkDeviceGeneratedCommandsLimitsNVX* pLimits + @@ -4547,12 +4741,50 @@ maintained in the master branch of the Khronos Vulkan GitHub project. - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -4577,5 +4809,11 @@ maintained in the master branch of the Khronos Vulkan GitHub project. 
+ + + + + + diff --git a/src/vulkan/vulkan.h b/src/vulkan/vulkan.h index b33871eb..5549e178 100644 --- a/src/vulkan/vulkan.h +++ b/src/vulkan/vulkan.h @@ -43,7 +43,7 @@ extern "C" { #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) // Version of this file -#define VK_HEADER_VERSION 34 +#define VK_HEADER_VERSION 35 #define VK_NULL_HANDLE 0 @@ -226,6 +226,12 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057001, VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000, VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000, + VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000, + VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 1000086001, + VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX = 1000086002, + VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX = 1000086003, + VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 1000086004, + VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 1000086005, VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), @@ -918,6 +924,7 @@ typedef enum VkPipelineStageFlagBits { VK_PIPELINE_STAGE_HOST_BIT = 0x00004000, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00008000, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00010000, + VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x00020000, VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkPipelineStageFlagBits; typedef VkFlags VkPipelineStageFlags; @@ -4154,6 +4161,232 @@ typedef struct VkValidationFlagsEXT { +#define VK_NVX_device_generated_commands 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX) + +#define 
VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 1 +#define VK_NVX_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_NVX_device_generated_commands" + + +typedef enum VkIndirectCommandsTokenTypeNVX { + VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX = 0, + VK_INDIRECT_COMMANDS_TOKEN_DESCRIPTOR_SET_NVX = 1, + VK_INDIRECT_COMMANDS_TOKEN_INDEX_BUFFER_NVX = 2, + VK_INDIRECT_COMMANDS_TOKEN_VERTEX_BUFFER_NVX = 3, + VK_INDIRECT_COMMANDS_TOKEN_PUSH_CONSTANT_NVX = 4, + VK_INDIRECT_COMMANDS_TOKEN_DRAW_INDEXED_NVX = 5, + VK_INDIRECT_COMMANDS_TOKEN_DRAW_NVX = 6, + VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX = 7, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_BEGIN_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_END_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_RANGE_SIZE_NVX = (VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX - VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX + 1), + VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF +} VkIndirectCommandsTokenTypeNVX; + +typedef enum VkObjectEntryTypeNVX { + VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX = 0, + VK_OBJECT_ENTRY_PIPELINE_NVX = 1, + VK_OBJECT_ENTRY_INDEX_BUFFER_NVX = 2, + VK_OBJECT_ENTRY_VERTEX_BUFFER_NVX = 3, + VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX = 4, + VK_OBJECT_ENTRY_TYPE_BEGIN_RANGE_NVX = VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX, + VK_OBJECT_ENTRY_TYPE_END_RANGE_NVX = VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX, + VK_OBJECT_ENTRY_TYPE_RANGE_SIZE_NVX = (VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX - VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX + 1), + VK_OBJECT_ENTRY_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF +} VkObjectEntryTypeNVX; + + +typedef enum VkIndirectCommandsLayoutUsageFlagBitsNVX { + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NVX = 0x00000001, + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_SPARSE_SEQUENCES_BIT_NVX = 0x00000002, + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EMPTY_EXECUTIONS_BIT_NVX = 0x00000004, + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX = 0x00000008, + 
VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF +} VkIndirectCommandsLayoutUsageFlagBitsNVX; +typedef VkFlags VkIndirectCommandsLayoutUsageFlagsNVX; + +typedef enum VkObjectEntryUsageFlagBitsNVX { + VK_OBJECT_ENTRY_USAGE_GRAPHICS_BIT_NVX = 0x00000001, + VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX = 0x00000002, + VK_OBJECT_ENTRY_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF +} VkObjectEntryUsageFlagBitsNVX; +typedef VkFlags VkObjectEntryUsageFlagsNVX; + +typedef struct VkDeviceGeneratedCommandsFeaturesNVX { + VkStructureType sType; + const void* pNext; + VkBool32 computeBindingPointSupport; +} VkDeviceGeneratedCommandsFeaturesNVX; + +typedef struct VkDeviceGeneratedCommandsLimitsNVX { + VkStructureType sType; + const void* pNext; + uint32_t maxIndirectCommandsLayoutTokenCount; + uint32_t maxObjectEntryCounts; + uint32_t minSequenceCountBufferOffsetAlignment; + uint32_t minSequenceIndexBufferOffsetAlignment; + uint32_t minCommandsTokenBufferOffsetAlignment; +} VkDeviceGeneratedCommandsLimitsNVX; + +typedef struct VkIndirectCommandsTokenNVX { + VkIndirectCommandsTokenTypeNVX tokenType; + VkBuffer buffer; + VkDeviceSize offset; +} VkIndirectCommandsTokenNVX; + +typedef struct VkIndirectCommandsLayoutTokenNVX { + VkIndirectCommandsTokenTypeNVX tokenType; + uint32_t bindingUnit; + uint32_t dynamicCount; + uint32_t divisor; +} VkIndirectCommandsLayoutTokenNVX; + +typedef struct VkIndirectCommandsLayoutCreateInfoNVX { + VkStructureType sType; + const void* pNext; + VkPipelineBindPoint pipelineBindPoint; + VkIndirectCommandsLayoutUsageFlagsNVX flags; + uint32_t tokenCount; + const VkIndirectCommandsLayoutTokenNVX* pTokens; +} VkIndirectCommandsLayoutCreateInfoNVX; + +typedef struct VkCmdProcessCommandsInfoNVX { + VkStructureType sType; + const void* pNext; + VkObjectTableNVX objectTable; + VkIndirectCommandsLayoutNVX indirectCommandsLayout; + uint32_t indirectCommandsTokenCount; + const VkIndirectCommandsTokenNVX* pIndirectCommandsTokens; + uint32_t 
maxSequencesCount; + VkCommandBuffer targetCommandBuffer; + VkBuffer sequencesCountBuffer; + VkDeviceSize sequencesCountOffset; + VkBuffer sequencesIndexBuffer; + VkDeviceSize sequencesIndexOffset; +} VkCmdProcessCommandsInfoNVX; + +typedef struct VkCmdReserveSpaceForCommandsInfoNVX { + VkStructureType sType; + const void* pNext; + VkObjectTableNVX objectTable; + VkIndirectCommandsLayoutNVX indirectCommandsLayout; + uint32_t maxSequencesCount; +} VkCmdReserveSpaceForCommandsInfoNVX; + +typedef struct VkObjectTableCreateInfoNVX { + VkStructureType sType; + const void* pNext; + uint32_t objectCount; + const VkObjectEntryTypeNVX* pObjectEntryTypes; + const uint32_t* pObjectEntryCounts; + const VkObjectEntryUsageFlagsNVX* pObjectEntryUsageFlags; + uint32_t maxUniformBuffersPerDescriptor; + uint32_t maxStorageBuffersPerDescriptor; + uint32_t maxStorageImagesPerDescriptor; + uint32_t maxSampledImagesPerDescriptor; + uint32_t maxPipelineLayouts; +} VkObjectTableCreateInfoNVX; + +typedef struct VkObjectTableEntryNVX { + VkObjectEntryTypeNVX type; + VkObjectEntryUsageFlagsNVX flags; +} VkObjectTableEntryNVX; + +typedef struct VkObjectTablePipelineEntryNVX { + VkObjectEntryTypeNVX type; + VkObjectEntryUsageFlagsNVX flags; + VkPipeline pipeline; +} VkObjectTablePipelineEntryNVX; + +typedef struct VkObjectTableDescriptorSetEntryNVX { + VkObjectEntryTypeNVX type; + VkObjectEntryUsageFlagsNVX flags; + VkPipelineLayout pipelineLayout; + VkDescriptorSet descriptorSet; +} VkObjectTableDescriptorSetEntryNVX; + +typedef struct VkObjectTableVertexBufferEntryNVX { + VkObjectEntryTypeNVX type; + VkObjectEntryUsageFlagsNVX flags; + VkBuffer buffer; +} VkObjectTableVertexBufferEntryNVX; + +typedef struct VkObjectTableIndexBufferEntryNVX { + VkObjectEntryTypeNVX type; + VkObjectEntryUsageFlagsNVX flags; + VkBuffer buffer; +} VkObjectTableIndexBufferEntryNVX; + +typedef struct VkObjectTablePushConstantEntryNVX { + VkObjectEntryTypeNVX type; + VkObjectEntryUsageFlagsNVX flags; +
VkPipelineLayout pipelineLayout; + VkShaderStageFlags stageFlags; +} VkObjectTablePushConstantEntryNVX; + + +typedef void (VKAPI_PTR *PFN_vkCmdProcessCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo); +typedef void (VKAPI_PTR *PFN_vkCmdReserveSpaceForCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCreateIndirectCommandsLayoutNVX)(VkDevice device, const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyIndirectCommandsLayoutNVX)(VkDevice device, VkIndirectCommandsLayoutNVX indirectCommandsLayout, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateObjectTableNVX)(VkDevice device, const VkObjectTableCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkObjectTableNVX* pObjectTable); +typedef void (VKAPI_PTR *PFN_vkDestroyObjectTableNVX)(VkDevice device, VkObjectTableNVX objectTable, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkRegisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectTableEntryNVX* const* ppObjectTableEntries, const uint32_t* pObjectIndices); +typedef VkResult (VKAPI_PTR *PFN_vkUnregisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectEntryTypeNVX* pObjectEntryTypes, const uint32_t* pObjectIndices); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)(VkPhysicalDevice physicalDevice, VkDeviceGeneratedCommandsFeaturesNVX* pFeatures, VkDeviceGeneratedCommandsLimitsNVX* pLimits); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdProcessCommandsNVX( + VkCommandBuffer commandBuffer, + const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo); + +VKAPI_ATTR void 
VKAPI_CALL vkCmdReserveSpaceForCommandsNVX( + VkCommandBuffer commandBuffer, + const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutNVX( + VkDevice device, + const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout); + +VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutNVX( + VkDevice device, + VkIndirectCommandsLayoutNVX indirectCommandsLayout, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateObjectTableNVX( + VkDevice device, + const VkObjectTableCreateInfoNVX* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkObjectTableNVX* pObjectTable); + +VKAPI_ATTR void VKAPI_CALL vkDestroyObjectTableNVX( + VkDevice device, + VkObjectTableNVX objectTable, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkRegisterObjectsNVX( + VkDevice device, + VkObjectTableNVX objectTable, + uint32_t objectCount, + const VkObjectTableEntryNVX* const* ppObjectTableEntries, + const uint32_t* pObjectIndices); + +VKAPI_ATTR VkResult VKAPI_CALL vkUnregisterObjectsNVX( + VkDevice device, + VkObjectTableNVX objectTable, + uint32_t objectCount, + const VkObjectEntryTypeNVX* pObjectEntryTypes, + const uint32_t* pObjectIndices); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX( + VkPhysicalDevice physicalDevice, + VkDeviceGeneratedCommandsFeaturesNVX* pFeatures, + VkDeviceGeneratedCommandsLimitsNVX* pLimits); +#endif + #ifdef __cplusplus } #endif