同步示例

此页面包含 VK_KHR_synchronization2 API 的示例 - 原始同步 API 的示例可以在这里找到:旧式同步 API

Vulkan 中的同步可能会令人困惑。它需要花费大量时间来理解,即使理解了,也很容易在小细节上出错。不过,Vulkan 同步最常见的用法可以归结为少数几个用例,此页面列出了一些示例。

请注意,示例通常以管线屏障的形式表示,但事件或子通道依赖项也可以类似地使用。

计算到计算的依赖关系

第一个调度写入存储缓冲区,第二个调度从该存储缓冲区读取。

vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR };

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

第一个调度从存储缓冲区读取,第二个调度写入该存储缓冲区。

WAR 危害不需要它们之间的可用性或可见性操作 - 执行依赖关系就足够了。 没有访问标志的管线屏障或事件是执行依赖关系。

vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR };

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

第一个调度写入存储图像,第二个调度从该存储图像读取。

vkCmdDispatch(...);

// Storage image to storage image dependencies are always in GENERAL layout; no need for a layout transition
VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

三个调度。第一个调度写入存储缓冲区,第二个调度写入同一存储缓冲区的非重叠区域,第三个调度读取两个区域。

vkCmdDispatch(...);
vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR };

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

三个调度。第一个调度写入一个存储缓冲区,第二个调度写入不同的存储缓冲区,第三个调度读取两者。

与前面的示例相同 - 全局内存屏障覆盖所有资源。通常认为执行全局内存屏障比每个资源的屏障更有效,每个资源的屏障通常应用于队列所有权转移和图像布局转换 - 否则使用全局屏障。

vkCmdDispatch(...);
vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR };

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

计算到图形的依赖关系

请注意,与图形的交互最好通过使用子通道依赖项(外部或其他)而不是管线屏障来执行,但为简洁起见,以下大多数示例仍然描述为管线屏障。

调度写入存储缓冲区。绘制将该缓冲区作为索引缓冲区使用。

vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT_KHR_KHR,
  .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR };

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Render pass setup etc.

vkCmdDraw(...);

调度写入存储缓冲区。绘制将该缓冲区作为索引缓冲区使用。进一步的计算着色器将该缓冲区作为统一缓冲区读取。

vkCmdDispatch(...);

// Batch barriers where possible if it doesn't change how synchronization takes place
VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT_KHR_KHR | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_INDEX_READ_BIT_KHR | VK_ACCESS_2_UNIFORM_READ_BIT_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Render pass setup etc.

vkCmdDraw(...);

... // Render pass teardown etc.

vkCmdDispatch(...);

调度写入存储缓冲区。绘制将该缓冲区用作绘制间接缓冲区。

vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR };

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Render pass setup etc.

vkCmdDrawIndirect(...);

调度写入存储图像。绘制在片段着色器中采样该图像。

vkCmdDispatch(...);

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
  .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Render pass setup etc.

vkCmdDraw(...);

调度写入存储纹素缓冲区。绘制将该缓冲区用作绘制间接缓冲区,然后在片段着色器中再次用作统一缓冲区。

vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR | VK_ACCESS_2_UNIFORM_READ_BIT_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDrawIndirect(...);

图形到计算的依赖关系

绘制写入颜色附件。调度从该图像采样。

请注意,颜色附件写入不在片段着色器中,它有自己专用的管线阶段!

vkCmdDraw(...);

... // Render pass teardown etc.

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

绘制写入深度附件。调度从该图像采样。

请注意,深度附件写入不在片段着色器中,它有自己专用的管线阶段!

vkCmdDraw(...);

... // Render pass teardown etc.

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

图形到图形的依赖关系

许多图形到图形的依赖关系可以表示为渲染过程中的子通道依赖关系,这通常比管线屏障或事件更有效。在下面的示例中,如果可能,示例将以子通道依赖关系的形式表示。

第一次绘制写入深度附件。第二次绘制在片段着色器中将其作为输入附件读取。

从 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL 到 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL 的转换在执行渲染过程时自动发生。

// Set this to the index in VkRenderPassCreateInfo::pAttachments where the depth image is described.
uint32_t depthAttachmentIndex = ...;

VkSubpassDescription subpasses[2];

VkAttachmentReference depthAttachment = {
    .attachment = depthAttachmentIndex,
    .layout     = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL};

// Subpass containing first draw
subpasses[0] = {
    ...
    .pDepthStencilAttachment = &depthAttachment,
    ...};

VkAttachmentReference depthAsInputAttachment = {
    .attachment = depthAttachmentIndex,
    .layout     = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL};

// Subpass containing second draw
subpasses[1] = {
    ...
    .inputAttachmentCount = 1,
    .pInputAttachments = &depthAsInputAttachment,
    ...};

VkSubpassDependency dependency = {
    .srcSubpass = 0,
    .dstSubpass = 1,
    .srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
    .dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    .srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
    .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT};

// If initialLayout does not match the layout of the attachment reference in the first subpass, there will be an implicit transition before starting the render pass.
// If finalLayout does not match the layout of the attachment reference in the last subpass, there will be an implicit transition at the end.
VkAttachmentDescription depthFramebufferAttachment = {
    ...
    .initialLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
    .finalLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL};

VkRenderPassCreateInfo renderPassCreateInfo = {
    ...
    .attachmentCount = 1,
    .pAttachments = &depthFramebufferAttachment,
    .subpassCount = 2,
    .pSubpasses = subpasses,
    .dependencyCount = 1,
    .pDependencies = &dependency};

vkCreateRenderPass(...);

...

第一次绘制写入深度附件。第二次绘制在片段着色器中采样该深度图像(例如,阴影贴图渲染)。

vkCmdDraw(...);

... // First render pass teardown etc.

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Second render pass setup etc.

vkCmdDraw(...);

第一次绘制写入颜色附件。第二次绘制在片段着色器中将其作为输入附件读取。

// Set this to the index in VkRenderPassCreateInfo::pAttachments where the color image is described.
uint32_t colorAttachmentIndex = ...;

VkSubpassDescription subpasses[2];

VkAttachmentReference colorAttachment = {
    .attachment = colorAttachmentIndex,
    .layout     = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL};

// Subpass containing first draw
subpasses[0] = {
    ...
    .colorAttachmentCount = 1,
    .pColorAttachments = &colorAttachment,
    ...};

VkAttachmentReference colorAsInputAttachment = {
    .attachment = colorAttachmentIndex,
    .layout     = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL};

// Subpass containing second draw
subpasses[1] = {
    ...
    .inputAttachmentCount = 1,
    .pInputAttachments = &colorAsInputAttachment,
    ...};

VkSubpassDependency dependency = {
    .srcSubpass = 0,
    .dstSubpass = 1,
    .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    .dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
    .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT};

// If initialLayout does not match the layout of the attachment reference in the first subpass, there will be an implicit transition before starting the render pass.
// If finalLayout does not match the layout of the attachment reference in the last subpass, there will be an implicit transition at the end.
VkAttachmentDescription colorFramebufferAttachment = {
    ...
    .initialLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
    .finalLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL};

VkRenderPassCreateInfo renderPassCreateInfo = {
    ...
    .attachmentCount = 1,
    .pAttachments = &colorFramebufferAttachment,
    .subpassCount = 2,
    .pSubpasses = subpasses,
    .dependencyCount = 1,
    .pDependencies = &dependency};

vkCreateRenderPass(...);

...

第一次绘制写入颜色附件。第二次绘制在片段着色器中从该颜色图像采样。

vkCmdDraw(...);

... // First render pass teardown etc.

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Second render pass setup etc.

vkCmdDraw(...);

第一次绘制写入颜色附件。第二次绘制在顶点着色器中从该颜色图像采样。

vkCmdDraw(...);

... // First render pass teardown etc.

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Second render pass setup etc.

vkCmdDraw(...);

第一次绘制在片段着色器中采样纹理。第二次绘制将该纹理作为颜色附件写入。

这是一个 WAR(写后读)危害,通常只需要执行依赖关系即可 - 这意味着您不需要提供任何内存屏障。 在这种情况下,您仍然需要一个内存屏障来进行布局转换,但是您不需要源访问掩码中的任何访问类型。 但是,布局转换本身被认为是写入操作,因此您确实需要目标访问掩码是正确的 - 否则在布局转换和颜色附件写入之间会出现 WAW(写后写)危害。

vkCmdDraw(...);

... // First render pass teardown etc.

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Second render pass setup etc.

vkCmdDraw(...);

第一个渲染通道写入深度附件。第二个渲染通道重用相同的深度附件。

这是 WAW(写后写)危害的一个示例,它始终需要内存依赖关系。 即使渲染通道没有读取先前通道的输出(实际上,在此示例中,先前图像内容由于从 UNDEFINED 转换而明确未保留),我们仍然需要内存依赖关系来确保对图像的写入不会被重新排序。

此外,由于我们正在使用自动布局转换(initialLayoutlayout 不同),因此重要的是要确保转换不会过早发生。 这通常需要显式指定 VK_SUBPASS_EXTERNAL 子通道依赖关系,因为默认的隐式依赖关系(其具有 srcStageMask = TOP)是不够的。(有关另一个示例,请参见“交换链图像获取和呈现”。)

此示例使用 VK_SUBPASS_EXTERNAL 子通道依赖关系来实现两个目标(解决 WAW 危害并阻止自动布局转换),但是像往常一样,也可以使用管道屏障。

// We're using the depth buffer as a depth-stencil attachment
VkAttachmentReference depthAttachment = {
    .attachment = 0,
    .layout     = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL};

VkAttachmentDescription depthFramebufferAttachment = {
    ...
    .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, // Want to clear the buffer at the start of the subpass
    .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, // No need to preserve previous image contents
    .finalLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL // When done, leave it in the layout used in the subpass (no transition at the end)
};

// Subpass using the depth-buffer
VkSubpassDescription subpass = {
    ...
    .pDepthStencilAttachment = &depthAttachment,
    ...};

// Use an incoming subpass-dependency to ensure:
// * Previous use of the depth-buffer is complete (execution dependency).
// * WAW hazard is resolved (e.g. caches are flushed and invalidated so old and new writes are not re-ordered).
// * Transition from UNDEFINED -> VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL happens-after previous `EARLY/LATE_FRAGMENT_TESTS` use.
// * Changes made to the image by the transition are accounted for by setting the appropriate dstAccessMask.
VkSubpassDependency dependency = {
    .srcSubpass = VK_SUBPASS_EXTERNAL,
    .dstSubpass = 0,
    .srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,  // Store op is always performed in late tests, after subpass access
    .dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, // Load op is always performed in early tests, before subpass access
    .srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,
    .dependencyFlags = 0};

VkRenderPassCreateInfo renderPassCreateInfo = {
    ...
    .attachmentCount = 1,
    .pAttachments = &depthFramebufferAttachment,
    .subpassCount = 1,
    .pSubpasses = &subpass
    .dependencyCount = 1,
    .pDependencies = &dependency};

vkCreateRenderPass(...);

...

# First render-pass
vkCmdBeginRenderPass();
...
vkCmdEndRenderPass();

...

# Second render-pass, could be the same or a different frame
vkCmdBeginRenderPass();
...
vkCmdEndRenderPass();

传输依赖

将数据从 CPU 上传到顶点缓冲区

离散主机和设备内存

如果存在一个带有 "HOST_VISIBLE" 但没有 "DEVICE_LOCAL" 的内存类型,并且存在一个单独的带有 "DEVICE_LOCAL" 的类型,则使用以下设置路径。 UMA 系统在下一个代码块中描述,尽管此代码可以在此类系统上工作,但会产生额外的内存开销。

设置

// Data and size of that data
const uint32_t vertexDataSize = ... ;
const void* pData = ... ;

// Create a staging buffer for upload
VkBufferCreateInfo stagingCreateInfo = {
    ...
    .size = vertexDataSize,
    .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    ... };

VkBuffer stagingBuffer;
vkCreateBuffer(device, &stagingCreateInfo, NULL, &stagingBuffer);

// Create the vertex buffer
VkBufferCreateInfo vertexCreateInfo = {
    ...
    .size = vertexDataSize,
    .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
    ... };

VkBuffer vertexBuffer;
vkCreateBuffer(device, &vertexCreateInfo, NULL, &vertexBuffer);

...

// Allocate and memory bind memory for these buffers.
// Ensure that the staging buffer uses a memory type that has
// VK_MEMORY_PROPERTY_HOST_VISIBLE property and doesn't have
// VK_MEMORY_PROPERTY_DEVICE_LOCAL.
// The vertex buffer memory should be the opposite - it should include
// VK_MEMORY_PROPERTY_DEVICE_LOCAL and should not have
// VK_MEMORY_PROPERTY_HOST_VISIBLE.
// Use the example code documented in the description of
// VkPhysicalDeviceMemoryProperties:
// https://www.khronos.org/registry/vulkan/specs/latest/man/html/VkPhysicalDeviceMemoryProperties.html

...

// Map the staging buffers - if you plan to re-use these (which you should),
// keep them mapped.
// Ideally just map the whole range at once as well.

void* stagingData;

vkMapMemory(
    ...
    stagingMemory,
    stagingMemoryOffset,
    vertexDataSize,
    0,
    &stagingData);

// Write data directly into the mapped pointer
fread(stagingData, vertexDataSize, 1, vertexFile);

// Flush the memory range
// If the memory type of stagingMemory includes VK_MEMORY_PROPERTY_HOST_COHERENT, skip this step

// Align to the VkPhysicalDeviceProperties::nonCoherentAtomSize
uint32_t alignedSize = (vertexDataSize-1) - ((vertexDataSize-1) % nonCoherentAtomSize) + nonCoherentAtomSize;

// Setup the range
VkMappedMemoryRange stagingRange = {
    ...
    .memory = stagingMemory,
    .offset = stagingMemoryOffset,
    .size   = alignedSize};

// Flush the range
vkFlushMappedMemoryRanges(device, 1, &stagingRange);

用于统一传输/图形队列的命令缓冲区记录和提交

vkBeginCommandBuffer(...);

// Submission guarantees the host write being complete, as per
// https://www.khronos.org/registry/vulkan/specs/latest/html/vkspec.html#synchronization-submission-host-writes
// So no need for a barrier before the transfer

// Copy the staging buffer contents to the vertex buffer
VkBufferCopy vertexCopyRegion = {
    .srcOffset = stagingMemoryOffset,
    .dstOffset = vertexMemoryOffset,
    .size      = vertexDataSize};

vkCmdCopyBuffer(
    commandBuffer,
    stagingBuffer,
    vertexBuffer,
    1,
    &vertexCopyRegion);


// If the graphics queue and transfer queue are the same queue
if (isUnifiedGraphicsAndTransferQueue)
{
    // If there is a semaphore signal + wait between this being submitted and
    // the vertex buffer being used, then skip this pipeline barrier.

    // Pipeline barrier before using the vertex data
    // Note that this can apply to all buffers uploaded in the same way, so
    // ideally batch all copies before this.
    VkMemoryBarrier2KHR memoryBarrier = {
        ...
        .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
        .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
        .dstStageMask = VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR,
        .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR};

    VkDependencyInfoKHR dependencyInfo = {
        ...
        1,                 // memoryBarrierCount
        &memoryBarrier,    // pMemoryBarriers
        ...
    }

    vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);


    vkEndCommandBuffer(...);

    vkQueueSubmit2KHR(unifiedQueue, ...);
}
else
{
    // Pipeline barrier to start a queue ownership transfer after the copy
    VkBufferMemoryBarrier2KHR bufferMemoryBarrier = {
        ...
        .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
        .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
        .srcQueueFamilyIndex = transferQueueFamilyIndex,
        .dstQueueFamilyIndex = graphicsQueueFamilyIndex,
        .buffer = vertexBuffer,
        ...};

    VkDependencyInfoKHR dependencyInfo = {
        ...
        1,                      // bufferMemoryBarrierCount
        &bufferMemoryBarrier,    // pBufferMemoryBarriers
        ...
    }

    vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

    vkEndCommandBuffer(...);

    // Ensure a semaphore is signalled here which will be waited on by the graphics queue.
    vkQueueSubmit2KHR(transferQueue, ...);

    // Record a command buffer for the graphics queue.
    vkBeginCommandBuffer(...);

    // Pipeline barrier before using the vertex buffer, after finalising the ownership transfer
    VkBufferMemoryBarrier2KHR bufferMemoryBarrier = {
        ...
        .dstStageMask = VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR,
        .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR,
        .srcQueueFamilyIndex = transferQueueFamilyIndex,
        .dstQueueFamilyIndex = graphicsQueueFamilyIndex,
        .buffer = vertexBuffer,
        ...};

    VkDependencyInfoKHR dependencyInfo = {
        ...
        1,                       // bufferMemoryBarrierCount
        &bufferMemoryBarrier,    // pBufferMemoryBarriers
        ...
    }

    vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

    vkEndCommandBuffer(...);

    vkQueueSubmit2KHR(graphicsQueue, ...);
}

统一内存

对于 UMA 系统,您可以使用以上设置,但是如果对于这些系统避免使用暂存缓冲区,则会使用更少的内存,如下面的设置所示。假设第一个使用它的命令在上传 *之后* 提交(而不是使用不推荐且此处未描述的 VkEvents),则无需执行任何设备端同步。

设置

// Data and size of that data
const uint32_t vertexDataSize = ... ;
const void* pData = ... ;

// Create the vertex buffer
VkBufferCreateInfo vertexCreateInfo = {
    ...
    .size = vertexDataSize,
    .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
    ... };

VkBuffer vertexBuffer;
vkCreateBuffer(device, &vertexCreateInfo, NULL, &vertexBuffer);

...

// Allocate and memory bind memory for this buffer.
// It should use a memory type that includes HOST_VISIBLE, and ideally also
// DEVICE_LOCAL if available.
// Use the example code documented in the description of
// VkPhysicalDeviceMemoryProperties:
// https://www.khronos.org/registry/vulkan/specs/latest/man/html/VkPhysicalDeviceMemoryProperties.html

...

// Map the vertex buffer

void* vertexData;

vkMapMemory(
    ...
    vertexMemory,
    vertexMemoryOffset,
    vertexDataSize,
    0,
    &vertexData);

// Write data directly into the mapped pointer
fread(vertexData, vertexDataSize, 1, vertexFile);

// Flush the memory range
// If the memory type of vertexMemory includes VK_MEMORY_PROPERTY_HOST_COHERENT, skip this step

// Align to the VkPhysicalDeviceProperties::nonCoherentAtomSize
uint32_t alignedSize = (vertexDataSize-1) - ((vertexDataSize-1) % nonCoherentAtomSize) + nonCoherentAtomSize;

// Setup the range
VkMappedMemoryRange vertexRange = {
    ...
    .memory = vertexMemory,
    .offset = vertexMemoryOffset,
    .size   = alignedSize};

// Flush the range
vkFlushMappedMemoryRanges(device, 1, &vertexRange);

// You may want to skip this if you're going to modify the
// data again
vkUnmapMemory(device, vertexMemory);

将数据从 CPU 上传到在片段着色器中采样的图像

此路径对于 UMA 和离散系统都是通用的,因为图像在上载时应转换为最佳平铺。

设置

// Data and size of that data
const uint32_t imageDataSize = ... ;

// Create a staging buffer for upload
VkBufferCreateInfo stagingCreateInfo = {
    ...
    .size = imageDataSize,
    .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    ... };

VkBuffer stagingBuffer;
vkCreateBuffer(device, &stagingCreateInfo, NULL, &stagingBuffer);

// Create the sampled image
VkImageCreateInfo imageCreateInfo = {
    ...
    // Set the dimensions for the image as appropriate
    .tiling = VK_IMAGE_TILING_OPTIMAL,
    .usage  = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
    ... };

VkImage image;
vkCreateImage(device, &imageCreateInfo, NULL, &image);

...

// Allocate and memory bind memory for these resources.
// Ensure that the staging buffer uses a memory type that has
// VK_MEMORY_PROPERTY_HOST_VISIBLE property and doesn't have
// VK_MEMORY_PROPERTY_DEVICE_LOCAL.
// The image memory should be the opposite - it should include
// VK_MEMORY_PROPERTY_DEVICE_LOCAL and should not have
// VK_MEMORY_PROPERTY_HOST_VISIBLE.
// Use the example code documented in the description of
// VkPhysicalDeviceMemoryProperties:
// https://www.khronos.org/registry/vulkan/specs/latest/man/html/VkPhysicalDeviceMemoryProperties.html

...

// Map the staging buffers - if you plan to re-use these (which you should),
// keep them mapped.
// Ideally just map the whole range at once as well.

void* stagingData;

vkMapMemory(
    ...
    stagingMemory,
    stagingMemoryOffset,
    imageDataSize,
    0,
    &stagingData);

// Write data directly into the mapped pointer
fread(stagingData, imageDataSize, 1, imageFile);

// Flush the memory range
// If the memory type of stagingMemory includes VK_MEMORY_PROPERTY_HOST_COHERENT, skip this step

// Align to the VkPhysicalDeviceProperties::nonCoherentAtomSize
uint32_t alignedSize = (imageDataSize-1) - ((imageDataSize-1) % nonCoherentAtomSize) + nonCoherentAtomSize;

// Setup the range
VkMappedMemoryRange stagingRange = {
    ...
    .memory = stagingMemory,
    .offset = stagingMemoryOffset,
    .size   = alignedSize};

// Flush the range
vkFlushMappedMemoryRanges(device, 1, &stagingRange);

命令缓冲区记录和提交

vkBeginCommandBuffer(...);

// Submission guarantees the host write being complete, as per
// https://www.khronos.org/registry/vulkan/specs/latest/html/vkspec.html#synchronization-submission-host-writes
// So no need for a barrier before the transfer for that purpose, but one is
// required for the image layout changes.

// Pipeline barrier before the copy to perform a layout transition
VkImageMemoryBarrier2KHR preCopyMemoryBarrier = {
    ...
    .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
    .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
    .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .image = image,
    .subresourceRange = ... }; // Transition as much of the image as you can at once.

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &preCopyMemoryBarrier,  // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

// Setup copies for the all regions required (should be batched into a single call where possible)
vkCmdCopyBufferToImage(
    commandBuffer,
    stagingBuffer,
    image,
    ... };

// If the graphics queue and transfer queue are the same queue
if (isUnifiedGraphicsAndTransferQueue)
{
    // Pipeline barrier before using the vertex data
    VkImageMemoryBarrier2KHR postCopyMemoryBarrier = {
        ...
        .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
        .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
        .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
        .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = ... }; // Transition as much of the image as you can at once.

    VkDependencyInfoKHR dependencyInfo = {
        ...
        1,                      // imageMemoryBarrierCount
        &postCopyMemoryBarrier, // pImageMemoryBarriers
        ...
    }

    vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

    vkEndCommandBuffer(...);

    vkQueueSubmit2KHR(unifiedQueue, ...);
}
else
{
    // Pipeline barrier before using the vertex data
    VkImageMemoryBarrier2KHR postCopyTransferMemoryBarrier = {
        ...
        .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
        .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
        .srcQueueFamilyIndex = transferQueueFamilyIndex,
        .dstQueueFamilyIndex = graphicsQueueFamilyIndex,
        .image = image,
        .subresourceRange = ... }; // Transition as much of the image as you can at once.

    VkDependencyInfoKHR dependencyInfo = {
        ...
        1,                              // imageMemoryBarrierCount
        &postCopyTransferMemoryBarrier, // pImageMemoryBarriers
        ...
    }

    vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

    vkEndCommandBuffer(...);

    vkQueueSubmit2KHR(transferQueue, ...);

    vkBeginCommandBuffer(...);

    // Pipeline barrier before using the vertex data
    VkImageMemoryBarrier2KHR postCopyGraphicsMemoryBarrier = {
        ...
        .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
        .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
        .srcQueueFamilyIndex = transferQueueFamilyIndex,
        .dstQueueFamilyIndex = graphicsQueueFamilyIndex,
        .image = image,
        .subresourceRange = ... }; // Transition as much of the image as you can at once.

    VkDependencyInfoKHR dependencyInfo = {
        ...
        1,                              // imageMemoryBarrierCount
        &postCopyGraphicsMemoryBarrier, // pImageMemoryBarriers
        ...
    }

    vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

    vkEndCommandBuffer(...);

    vkQueueSubmit2KHR(graphicsQueue, ...);
}

CPU 读回由计算着色器写入的数据

此示例显示了将计算着色器写入缓冲区的数据取回 CPU 所需的步骤。

vkCmdDispatch(...);

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_HOST_READ_BIT_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkEndCommandBuffer(...);

vkQueueSubmit2KHR(..., fence); // Submit the command buffer with a fence

GPU 需要时间进行处理,因此应与其他资源管理(例如,交换链图像)进行流水线处理

vkWaitForFences(fence);

// If the memory is host coherent, skip this step - otherwise invalidation is necessary
if (memoryIsNotHostCoherent) {
    VkMappedMemoryRange mappedMemoryRange = {
        ...
        mappedMemory,       // Mapped pointer to the VkDeviceMemory allocation backing the buffer.
        ...
    } VkMappedMemoryRange;

    vkInvalidateMappedMemoryRanges(..., 1, &mappedMemoryRange);
}

// Read values back from the mapped pointer
value = mappedMemory[...];

与信号量的交互

如果您有一个依赖关系,其中两个正在同步的命令之间具有信号量信号/等待,则可以减少或删除由管道屏障/事件/子通道依赖项完成的其他同步。 仅列出受信号量依赖项影响的参数

任何仅影响缓冲区或图像(布局不变)的依赖关系

// Nothing to see here - semaphore alone is sufficient.
// No additional synchronization required - remove those barriers.

发出信号量会等待所有阶段完成,并且所有内存访问都会自动可用。 同样,等待信号量将使所有内存访问可用,并阻止进一步的工作开始,直到发出信号。 请注意,在 QueueSubmit 的情况下,存在一组明确的阶段,以防止在 VkSubmitInfo::pWaitDstStageMask 中运行 - 对于所有其他信号量使用,所有工作的执行都被阻止。

在信号量信号之前表达的,需要在图像之间进行布局转换的依赖关系

vkCmdDispatch(...);

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .dstStageMask = VK_PIPELINE_STAGE_2_NONE_KHR
  .dstAccessMask = VK_ACCESS_2_NONE_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Semaphore signal/wait happens here

vkCmdDispatch(...);

在信号量信号之后表达的,需要在图像之间进行布局转换的依赖关系

vkCmdDispatch(...);

... // Semaphore signal/wait happens here

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_NONE_KHR
  .srcAccessMask = VK_ACCESS_2_NONE_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

vkCmdDispatch(...);

srcAccessMask 中使用的阶段**必须**等于(或逻辑上晚于)为相关信号量等待操作定义的 VkSubmitInfo::pWaitDstStageMask 中的值,否则不能保证屏障在信号量等待之后发生。 在此示例中,我们假设相关的 pWaitDstStageMask 值等于 VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR

交换链图像获取和呈现

组合的图形/呈现队列

VkAttachmentReference attachmentReference = {
    .attachment = 0,
    .layout     = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL};

// Subpass containing first draw
VkSubpassDescription subpass = {
    ...
    .colorAttachmentCount = 1,
    .pColorAttachments = &attachmentReference,
    ...};

/* Add external dependencies to ensure that the layout
   transitions happen at the right time.
   Unlike synchronization 1, we insert 2 dependencies,
   as the semaphore wait and signal operations happen
   at COLOR_ATTACHMENT_OUTPUT to reduce their scope to
   the minimum; the subpass dependencies are then both
   adjusted to match */
VkSubpassDependency dependencies[2] = {
    {
        .srcSubpass = VK_SUBPASS_EXTERNAL,
        .dstSubpass = 0,
        .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        .srcAccessMask = VK_ACCESS_NONE_KHR,
        .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dependencyFlags = 0
    },
    /* In cases where the application signals the semaphore with
       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, or uses vkQueueSubmit,
       this second dependency could be omitted. */
    {
        .srcSubpass = 0,
        .dstSubpass = VK_SUBPASS_EXTERNAL,
        .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_NONE_KHR,
        .dependencyFlags = 0
    }
};


VkAttachmentDescription attachmentDescription = {
    ...
    .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
    ...
    // The image will automatically be transitioned from UNDEFINED to COLOR_ATTACHMENT_OPTIMAL for rendering, then out to PRESENT_SRC_KHR at the end.
    .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    // Presenting images in Vulkan requires a special layout.
    .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR};

VkRenderPassCreateInfo renderPassCreateInfo = {
    ...
    .attachmentCount = 1,
    .pAttachments    = &attachmentDescription,
    .subpassCount    = 1,
    .pSubpasses      = &subpass,
    .dependencyCount = 2,
    .pDependencies   = dependencies};

vkCreateRenderPass(...);

...

vkAcquireNextImageKHR(
    ...
    acquireCompleteSemaphore,   //semaphore
    ...
    &imageIndex);       //image index

VkSemaphoreSubmitInfoKHR acquireCompleteInfo = {
    ...
    .semaphore = acquireCompleteSemaphore,
    .stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR};

VkSemaphoreSubmitInfoKHR renderingCompleteInfo = {
    ...
    .semaphore = renderingCompleteSemaphore,
    .stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR};

VkSubmitInfo2KHR submitInfo = {
    ...
    .waitSemaphoreInfoCount     = 1,
    .pWaitSemaphoreInfos        = &acquireCompleteInfo,
    ...
    .signalSemaphoreInfoCount   = 1,
    .pSignalSemaphoreInfos      = &renderingCompleteInfo};


vkQueueSubmit2KHR(..., &submitInfo, ...);

VkPresentInfoKHR presentInfo = {
    .waitSemaphoreCount = 1,
    .pWaitSemaphores = &renderingCompleteSemaphore,
    ...};

vkQueuePresentKHR(..., &presentInfo);

多个队列

如果呈现队列与完成渲染的队列是不同的队列,则在获取和呈现时必须在两个队列之间额外执行队列所有权转移,这需要额外的同步。

渲染通道设置

VkAttachmentReference attachmentReference = {
    .attachment = 0,
    .layout     = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL};

// Subpass containing first draw
VkSubpassDescription subpass = {
    ...
    .colorAttachmentCount = 1,
    .pColorAttachments = &attachmentReference,
    ...};

VkAttachmentDescription attachmentDescription = {
    ...
    .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
    ...
    .initialLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
    .finalLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL};

/*  Due to these necessary extra synchronization points, it makes more sense
    to omit the sub pass external dependencies (which can't express a queue
    transfer), and batch the relevant operations with the new pipeline
    barriers we're introducing. */

VkRenderPassCreateInfo renderPassCreateInfo = {
    ...
    .attachmentCount = 1,
    .pAttachments    = &attachmentDescription,
    .subpassCount    = 1,
    .pSubpasses      = &subpass,
    .dependencyCount = 0,
    .pDependencies   = NULL};

vkCreateRenderPass(...);

渲染命令缓冲区 - 图形队列

/* Queue ownership transfer is only required when we need the content to remain valid across queues.
   Since we are transitioning from UNDEFINED -- and therefore discarding the image contents to begin with --
   we are not required to perform an ownership transfer from the presentation queue to graphics.

   This transition could also be made as an EXTERNAL -> subpass #0 render pass dependency as shown earlier. */

VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
  .newLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

... // Render pass submission.

// Queue release operation. dstAccessMask should always be 0.
VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
  .srcQueueFamilyIndex = graphicsQueueFamilyIndex, // index of the graphics queue family
  .dstQueueFamilyIndex = presentQueueFamilyIndex,  // index of the present queue family
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

预呈现命令 - 呈现队列

// After submitting the render pass...
VkImageMemoryBarrier2KHR imageMemoryBarrier = {
  ...
  // A layout transition which happens as part of an ownership transfer needs to be specified twice; one for the release, and one for the acquire.
  // No srcStage/AccessMask is needed, waiting for a semaphore does that automatically.
  // No dstStage/AccessMask is needed, signalling a semaphore does that automatically.
  .oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
  .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
  .srcQueueFamilyIndex = graphicsQueueFamilyIndex, // index of the graphics queue family
  .dstQueueFamilyIndex = presentQueueFamilyIndex,  // index of the present queue family
  /* .image and .subresourceRange should identify image subresource accessed */};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,                      // imageMemoryBarrierCount
    &imageMemoryBarrier,    // pImageMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);

队列提交

vkAcquireNextImageKHR(
    ...
    acquireCompleteSemaphore,   //semaphore
    ...
    &imageIndex);       //image index

VkSemaphoreSubmitInfoKHR acquireCompleteInfo = {
    ...
    .semaphore = acquireCompleteSemaphore,
    .stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR};

VkSemaphoreSubmitInfoKHR renderingCompleteInfo = {
    ...
    .semaphore = renderingCompleteSemaphore,
    .stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR};

VkCommandBufferSubmitInfoKHR renderingCommandBufferInfo = {
    ...
    .commandBuffer = renderingCommandBuffer;
};

VkSubmitInfo2KHR renderingSubmitInfo = {
    ...
    .waitSemaphoreInfoCount     = 1,
    .pWaitSemaphoreInfos        = &acquireCompleteInfo,
    .commandBufferInfoCount     = 1,
    .pCommandBufferInfos        = &renderingCommandBufferInfo,
    .signalSemaphoreInfoCount   = 1,
    .pSignalSemaphoreInfos      = &renderingCompleteSemaphore};

vkQueueSubmit2KHR(renderQueue, &renderingSubmitInfo, ...);

VkSemaphoreSubmitInfoKHR prePresentWaitInfo = {
    ...
    .semaphore = renderingCompleteSemaphore,
    .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR};

VkSemaphoreSubmitInfoKHR prePresentCompleteInfo = {
    ...
    .semaphore = prePresentCompleteSemaphore,
    .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR};

VkCommandBufferSubmitInfoKHR prePresentCommandBufferInfo = {
    ...
    .commandBuffer = prePresentCommandBuffer;
};

VkSubmitInfo2KHR prePresentSubmitInfo = {
    ...
    .waitSemaphoreInfoCount     = 1,
    .pWaitSemaphoreInfos        = &prePresentWaitInfo,
    .commandBufferInfoCount     = 1,
    .pCommandBufferInfos        = &prePresentCommandBufferInfo,
    .signalSemaphoreInfoCount   = 1,
    .pSignalSemaphoreInfos      = &prePresentCompleteInfo};

vkQueueSubmit2KHR(presentQueue, &prePresentSubmitInfo, ...);

VkPresentInfoKHR presentInfo = {
    .waitSemaphoreCount = 1,
    .pWaitSemaphores = &prePresentCompleteSemaphore,
    ...};

vkQueuePresentKHR(..., &presentInfo);

完整管道屏障

您应该**仅将此用于调试** - 这不是应该在实际代码中发布的内容,它将刷新并使所有缓存失效并使所有内容停止,这是一个不应轻易使用的工具!

也就是说,如果您认为应用程序中存在竞争条件,并且只想序列化所有内容以便进行调试,它会*非常*方便。

请注意,这不能处理图像布局 - 如果您正在调试,则可以将所有图像的布局设置为 GENERAL 来解决此问题,但再次强调 - 不要在发布代码中执行此操作!

VkMemoryBarrier2KHR memoryBarrier = {
  ...
  .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
  .srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR |
                   VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
  .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
  .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR |
                   VK_ACCESS_2_MEMORY_WRITE_BIT_KHR};

VkDependencyInfoKHR dependencyInfo = {
    ...
    1,              // memoryBarrierCount
    &memoryBarrier, // pMemoryBarriers
    ...
}

vkCmdPipelineBarrier2KHR(commandBuffer, &dependencyInfo);