Add AMD work around in Vulkan to create a new secondary command buffer
authorGreg Daniel <egdaniel@google.com>
Wed, 22 Mar 2017 19:45:43 +0000 (15:45 -0400)
committerSkia Commit-Bot <skia-commit-bot@chromium.org>
Wed, 22 Mar 2017 20:24:31 +0000 (20:24 +0000)
whenever we change the VkPipeline. All these secondary CBs are still
submitted within one render pass.

This works around the amd bug linked in the bug below. It will probably
cause a slight performance hit, so I will track it on perf and revert if
the hit is significant.

BUG=skia:6406

Change-Id: I48ff39ab36cfa96a67397f745ff65fe8b199f02b
Reviewed-on: https://skia-review.googlesource.com/9987
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
src/gpu/vk/GrVkCaps.cpp
src/gpu/vk/GrVkCaps.h
src/gpu/vk/GrVkGpu.cpp
src/gpu/vk/GrVkGpu.h
src/gpu/vk/GrVkGpuCommandBuffer.cpp
src/gpu/vk/GrVkGpuCommandBuffer.h

index d185b17..af2a41d 100644 (file)
@@ -20,6 +20,7 @@ GrVkCaps::GrVkCaps(const GrContextOptions& contextOptions, const GrVkInterface*
     fSupportsCopiesAsDraws = false;
     fMustSubmitCommandsBeforeCopyOp = false;
     fMustSleepOnTearDown  = false;
+    fNewSecondaryCBOnPipelineChange = false;
 
     /**************************************************************************
     * GrDrawTargetCaps fields
@@ -172,6 +173,12 @@ void GrVkCaps::initGrCaps(const VkPhysicalDeviceProperties& properties,
     fStencilWrapOpsSupport = true;
     fOversizedStencilSupport = true;
     fSampleShadingSupport = SkToBool(featureFlags & kSampleRateShading_GrVkFeatureFlag);
+
+    // AMD seems to have issues binding new VkPipelines inside a secondary command buffer.
+    // Current workaround is to use a different secondary command buffer for each new VkPipeline.
+    if (kAMD_VkVendor == properties.vendorID) {
+        fNewSecondaryCBOnPipelineChange = true;
+    }
 }
 
 void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties, uint32_t featureFlags) {
index e04bc16..73ed367 100644 (file)
@@ -60,26 +60,43 @@ public:
         return SkToBool(ConfigInfo::kBlitSrc_Flag & flags);
     }
 
+    // Tells of if we can pass in straight GLSL string into vkCreateShaderModule
     bool canUseGLSLForShaderModule() const {
         return fCanUseGLSLForShaderModule;
     }
 
+    // On Adreno vulkan, they do not respect the imageOffset parameter at least in
+    // copyImageToBuffer. This flag says that we must do the copy starting from the origin always.
     bool mustDoCopiesFromOrigin() const {
         return fMustDoCopiesFromOrigin;
     }
 
+    // Check whether we support using draws for copies.
     bool supportsCopiesAsDraws() const {
         return fSupportsCopiesAsDraws;
     }
 
+    // On Nvidia there is a current bug where we must the current command buffer before copy
+    // operations or else the copy will not happen. This includes copies, blits, resolves, and copy
+    // as draws.
     bool mustSubmitCommandsBeforeCopyOp() const {
         return fMustSubmitCommandsBeforeCopyOp;
     }
 
+    // Sometimes calls to QueueWaitIdle return before actually signalling the fences
+    // on the command buffers even though they have completed. This causes an assert to fire when
+    // destroying the command buffers. Therefore we add a sleep to make sure the fence signals.
     bool mustSleepOnTearDown() const {
         return fMustSleepOnTearDown;
     }
 
+    // Returns true if while adding commands to secondary command buffers, we must make a new
+    // secondary command buffer everytime we want to bind a new VkPipeline. This is to work around a
+    // driver bug specifically on AMD.
+    bool newSecondaryCBOnPipelineChange() const {
+        return fNewSecondaryCBOnPipelineChange;
+    }
+
     /**
      * Returns both a supported and most prefered stencil format to use in draws.
      */
@@ -129,26 +146,18 @@ private:
 
     StencilFormat fPreferedStencilFormat;
 
-    // Tells of if we can pass in straight GLSL string into vkCreateShaderModule
     bool fCanUseGLSLForShaderModule;
 
-    // On Adreno vulkan, they do not respect the imageOffset parameter at least in
-    // copyImageToBuffer. This flag says that we must do the copy starting from the origin always.
     bool fMustDoCopiesFromOrigin;
 
-    // Check whether we support using draws for copies.
     bool fSupportsCopiesAsDraws;
 
-    // On Nvidia there is a current bug where we must the current command buffer before copy
-    // operations or else the copy will not happen. This includes copies, blits, resolves, and copy
-    // as draws.
     bool fMustSubmitCommandsBeforeCopyOp;
 
-    // Sometimes calls to QueueWaitIdle return before actually signalling the fences
-    // on the command buffers even though they have completed. This causes an assert to fire when
-    // destroying the command buffers. Therefore we add a sleep to make sure the fence signals.
     bool fMustSleepOnTearDown;
 
+    bool fNewSecondaryCBOnPipelineChange;
+
     typedef GrCaps INHERITED;
 };
 
index 6412a55..e1ff70f 100644 (file)
@@ -1794,7 +1794,7 @@ void adjust_bounds_to_granularity(SkIRect* dstBounds, const SkIRect& srcBounds,
     }
 }
 
-void GrVkGpu::submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer* buffer,
+void GrVkGpu::submitSecondaryCommandBuffer(const SkTArray<GrVkSecondaryCommandBuffer*>& buffers,
                                            const GrVkRenderPass* renderPass,
                                            const VkClearValue* colorClear,
                                            GrVkRenderTarget* target,
@@ -1820,7 +1820,9 @@ void GrVkGpu::submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer* buffer,
     }
 
     fCurrentCmdBuffer->beginRenderPass(this, renderPass, colorClear, *target, *pBounds, true);
-    fCurrentCmdBuffer->executeCommands(this, buffer);
+    for (int i = 0; i < buffers.count(); ++i) {
+        fCurrentCmdBuffer->executeCommands(this, buffers[i]);
+    }
     fCurrentCmdBuffer->endRenderPass(this);
 
     this->didWriteToSurface(target, &bounds);
index f47f471..db77443 100644 (file)
@@ -120,7 +120,7 @@ public:
         this->internalResolveRenderTarget(target, true);
     }
 
-    void submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer*,
+    void submitSecondaryCommandBuffer(const SkTArray<GrVkSecondaryCommandBuffer*>&,
                                       const GrVkRenderPass*,
                                       const VkClearValue*,
                                       GrVkRenderTarget*,
index c4a8dae..6b2cd8e 100644 (file)
@@ -57,13 +57,14 @@ GrVkGpuCommandBuffer::GrVkGpuCommandBuffer(GrVkGpu* gpu,
                                            const LoadAndStoreInfo& stencilInfo)
     : fGpu(gpu)
     , fRenderTarget(nullptr)
-    , fClearColor(GrColor4f::FromGrColor(colorInfo.fClearColor)){
+    , fClearColor(GrColor4f::FromGrColor(colorInfo.fClearColor))
+    , fLastPipelineState(nullptr) {
 
     get_vk_load_store_ops(colorInfo, &fVkColorLoadOp, &fVkColorStoreOp);
 
     get_vk_load_store_ops(stencilInfo, &fVkStencilLoadOp, &fVkStencilStoreOp);
 
-    fCurrentCmdBuffer = -1;
+    fCurrentCmdInfo = -1;
 }
 
 void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target) {
@@ -75,7 +76,7 @@ void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target) {
 
     CommandBufferInfo& cbInfo = fCommandBufferInfos.push_back();
     SkASSERT(fCommandBufferInfos.count() == 1);
-    fCurrentCmdBuffer = 0;
+    fCurrentCmdInfo = 0;
 
     const GrVkResourceProvider::CompatibleRPHandle& rpHandle = target->compatibleRenderPassHandle();
     if (rpHandle.isValid()) {
@@ -97,15 +98,17 @@ void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target) {
     cbInfo.fIsEmpty = true;
     cbInfo.fStartsWithClear = false;
 
-    cbInfo.fCommandBuffer = fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer();
-    cbInfo.fCommandBuffer->begin(fGpu, target->framebuffer(), cbInfo.fRenderPass);
+    cbInfo.fCommandBuffers.push_back(fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer());
+    cbInfo.currentCmdBuf()->begin(fGpu, target->framebuffer(), cbInfo.fRenderPass);
 }
 
 
 GrVkGpuCommandBuffer::~GrVkGpuCommandBuffer() {
     for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
         CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
-        cbInfo.fCommandBuffer->unref(fGpu);
+        for (int j = 0; j < cbInfo.fCommandBuffers.count(); ++j) {
+            cbInfo.fCommandBuffers[j]->unref(fGpu);
+        }
         cbInfo.fRenderPass->unref(fGpu);
     }
 }
@@ -114,8 +117,8 @@ GrGpu* GrVkGpuCommandBuffer::gpu() { return fGpu; }
 GrRenderTarget* GrVkGpuCommandBuffer::renderTarget() { return fRenderTarget; }
 
 void GrVkGpuCommandBuffer::end() {
-    if (fCurrentCmdBuffer >= 0) {
-        fCommandBufferInfos[fCurrentCmdBuffer].fCommandBuffer->end(fGpu);
+    if (fCurrentCmdInfo >= 0) {
+        fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf()->end(fGpu);
     }
 }
 
@@ -172,7 +175,7 @@ void GrVkGpuCommandBuffer::onSubmit() {
             SkIRect iBounds;
             cbInfo.fBounds.roundOut(&iBounds);
 
-            fGpu->submitSecondaryCommandBuffer(cbInfo.fCommandBuffer, cbInfo.fRenderPass,
+            fGpu->submitSecondaryCommandBuffer(cbInfo.fCommandBuffers, cbInfo.fRenderPass,
                                                &cbInfo.fColorClearValue, fRenderTarget, iBounds);
         }
     }
@@ -185,7 +188,7 @@ void GrVkGpuCommandBuffer::discard(GrRenderTarget* rt) {
     }
     SkASSERT(target == fRenderTarget);
 
-    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
     if (cbInfo.fIsEmpty) {
         // We will change the render pass to do a clear load instead
         GrVkRenderPass::LoadStoreOps vkColorOps(VK_ATTACHMENT_LOAD_OP_DONT_CARE,
@@ -224,7 +227,7 @@ void GrVkGpuCommandBuffer::onClearStencilClip(GrRenderTarget* rt, const GrFixedC
     }
     SkASSERT(target == fRenderTarget);
 
-    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
 
     GrStencilAttachment* sb = fRenderTarget->renderTargetPriv().getStencilAttachment();
     // this should only be called internally when we know we have a
@@ -270,7 +273,7 @@ void GrVkGpuCommandBuffer::onClearStencilClip(GrRenderTarget* rt, const GrFixedC
     attachment.colorAttachment = 0; // this value shouldn't matter
     attachment.clearValue.depthStencil = vkStencilColor;
 
-    cbInfo.fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
+    cbInfo.currentCmdBuf()->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
     cbInfo.fIsEmpty = false;
 
     // Update command buffer bounds
@@ -291,7 +294,7 @@ void GrVkGpuCommandBuffer::onClear(GrRenderTarget* rt, const GrFixedClip& clip,
     }
     SkASSERT(target == fRenderTarget);
 
-    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
 
     VkClearColorValue vkColor;
     GrColorToRGBAFloat(color, vkColor.float32);
@@ -354,7 +357,7 @@ void GrVkGpuCommandBuffer::onClear(GrRenderTarget* rt, const GrFixedClip& clip,
     attachment.colorAttachment = colorIndex;
     attachment.clearValue.color = vkColor;
 
-    cbInfo.fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
+    cbInfo.currentCmdBuf()->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
     cbInfo.fIsEmpty = false;
 
     // Update command buffer bounds
@@ -367,10 +370,17 @@ void GrVkGpuCommandBuffer::onClear(GrRenderTarget* rt, const GrFixedClip& clip,
 }
 
 void GrVkGpuCommandBuffer::addAdditionalCommandBuffer() {
-    fCommandBufferInfos[fCurrentCmdBuffer].fCommandBuffer->end(fGpu);
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
+    cbInfo.currentCmdBuf()->end(fGpu);
+    cbInfo.fCommandBuffers.push_back(fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer());
+    cbInfo.currentCmdBuf()->begin(fGpu, fRenderTarget->framebuffer(), cbInfo.fRenderPass);
+}
+
+void GrVkGpuCommandBuffer::addAdditionalRenderPass() {
+    fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf()->end(fGpu);
 
     CommandBufferInfo& cbInfo = fCommandBufferInfos.push_back();
-    fCurrentCmdBuffer++;
+    fCurrentCmdInfo++;
 
     GrVkRenderPass::LoadStoreOps vkColorOps(VK_ATTACHMENT_LOAD_OP_LOAD,
                                             VK_ATTACHMENT_STORE_OP_STORE);
@@ -389,7 +399,7 @@ void GrVkGpuCommandBuffer::addAdditionalCommandBuffer() {
                                                                      vkStencilOps);
     }
 
-    cbInfo.fCommandBuffer = fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer();
+    cbInfo.fCommandBuffers.push_back(fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer());
     // It shouldn't matter what we set the clear color to here since we will assume loading of the
     // attachment.
     memset(&cbInfo.fColorClearValue, 0, sizeof(VkClearValue));
@@ -397,7 +407,7 @@ void GrVkGpuCommandBuffer::addAdditionalCommandBuffer() {
     cbInfo.fIsEmpty = true;
     cbInfo.fStartsWithClear = false;
 
-    cbInfo.fCommandBuffer->begin(fGpu, fRenderTarget->framebuffer(), cbInfo.fRenderPass);
+    cbInfo.currentCmdBuf()->begin(fGpu, fRenderTarget->framebuffer(), cbInfo.fRenderPass);
 }
 
 void GrVkGpuCommandBuffer::inlineUpload(GrOpFlushState* state, GrDrawOp::DeferredUploadFn& upload,
@@ -406,17 +416,17 @@ void GrVkGpuCommandBuffer::inlineUpload(GrOpFlushState* state, GrDrawOp::Deferre
     if (!fRenderTarget) {
         this->init(target);
     }
-    if (!fCommandBufferInfos[fCurrentCmdBuffer].fIsEmpty) {
-        this->addAdditionalCommandBuffer();
+    if (!fCommandBufferInfos[fCurrentCmdInfo].fIsEmpty) {
+        this->addAdditionalRenderPass();
     }
-    fCommandBufferInfos[fCurrentCmdBuffer].fPreDrawUploads.emplace_back(state, upload);
+    fCommandBufferInfos[fCurrentCmdInfo].fPreDrawUploads.emplace_back(state, upload);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 
 void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc,
                                         const GrNonInstancedMesh& mesh) {
-    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
     // There is no need to put any memory barriers to make sure host writes have finished here.
     // When a command buffer is submitted to a queue, there is an implicit memory barrier that
     // occurs for all host writes. Additionally, BufferMemoryBarriers are not allowed inside of
@@ -427,7 +437,7 @@ void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc,
     SkASSERT(vbuf);
     SkASSERT(!vbuf->isMapped());
 
-    cbInfo.fCommandBuffer->bindVertexBuffer(fGpu, vbuf);
+    cbInfo.currentCmdBuf()->bindVertexBuffer(fGpu, vbuf);
 
     if (mesh.isIndexed()) {
         SkASSERT(!mesh.indexBuffer()->isCPUBacked());
@@ -435,7 +445,7 @@ void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc,
         SkASSERT(ibuf);
         SkASSERT(!ibuf->isMapped());
 
-        cbInfo.fCommandBuffer->bindIndexBuffer(fGpu, ibuf);
+        cbInfo.currentCmdBuf()->bindIndexBuffer(fGpu, ibuf);
     }
 }
 
@@ -443,7 +453,8 @@ sk_sp<GrVkPipelineState> GrVkGpuCommandBuffer::prepareDrawState(
                                                                const GrPipeline& pipeline,
                                                                const GrPrimitiveProcessor& primProc,
                                                                GrPrimitiveType primitiveType) {
-    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
+    SkASSERT(cbInfo.fRenderPass);
 
     sk_sp<GrVkPipelineState> pipelineState =
         fGpu->resourceProvider().findOrCreateCompatiblePipelineState(pipeline,
@@ -454,11 +465,18 @@ sk_sp<GrVkPipelineState> GrVkGpuCommandBuffer::prepareDrawState(
         return pipelineState;
     }
 
+    if (!cbInfo.fIsEmpty &&
+        fLastPipelineState && fLastPipelineState != pipelineState.get() &&
+        fGpu->vkCaps().newSecondaryCBOnPipelineChange()) {
+        this->addAdditionalCommandBuffer();
+    }
+    fLastPipelineState = pipelineState.get();
+
     pipelineState->setData(fGpu, primProc, pipeline);
 
-    pipelineState->bind(fGpu, cbInfo.fCommandBuffer);
+    pipelineState->bind(fGpu, cbInfo.currentCmdBuf());
 
-    GrVkPipeline::SetDynamicState(fGpu, cbInfo.fCommandBuffer, pipeline);
+    GrVkPipeline::SetDynamicState(fGpu, cbInfo.currentCmdBuf(), pipeline);
 
     return pipelineState;
 }
@@ -509,9 +527,6 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline,
     if (!meshCount) {
         return;
     }
-    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
-    SkASSERT(cbInfo.fRenderPass);
-
     prepare_sampled_images(primProc, fGpu);
     GrFragmentProcessor::Iter iter(pipeline);
     while (const GrFragmentProcessor* fp = iter.next()) {
@@ -527,6 +542,8 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline,
         return;
     }
 
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo];
+
     for (int i = 0; i < meshCount; ++i) {
         const GrMesh& mesh = meshes[i];
         GrMesh::Iterator iter;
@@ -550,14 +567,14 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline,
             this->bindGeometry(primProc, *nonIdxMesh);
 
             if (nonIdxMesh->isIndexed()) {
-                cbInfo.fCommandBuffer->drawIndexed(fGpu,
+                cbInfo.currentCmdBuf()->drawIndexed(fGpu,
                                                    nonIdxMesh->indexCount(),
                                                    1,
                                                    nonIdxMesh->startIndex(),
                                                    nonIdxMesh->startVertex(),
                                                    0);
             } else {
-                cbInfo.fCommandBuffer->draw(fGpu,
+                cbInfo.currentCmdBuf()->draw(fGpu,
                                             nonIdxMesh->vertexCount(),
                                             1,
                                             nonIdxMesh->startVertex(),
index f5f6677..519edb3 100644 (file)
@@ -63,6 +63,7 @@ private:
     void onClearStencilClip(GrRenderTarget*, const GrFixedClip&, bool insideStencilMask) override;
 
     void addAdditionalCommandBuffer();
+    void addAdditionalRenderPass();
 
     struct InlineUploadInfo {
         InlineUploadInfo(GrOpFlushState* state, const GrDrawOp::DeferredUploadFn& upload)
@@ -73,17 +74,21 @@ private:
     };
 
     struct CommandBufferInfo {
-        const GrVkRenderPass*        fRenderPass;
-        GrVkSecondaryCommandBuffer*  fCommandBuffer;
-        VkClearValue                 fColorClearValue;
-        SkRect                       fBounds;
-        bool                         fIsEmpty;
-        bool                         fStartsWithClear;
-        SkTArray<InlineUploadInfo>   fPreDrawUploads;
+        const GrVkRenderPass*                  fRenderPass;
+        SkTArray<GrVkSecondaryCommandBuffer*>  fCommandBuffers;
+        VkClearValue                           fColorClearValue;
+        SkRect                                 fBounds;
+        bool                                   fIsEmpty;
+        bool                                   fStartsWithClear;
+        SkTArray<InlineUploadInfo>             fPreDrawUploads;
+
+        GrVkSecondaryCommandBuffer* currentCmdBuf() {
+            return fCommandBuffers.back();
+        }
     };
 
     SkTArray<CommandBufferInfo> fCommandBufferInfos;
-    int                         fCurrentCmdBuffer;
+    int                         fCurrentCmdInfo;
 
     GrVkGpu*                    fGpu;
     GrVkRenderTarget*           fRenderTarget;
@@ -92,6 +97,7 @@ private:
     VkAttachmentLoadOp          fVkStencilLoadOp;
     VkAttachmentStoreOp         fVkStencilStoreOp;
     GrColor4f                   fClearColor;
+    GrVkPipelineState*          fLastPipelineState;
 
     typedef GrGpuCommandBuffer INHERITED;
 };