From 22bc8653d704584e13f35844dafb5ddeb9989127 Mon Sep 17 00:00:00 2001 From: Greg Daniel Date: Wed, 22 Mar 2017 15:45:43 -0400 Subject: [PATCH] Add AMD work around in Vulkan to create a new secondary command buffer whenever we change the VkPipeline. All these secondary CBs are still submitted within one render pass. This works around the amd bug linked in the bug below. It will probably cause a slight performance hit, so I will track it on perf and revert if the hit is significant. BUG=skia:6406 Change-Id: I48ff39ab36cfa96a67397f745ff65fe8b199f02b Reviewed-on: https://skia-review.googlesource.com/9987 Commit-Queue: Greg Daniel Reviewed-by: Brian Salomon --- src/gpu/vk/GrVkCaps.cpp | 7 ++++ src/gpu/vk/GrVkCaps.h | 29 ++++++++----- src/gpu/vk/GrVkGpu.cpp | 6 ++- src/gpu/vk/GrVkGpu.h | 2 +- src/gpu/vk/GrVkGpuCommandBuffer.cpp | 81 ++++++++++++++++++++++--------------- src/gpu/vk/GrVkGpuCommandBuffer.h | 22 ++++++---- 6 files changed, 94 insertions(+), 53 deletions(-) diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp index d185b17..af2a41d 100644 --- a/src/gpu/vk/GrVkCaps.cpp +++ b/src/gpu/vk/GrVkCaps.cpp @@ -20,6 +20,7 @@ GrVkCaps::GrVkCaps(const GrContextOptions& contextOptions, const GrVkInterface* fSupportsCopiesAsDraws = false; fMustSubmitCommandsBeforeCopyOp = false; fMustSleepOnTearDown = false; + fNewSecondaryCBOnPipelineChange = false; /************************************************************************** * GrDrawTargetCaps fields @@ -172,6 +173,12 @@ void GrVkCaps::initGrCaps(const VkPhysicalDeviceProperties& properties, fStencilWrapOpsSupport = true; fOversizedStencilSupport = true; fSampleShadingSupport = SkToBool(featureFlags & kSampleRateShading_GrVkFeatureFlag); + + // AMD seems to have issues binding new VkPipelines inside a secondary command buffer. + // Current workaround is to use a different secondary command buffer for each new VkPipeline. 
+ if (kAMD_VkVendor == properties.vendorID) { + fNewSecondaryCBOnPipelineChange = true; + } } void GrVkCaps::initShaderCaps(const VkPhysicalDeviceProperties& properties, uint32_t featureFlags) { diff --git a/src/gpu/vk/GrVkCaps.h b/src/gpu/vk/GrVkCaps.h index e04bc16..73ed367 100644 --- a/src/gpu/vk/GrVkCaps.h +++ b/src/gpu/vk/GrVkCaps.h @@ -60,26 +60,43 @@ public: return SkToBool(ConfigInfo::kBlitSrc_Flag & flags); } + // Tells us if we can pass in straight GLSL string into vkCreateShaderModule bool canUseGLSLForShaderModule() const { return fCanUseGLSLForShaderModule; } + // On Adreno vulkan, they do not respect the imageOffset parameter at least in + // copyImageToBuffer. This flag says that we must do the copy starting from the origin always. bool mustDoCopiesFromOrigin() const { return fMustDoCopiesFromOrigin; } + // Check whether we support using draws for copies. bool supportsCopiesAsDraws() const { return fSupportsCopiesAsDraws; } + // On Nvidia there is a current bug where we must submit the current command buffer before copy + // operations or else the copy will not happen. This includes copies, blits, resolves, and copy + // as draws. bool mustSubmitCommandsBeforeCopyOp() const { return fMustSubmitCommandsBeforeCopyOp; } + // Sometimes calls to QueueWaitIdle return before actually signalling the fences + // on the command buffers even though they have completed. This causes an assert to fire when + // destroying the command buffers. Therefore we add a sleep to make sure the fence signals. bool mustSleepOnTearDown() const { return fMustSleepOnTearDown; } + // Returns true if while adding commands to secondary command buffers, we must make a new + // secondary command buffer every time we want to bind a new VkPipeline. This is to work around a + // driver bug specifically on AMD. bool newSecondaryCBOnPipelineChange() const { return fNewSecondaryCBOnPipelineChange; } + /** * Returns both a supported and most prefered stencil format to use in draws. 
*/ @@ -129,26 +146,18 @@ private: StencilFormat fPreferedStencilFormat; - // Tells of if we can pass in straight GLSL string into vkCreateShaderModule bool fCanUseGLSLForShaderModule; - // On Adreno vulkan, they do not respect the imageOffset parameter at least in - // copyImageToBuffer. This flag says that we must do the copy starting from the origin always. bool fMustDoCopiesFromOrigin; - // Check whether we support using draws for copies. bool fSupportsCopiesAsDraws; - // On Nvidia there is a current bug where we must the current command buffer before copy - // operations or else the copy will not happen. This includes copies, blits, resolves, and copy - // as draws. bool fMustSubmitCommandsBeforeCopyOp; - // Sometimes calls to QueueWaitIdle return before actually signalling the fences - // on the command buffers even though they have completed. This causes an assert to fire when - // destroying the command buffers. Therefore we add a sleep to make sure the fence signals. bool fMustSleepOnTearDown; + bool fNewSecondaryCBOnPipelineChange; + typedef GrCaps INHERITED; }; diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp index 6412a55..e1ff70f 100644 --- a/src/gpu/vk/GrVkGpu.cpp +++ b/src/gpu/vk/GrVkGpu.cpp @@ -1794,7 +1794,7 @@ void adjust_bounds_to_granularity(SkIRect* dstBounds, const SkIRect& srcBounds, } } -void GrVkGpu::submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer* buffer, +void GrVkGpu::submitSecondaryCommandBuffer(const SkTArray<GrVkSecondaryCommandBuffer*>& buffers, const GrVkRenderPass* renderPass, const VkClearValue* colorClear, GrVkRenderTarget* target, @@ -1820,7 +1820,9 @@ void GrVkGpu::submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer* buffer, } fCurrentCmdBuffer->beginRenderPass(this, renderPass, colorClear, *target, *pBounds, true); - fCurrentCmdBuffer->executeCommands(this, buffer); + for (int i = 0; i < buffers.count(); ++i) { + fCurrentCmdBuffer->executeCommands(this, buffers[i]); + } fCurrentCmdBuffer->endRenderPass(this); 
this->didWriteToSurface(target, &bounds); diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h index f47f471..db77443 100644 --- a/src/gpu/vk/GrVkGpu.h +++ b/src/gpu/vk/GrVkGpu.h @@ -120,7 +120,7 @@ public: this->internalResolveRenderTarget(target, true); } - void submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer*, + void submitSecondaryCommandBuffer(const SkTArray<GrVkSecondaryCommandBuffer*>&, const GrVkRenderPass*, const VkClearValue*, GrVkRenderTarget*, diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp index c4a8dae..6b2cd8e 100644 --- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp +++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp @@ -57,13 +57,14 @@ GrVkGpuCommandBuffer::GrVkGpuCommandBuffer(GrVkGpu* gpu, const LoadAndStoreInfo& stencilInfo) : fGpu(gpu) , fRenderTarget(nullptr) - , fClearColor(GrColor4f::FromGrColor(colorInfo.fClearColor)){ + , fClearColor(GrColor4f::FromGrColor(colorInfo.fClearColor)) + , fLastPipelineState(nullptr) { get_vk_load_store_ops(colorInfo, &fVkColorLoadOp, &fVkColorStoreOp); get_vk_load_store_ops(stencilInfo, &fVkStencilLoadOp, &fVkStencilStoreOp); - fCurrentCmdBuffer = -1; + fCurrentCmdInfo = -1; } void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target) { @@ -75,7 +76,7 @@ void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target) { CommandBufferInfo& cbInfo = fCommandBufferInfos.push_back(); SkASSERT(fCommandBufferInfos.count() == 1); - fCurrentCmdBuffer = 0; + fCurrentCmdInfo = 0; const GrVkResourceProvider::CompatibleRPHandle& rpHandle = target->compatibleRenderPassHandle(); if (rpHandle.isValid()) { @@ -97,15 +98,17 @@ void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target) { cbInfo.fIsEmpty = true; cbInfo.fStartsWithClear = false; - cbInfo.fCommandBuffer = fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer(); - cbInfo.fCommandBuffer->begin(fGpu, target->framebuffer(), cbInfo.fRenderPass); + cbInfo.fCommandBuffers.push_back(fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer()); + 
cbInfo.currentCmdBuf()->begin(fGpu, target->framebuffer(), cbInfo.fRenderPass); } GrVkGpuCommandBuffer::~GrVkGpuCommandBuffer() { for (int i = 0; i < fCommandBufferInfos.count(); ++i) { CommandBufferInfo& cbInfo = fCommandBufferInfos[i]; - cbInfo.fCommandBuffer->unref(fGpu); + for (int j = 0; j < cbInfo.fCommandBuffers.count(); ++j) { + cbInfo.fCommandBuffers[j]->unref(fGpu); + } cbInfo.fRenderPass->unref(fGpu); } } @@ -114,8 +117,8 @@ GrGpu* GrVkGpuCommandBuffer::gpu() { return fGpu; } GrRenderTarget* GrVkGpuCommandBuffer::renderTarget() { return fRenderTarget; } void GrVkGpuCommandBuffer::end() { - if (fCurrentCmdBuffer >= 0) { - fCommandBufferInfos[fCurrentCmdBuffer].fCommandBuffer->end(fGpu); + if (fCurrentCmdInfo >= 0) { + fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf()->end(fGpu); } } @@ -172,7 +175,7 @@ void GrVkGpuCommandBuffer::onSubmit() { SkIRect iBounds; cbInfo.fBounds.roundOut(&iBounds); - fGpu->submitSecondaryCommandBuffer(cbInfo.fCommandBuffer, cbInfo.fRenderPass, + fGpu->submitSecondaryCommandBuffer(cbInfo.fCommandBuffers, cbInfo.fRenderPass, &cbInfo.fColorClearValue, fRenderTarget, iBounds); } } @@ -185,7 +188,7 @@ void GrVkGpuCommandBuffer::discard(GrRenderTarget* rt) { } SkASSERT(target == fRenderTarget); - CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer]; + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; if (cbInfo.fIsEmpty) { // We will change the render pass to do a clear load instead GrVkRenderPass::LoadStoreOps vkColorOps(VK_ATTACHMENT_LOAD_OP_DONT_CARE, @@ -224,7 +227,7 @@ void GrVkGpuCommandBuffer::onClearStencilClip(GrRenderTarget* rt, const GrFixedC } SkASSERT(target == fRenderTarget); - CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer]; + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; GrStencilAttachment* sb = fRenderTarget->renderTargetPriv().getStencilAttachment(); // this should only be called internally when we know we have a @@ -270,7 +273,7 @@ void 
GrVkGpuCommandBuffer::onClearStencilClip(GrRenderTarget* rt, const GrFixedC attachment.colorAttachment = 0; // this value shouldn't matter attachment.clearValue.depthStencil = vkStencilColor; - cbInfo.fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect); + cbInfo.currentCmdBuf()->clearAttachments(fGpu, 1, &attachment, 1, &clearRect); cbInfo.fIsEmpty = false; // Update command buffer bounds @@ -291,7 +294,7 @@ void GrVkGpuCommandBuffer::onClear(GrRenderTarget* rt, const GrFixedClip& clip, } SkASSERT(target == fRenderTarget); - CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer]; + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; VkClearColorValue vkColor; GrColorToRGBAFloat(color, vkColor.float32); @@ -354,7 +357,7 @@ void GrVkGpuCommandBuffer::onClear(GrRenderTarget* rt, const GrFixedClip& clip, attachment.colorAttachment = colorIndex; attachment.clearValue.color = vkColor; - cbInfo.fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect); + cbInfo.currentCmdBuf()->clearAttachments(fGpu, 1, &attachment, 1, &clearRect); cbInfo.fIsEmpty = false; // Update command buffer bounds @@ -367,10 +370,17 @@ void GrVkGpuCommandBuffer::onClear(GrRenderTarget* rt, const GrFixedClip& clip, } void GrVkGpuCommandBuffer::addAdditionalCommandBuffer() { - fCommandBufferInfos[fCurrentCmdBuffer].fCommandBuffer->end(fGpu); + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; + cbInfo.currentCmdBuf()->end(fGpu); + cbInfo.fCommandBuffers.push_back(fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer()); + cbInfo.currentCmdBuf()->begin(fGpu, fRenderTarget->framebuffer(), cbInfo.fRenderPass); +} + +void GrVkGpuCommandBuffer::addAdditionalRenderPass() { + fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf()->end(fGpu); CommandBufferInfo& cbInfo = fCommandBufferInfos.push_back(); - fCurrentCmdBuffer++; + fCurrentCmdInfo++; GrVkRenderPass::LoadStoreOps vkColorOps(VK_ATTACHMENT_LOAD_OP_LOAD, 
VK_ATTACHMENT_STORE_OP_STORE); @@ -389,7 +399,7 @@ void GrVkGpuCommandBuffer::addAdditionalCommandBuffer() { vkStencilOps); } - cbInfo.fCommandBuffer = fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer(); + cbInfo.fCommandBuffers.push_back(fGpu->resourceProvider().findOrCreateSecondaryCommandBuffer()); // It shouldn't matter what we set the clear color to here since we will assume loading of the // attachment. memset(&cbInfo.fColorClearValue, 0, sizeof(VkClearValue)); @@ -397,7 +407,7 @@ void GrVkGpuCommandBuffer::addAdditionalCommandBuffer() { cbInfo.fIsEmpty = true; cbInfo.fStartsWithClear = false; - cbInfo.fCommandBuffer->begin(fGpu, fRenderTarget->framebuffer(), cbInfo.fRenderPass); + cbInfo.currentCmdBuf()->begin(fGpu, fRenderTarget->framebuffer(), cbInfo.fRenderPass); } void GrVkGpuCommandBuffer::inlineUpload(GrOpFlushState* state, GrDrawOp::DeferredUploadFn& upload, @@ -406,17 +416,17 @@ void GrVkGpuCommandBuffer::inlineUpload(GrOpFlushState* state, GrDrawOp::Deferre if (!fRenderTarget) { this->init(target); } - if (!fCommandBufferInfos[fCurrentCmdBuffer].fIsEmpty) { - this->addAdditionalCommandBuffer(); + if (!fCommandBufferInfos[fCurrentCmdInfo].fIsEmpty) { + this->addAdditionalRenderPass(); } - fCommandBufferInfos[fCurrentCmdBuffer].fPreDrawUploads.emplace_back(state, upload); + fCommandBufferInfos[fCurrentCmdInfo].fPreDrawUploads.emplace_back(state, upload); } //////////////////////////////////////////////////////////////////////////////// void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc, const GrNonInstancedMesh& mesh) { - CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer]; + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; // There is no need to put any memory barriers to make sure host writes have finished here. // When a command buffer is submitted to a queue, there is an implicit memory barrier that // occurs for all host writes. 
Additionally, BufferMemoryBarriers are not allowed inside of @@ -427,7 +437,7 @@ void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc, SkASSERT(vbuf); SkASSERT(!vbuf->isMapped()); - cbInfo.fCommandBuffer->bindVertexBuffer(fGpu, vbuf); + cbInfo.currentCmdBuf()->bindVertexBuffer(fGpu, vbuf); if (mesh.isIndexed()) { SkASSERT(!mesh.indexBuffer()->isCPUBacked()); @@ -435,7 +445,7 @@ void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc, SkASSERT(ibuf); SkASSERT(!ibuf->isMapped()); - cbInfo.fCommandBuffer->bindIndexBuffer(fGpu, ibuf); + cbInfo.currentCmdBuf()->bindIndexBuffer(fGpu, ibuf); } } @@ -443,7 +453,8 @@ sk_sp<GrVkPipelineState> GrVkGpuCommandBuffer::prepareDrawState( const GrPipeline& pipeline, const GrPrimitiveProcessor& primProc, GrPrimitiveType primitiveType) { - CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer]; + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; + SkASSERT(cbInfo.fRenderPass); sk_sp<GrVkPipelineState> pipelineState = fGpu->resourceProvider().findOrCreateCompatiblePipelineState(pipeline, @@ -454,11 +465,18 @@ sk_sp<GrVkPipelineState> GrVkGpuCommandBuffer::prepareDrawState( return pipelineState; } + if (!cbInfo.fIsEmpty && + fLastPipelineState && fLastPipelineState != pipelineState.get() && + fGpu->vkCaps().newSecondaryCBOnPipelineChange()) { + this->addAdditionalCommandBuffer(); + } + fLastPipelineState = pipelineState.get(); + pipelineState->setData(fGpu, primProc, pipeline); - pipelineState->bind(fGpu, cbInfo.fCommandBuffer); + pipelineState->bind(fGpu, cbInfo.currentCmdBuf()); - GrVkPipeline::SetDynamicState(fGpu, cbInfo.fCommandBuffer, pipeline); + GrVkPipeline::SetDynamicState(fGpu, cbInfo.currentCmdBuf(), pipeline); return pipelineState; } @@ -509,9 +527,6 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline, if (!meshCount) { return; } - CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer]; - SkASSERT(cbInfo.fRenderPass); - prepare_sampled_images(primProc, fGpu); 
GrFragmentProcessor::Iter iter(pipeline); while (const GrFragmentProcessor* fp = iter.next()) { @@ -527,6 +542,8 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline, return; } + CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdInfo]; + for (int i = 0; i < meshCount; ++i) { const GrMesh& mesh = meshes[i]; GrMesh::Iterator iter; @@ -550,14 +567,14 @@ void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline, this->bindGeometry(primProc, *nonIdxMesh); if (nonIdxMesh->isIndexed()) { - cbInfo.fCommandBuffer->drawIndexed(fGpu, + cbInfo.currentCmdBuf()->drawIndexed(fGpu, nonIdxMesh->indexCount(), 1, nonIdxMesh->startIndex(), nonIdxMesh->startVertex(), 0); } else { - cbInfo.fCommandBuffer->draw(fGpu, + cbInfo.currentCmdBuf()->draw(fGpu, nonIdxMesh->vertexCount(), 1, nonIdxMesh->startVertex(), diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h index f5f6677..519edb3 100644 --- a/src/gpu/vk/GrVkGpuCommandBuffer.h +++ b/src/gpu/vk/GrVkGpuCommandBuffer.h @@ -63,6 +63,7 @@ private: void onClearStencilClip(GrRenderTarget*, const GrFixedClip&, bool insideStencilMask) override; void addAdditionalCommandBuffer(); + void addAdditionalRenderPass(); struct InlineUploadInfo { InlineUploadInfo(GrOpFlushState* state, const GrDrawOp::DeferredUploadFn& upload) @@ -73,17 +74,21 @@ private: }; struct CommandBufferInfo { - const GrVkRenderPass* fRenderPass; - GrVkSecondaryCommandBuffer* fCommandBuffer; - VkClearValue fColorClearValue; - SkRect fBounds; - bool fIsEmpty; - bool fStartsWithClear; - SkTArray<InlineUploadInfo> fPreDrawUploads; + const GrVkRenderPass* fRenderPass; + SkTArray<GrVkSecondaryCommandBuffer*> fCommandBuffers; + VkClearValue fColorClearValue; + SkRect fBounds; + bool fIsEmpty; + bool fStartsWithClear; + SkTArray<InlineUploadInfo> fPreDrawUploads; + + GrVkSecondaryCommandBuffer* currentCmdBuf() { + return fCommandBuffers.back(); + } }; SkTArray<CommandBufferInfo> fCommandBufferInfos; - int fCurrentCmdBuffer; + int fCurrentCmdInfo; GrVkGpu* fGpu; GrVkRenderTarget* fRenderTarget; @@ -92,6 +97,7 @@ 
private: VkAttachmentLoadOp fVkStencilLoadOp; VkAttachmentStoreOp fVkStencilStoreOp; GrColor4f fClearColor; + GrVkPipelineState* fLastPipelineState; typedef GrGpuCommandBuffer INHERITED; }; -- 2.7.4