Add fence support for TransferBuffers
author jvanverth <jvanverth@google.com>
Fri, 30 Sep 2016 15:39:02 +0000 (08:39 -0700)
committer Commit bot <commit-bot@chromium.org>
Fri, 30 Sep 2016 15:39:03 +0000 (08:39 -0700)
BUG=skia:4604
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2384463003

Review-Url: https://codereview.chromium.org/2384463003

20 files changed:
include/gpu/GrCaps.h
include/gpu/GrTypesPriv.h
include/gpu/gl/GrGLFunctions.h
include/gpu/gl/GrGLInterface.h
include/gpu/gl/GrGLTypes.h
src/gpu/GrCaps.cpp
src/gpu/GrGpu.cpp
src/gpu/GrGpu.h
src/gpu/gl/GrGLAssembleInterface.cpp
src/gpu/gl/GrGLCaps.cpp
src/gpu/gl/GrGLDefines.h
src/gpu/gl/GrGLGpu.cpp
src/gpu/gl/GrGLGpu.h
src/gpu/gl/GrGLInterface.cpp
src/gpu/gl/GrGLTestInterface.cpp
src/gpu/gl/GrGLTestInterface.h
src/gpu/vk/GrVkCaps.cpp
src/gpu/vk/GrVkGpu.cpp
src/gpu/vk/GrVkGpu.h
tools/gpu/GrTest.cpp

index b7c91c4..a97be72 100644 (file)
@@ -286,6 +286,8 @@ public:
 
     bool sampleShadingSupport() const { return fSampleShadingSupport; }
 
+    bool fenceSyncSupport() const { return fFenceSyncSupport; }
+
 protected:
     /** Subclasses must call this at the end of their constructors in order to apply caps
         overrides requested by the client. Note that overrides will only reduce the caps never
@@ -324,6 +326,8 @@ protected:
     bool fPreferVRAMUseOverFlushes                   : 1;
 
     bool fSampleShadingSupport                       : 1;
+    // TODO: this may need to be an enum to support different fence types
+    bool fFenceSyncSupport                           : 1;
 
     InstancedSupport fInstancedSupport;
 
index d60fab1..636e72a 100644 (file)
@@ -482,4 +482,9 @@ template <typename T> T * const * sk_sp_address_as_pointer_address(sk_sp<T> cons
     return reinterpret_cast<T * const *>(sp);
 }
 
+/*
+ * Object for CPU-GPU synchronization
+ */
+typedef intptr_t GrFence;
+
 #endif
index 7e720cd..eccd1bf 100644 (file)
@@ -345,6 +345,11 @@ typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLFlushMappedNamedBufferRangeProc)(GrGL
 // OpenGL 3.1
 typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLTextureBufferProc)(GrGLuint texture, GrGLenum target, GrGLenum internalformat, GrGLuint buffer);
 
+/* ARB_sync */
+typedef GrGLsync (GR_GL_FUNCTION_TYPE* GrGLFenceSyncProc)(GrGLenum condition, GrGLbitfield flags);
+typedef GrGLenum (GR_GL_FUNCTION_TYPE* GrGLClientWaitSyncProc)(GrGLsync sync, GrGLbitfield flags, GrGLuint64 timeout);
+typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLDeleteSyncProc)(GrGLsync sync);
+
 /* KHR_debug */
 typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLDebugMessageControlProc)(GrGLenum source, GrGLenum type, GrGLenum severity, GrGLsizei count, const GrGLuint* ids, GrGLboolean enabled);
 typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLDebugMessageInsertProc)(GrGLenum source, GrGLenum type, GrGLuint id, GrGLenum severity, GrGLsizei length,  const GrGLchar* buf);
index 27184ad..60109ec 100644 (file)
@@ -452,6 +452,11 @@ public:
         // OpenGL 3.1
         GrGLFunction<GrGLTextureBufferProc> fTextureBuffer;
 
+        /* ARB_sync */
+        GrGLFunction<GrGLFenceSyncProc> fFenceSync;
+        GrGLFunction<GrGLClientWaitSyncProc> fClientWaitSync;
+        GrGLFunction<GrGLDeleteSyncProc> fDeleteSync;
+
         /* KHR_debug */
         GrGLFunction<GrGLDebugMessageControlProc> fDebugMessageControl;
         GrGLFunction<GrGLDebugMessageInsertProc> fDebugMessageInsert;
index d0edcf1..5b9e31d 100644 (file)
@@ -58,6 +58,7 @@ typedef signed long int GrGLintptr;
 typedef signed long int GrGLsizeiptr;
 #endif
 typedef void* GrGLeglImage;
+typedef void* GrGLsync;
 
 struct GrGLDrawArraysIndirectCommand {
     GrGLuint fCount;
index 9f92210..0f77b5a 100644 (file)
@@ -103,6 +103,7 @@ GrCaps::GrCaps(const GrContextOptions& options) {
     fFullClearIsFree = false;
     fMustClearUploadedBufferData = false;
     fSampleShadingSupport = false;
+    fFenceSyncSupport = false;
 
     fUseDrawInsteadOfClear = false;
 
@@ -189,6 +190,9 @@ SkString GrCaps::dump() const {
     r.appendf("Prefer client-side dynamic buffers : %s\n", gNY[fPreferClientSideDynamicBuffers]);
     r.appendf("Full screen clear is free          : %s\n", gNY[fFullClearIsFree]);
     r.appendf("Must clear buffer memory           : %s\n", gNY[fMustClearUploadedBufferData]);
+    r.appendf("Sample shading support             : %s\n", gNY[fSampleShadingSupport]);
+    r.appendf("Fence sync support                 : %s\n", gNY[fFenceSyncSupport]);
+
     r.appendf("Draw Instead of Clear [workaround] : %s\n", gNY[fUseDrawInsteadOfClear]);
     r.appendf("Draw Instead of TexSubImage [workaround] : %s\n",
               gNY[fUseDrawInsteadOfPartialRenderTargetWrite]);
index e14e892..0de9fed 100644 (file)
@@ -402,8 +402,9 @@ bool GrGpu::writePixels(GrSurface* surface,
 bool GrGpu::transferPixels(GrSurface* surface,
                            int left, int top, int width, int height,
                            GrPixelConfig config, GrBuffer* transferBuffer,
-                           size_t offset, size_t rowBytes) {
+                           size_t offset, size_t rowBytes, GrFence* fence) {
     SkASSERT(transferBuffer);
+    SkASSERT(fence);
 
     this->handleDirtyContext();
     if (this->onTransferPixels(surface, left, top, width, height, config,
@@ -411,6 +412,12 @@ bool GrGpu::transferPixels(GrSurface* surface,
         SkIRect rect = SkIRect::MakeXYWH(left, top, width, height);
         this->didWriteToSurface(surface, &rect);
         fStats.incTransfersToTexture();
+
+        if (*fence) {
+            this->deleteFence(*fence);
+        }
+        *fence = this->insertFence();
+
         return true;
     }
     return false;
index 3e3a4cd..b8703dc 100644 (file)
@@ -305,7 +305,7 @@ public:
     bool transferPixels(GrSurface* surface,
                         int left, int top, int width, int height,
                         GrPixelConfig config, GrBuffer* transferBuffer,
-                        size_t offset, size_t rowBytes);
+                        size_t offset, size_t rowBytes, GrFence* fence);
 
     /**
      * This is can be called before allocating a texture to be a dst for copySurface. This is only
@@ -371,6 +371,10 @@ public:
     // Provides a hook for post-flush actions (e.g. PLS reset and Vulkan command buffer submits).
     virtual void finishDrawTarget() {}
 
+    virtual GrFence SK_WARN_UNUSED_RESULT insertFence() const = 0;
+    virtual bool waitFence(GrFence, uint64_t timeout = 1000) const = 0;
+    virtual void deleteFence(GrFence) const = 0;
+
     ///////////////////////////////////////////////////////////////////////////
     // Debugging and Stats
 
index e804dd6..b435655 100644 (file)
@@ -522,10 +522,16 @@ const GrGLInterface* GrGLAssembleGLInterface(void* ctx, GrGLGetProc get) {
         GET_EGL_PROC_SUFFIX(DestroyImage, KHR);
     }
 
-    if (glVer >= GR_GL_VER(4,0) || extensions.has("GL_ARB_sample_shading")) {
+    if (glVer >= GR_GL_VER(4, 0) || extensions.has("GL_ARB_sample_shading")) {
         GET_PROC(MinSampleShading);
     }
 
+    if (glVer >= GR_GL_VER(3, 2) || extensions.has("GL_ARB_sync")) {
+        GET_PROC(FenceSync);
+        GET_PROC(ClientWaitSync);
+        GET_PROC(DeleteSync);
+    }
+
     interface->fStandard = kGL_GrGLStandard;
     interface->fExtensions.swap(&extensions);
 
@@ -919,6 +925,12 @@ const GrGLInterface* GrGLAssembleGLESInterface(void* ctx, GrGLGetProc get) {
         GET_PROC_SUFFIX(MinSampleShading, OES);
     }
 
+    if (version >= GR_GL_VER(3, 0)) {
+        GET_PROC(FenceSync);
+        GET_PROC(ClientWaitSync);
+        GET_PROC(DeleteSync);
+    }
+
     interface->fStandard = kGLES_GrGLStandard;
     interface->fExtensions.swap(&extensions);
 
index feb3eb6..1a7b105 100644 (file)
@@ -550,6 +550,15 @@ void GrGLCaps::init(const GrContextOptions& contextOptions,
         fSampleShadingSupport = true;
     }
 
+    // TODO: support CHROMIUM_sync_point and maybe KHR_fence_sync
+    if (kGL_GrGLStandard == standard) {
+        if (version >= GR_GL_VER(3, 2) || ctxInfo.hasExtension("GL_ARB_sync")) {
+            fFenceSyncSupport = true;
+        }
+    } else if (version >= GR_GL_VER(3, 0)) {
+        fFenceSyncSupport = true;
+    }
+
     // We support manual mip-map generation (via iterative downsampling draw calls). This fixes
     // bugs on some cards/drivers that produce incorrect mip-maps for sRGB textures when using
     // glGenerateMipmap. Our implementation requires mip-level sampling control. Additionally,
index 8cd3751..8dc7af1 100644 (file)
 #define GR_GL_INCLUSIVE                                     0x8f10
 #define GR_GL_EXCLUSIVE                                     0x8f11
 
+/* GL_ARB_sync */
+#define GR_GL_SYNC_GPU_COMMANDS_COMPLETE                    0x9117
+#define GR_GL_ALREADY_SIGNALED                              0x911A
+#define GR_GL_TIMEOUT_EXPIRED                               0x911B
+#define GR_GL_CONDITION_SATISFIED                           0x911C
+#define GR_GL_WAIT_FAILED                                   0x911D
+#define GR_GL_SYNC_FLUSH_COMMANDS_BIT                       0x00000001
+
 /* EGL Defines */
 #define GR_EGL_NO_DISPLAY                                   ((GrEGLDisplay)0)
 #define GR_EGL_EXTENSIONS                                   0x3055
index fcd3270..5755702 100644 (file)
@@ -4694,3 +4694,19 @@ bool GrGLGpu::onMakeCopyForTextureParams(GrTexture* texture, const GrTexturePara
     }
     return false;
 }
+
+/**
+ * Issues a GL FenceSync call with the GR_GL_SYNC_GPU_COMMANDS_COMPLETE condition and no flags,
+ * and returns the resulting GrGLsync handle cast to the backend-neutral GrFence type.
+ */
+GrFence SK_WARN_UNUSED_RESULT GrGLGpu::insertFence() const {
+    GrGLsync fence;
+    GL_CALL_RET(fence, FenceSync(GR_GL_SYNC_GPU_COMMANDS_COMPLETE, 0));
+    return (GrFence)fence;
+}
+
+/**
+ * Waits on the GL sync object wrapped by 'fence' using ClientWaitSync with
+ * GR_GL_SYNC_FLUSH_COMMANDS_BIT set. 'timeout' is forwarded unchanged to ClientWaitSync.
+ * Returns true if the sync object is known to be signaled, false on timeout or failure.
+ */
+bool GrGLGpu::waitFence(GrFence fence, uint64_t timeout) const {
+    GrGLenum result;
+    GL_CALL_RET(result, ClientWaitSync((GrGLsync)fence, GR_GL_SYNC_FLUSH_COMMANDS_BIT, timeout));
+    // ClientWaitSync reports GR_GL_ALREADY_SIGNALED when the fence had signaled before the call
+    // and GR_GL_CONDITION_SATISFIED when it signaled during the wait; both mean success.
+    return (GR_GL_CONDITION_SATISFIED == result || GR_GL_ALREADY_SIGNALED == result);
+}
+
+/** Casts 'fence' back to a GrGLsync and passes it to the GL DeleteSync call. */
+void GrGLGpu::deleteFence(GrFence fence) const {
+    GL_CALL(DeleteSync((GrGLsync)fence));
+}
index 20616e4..7ba79b2 100644 (file)
@@ -144,6 +144,10 @@ public:
 
     void finishDrawTarget() override;
 
+    GrFence SK_WARN_UNUSED_RESULT insertFence() const override;
+    bool waitFence(GrFence, uint64_t timeout) const override;
+    void deleteFence(GrFence) const override;
+
 private:
     GrGLGpu(GrGLContext* ctx, GrContext* context);
 
index c3fc8a8..0a157dd 100644 (file)
@@ -786,6 +786,24 @@ bool GrGLInterface::validate() const {
         }
     }
 
+    if (kGL_GrGLStandard == fStandard) {
+        if (glVer >= GR_GL_VER(3, 2) || fExtensions.has("GL_ARB_sync")) {
+            if (nullptr == fFunctions.fFenceSync ||
+                nullptr == fFunctions.fClientWaitSync ||
+                nullptr == fFunctions.fDeleteSync) {
+                RETURN_FALSE_INTERFACE
+            }
+        }
+    } else if (kGLES_GrGLStandard == fStandard) {
+        if (glVer >= GR_GL_VER(3, 0)) {
+            if (nullptr == fFunctions.fFenceSync ||
+                nullptr == fFunctions.fClientWaitSync ||
+                nullptr == fFunctions.fDeleteSync) {
+                RETURN_FALSE_INTERFACE
+            }
+        }
+    }
+
     if (fExtensions.has("EGL_KHR_image") || fExtensions.has("EGL_KHR_image_base")) {
         if (nullptr == fFunctions.fEGLCreateImage ||
             nullptr == fFunctions.fEGLDestroyImage) {
index 10968a7..d871ef6 100644 (file)
@@ -312,6 +312,9 @@ GrGLTestInterface::GrGLTestInterface() {
     fFunctions.fMapNamedBufferRange = bind_to_member(this, &GrGLTestInterface::mapNamedBufferRange);
     fFunctions.fFlushMappedNamedBufferRange = bind_to_member(this, &GrGLTestInterface::flushMappedNamedBufferRange);
     fFunctions.fTextureBuffer = bind_to_member(this, &GrGLTestInterface::textureBuffer);
+    fFunctions.fFenceSync = bind_to_member(this, &GrGLTestInterface::fenceSync);
+    fFunctions.fClientWaitSync = bind_to_member(this, &GrGLTestInterface::clientWaitSync);
+    fFunctions.fDeleteSync = bind_to_member(this, &GrGLTestInterface::deleteSync);
     fFunctions.fDebugMessageControl = bind_to_member(this, &GrGLTestInterface::debugMessageControl);
     fFunctions.fDebugMessageInsert = bind_to_member(this, &GrGLTestInterface::debugMessageInsert);
     fFunctions.fDebugMessageCallback = bind_to_member(this, &GrGLTestInterface::debugMessageCallback);
index fc837bf..ef00df3 100644 (file)
@@ -317,6 +317,9 @@ public:
     virtual GrGLvoid* mapNamedBufferRange(GrGLuint buffer, GrGLintptr offset, GrGLsizeiptr length, GrGLbitfield access) { return nullptr; }
     virtual GrGLvoid flushMappedNamedBufferRange(GrGLuint buffer, GrGLintptr offset, GrGLsizeiptr length) {}
     virtual GrGLvoid textureBuffer(GrGLuint texture, GrGLenum target, GrGLenum internalformat, GrGLuint buffer) {}
+    virtual GrGLsync fenceSync(GrGLenum condition, GrGLbitfield flags) { return nullptr;  }
+    virtual GrGLenum clientWaitSync(GrGLsync sync, GrGLbitfield flags, GrGLuint64 timeout) { return GR_GL_WAIT_FAILED;  }
+    virtual GrGLvoid deleteSync(GrGLsync sync) {}
     virtual GrGLvoid debugMessageControl(GrGLenum source, GrGLenum type, GrGLenum severity, GrGLsizei count, const GrGLuint* ids, GrGLboolean enabled) {}
     virtual GrGLvoid debugMessageInsert(GrGLenum source, GrGLenum type, GrGLuint id, GrGLenum severity, GrGLsizei length,  const GrGLchar* buf) {}
     virtual GrGLvoid debugMessageCallback(GRGLDEBUGPROC callback, const GrGLvoid* userParam) {}
index e72e723..d982756 100644 (file)
@@ -36,6 +36,7 @@ GrVkCaps::GrVkCaps(const GrContextOptions& contextOptions, const GrVkInterface*
     fOversizedStencilSupport = false; //TODO: figure this out
 
     fUseDrawInsteadOfClear = false;
+    fFenceSyncSupport = true;   // always available in Vulkan
 
     fMapBufferFlags = kNone_MapFlags; //TODO: figure this out
     fBufferMapThreshold = SK_MaxS32;  //TODO: figure this out
index c143219..60a8763 100644 (file)
@@ -1888,3 +1888,31 @@ void GrVkGpu::submitSecondaryCommandBuffer(GrVkSecondaryCommandBuffer* buffer,
     this->didWriteToSurface(target, &bounds);
 }
 
+/**
+ * Creates an unsignaled VkFence and hands it to the queue via an empty QueueSubmit so that it
+ * can later be waited on with waitFence(). Returns the fence handle cast to GrFence, or 0 if
+ * the fence could not be created or submitted.
+ */
+GrFence SK_WARN_UNUSED_RESULT GrVkGpu::insertFence() const {
+    VkFenceCreateInfo createInfo;
+    memset(&createInfo, 0, sizeof(VkFenceCreateInfo));
+    createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    createInfo.pNext = nullptr;
+    createInfo.flags = 0;
+    VkFence fence = VK_NULL_HANDLE;
+    VkResult result = GR_VK_CALL(this->vkInterface(), CreateFence(this->device(), &createInfo,
+                                                                  nullptr, &fence));
+    if (VK_SUCCESS != result) {
+        return 0;
+    }
+    // TODO: verify that all QueueSubmits before this will finish before this fence signals
+    result = GR_VK_CALL(this->vkInterface(), QueueSubmit(this->queue(), 0, nullptr, fence));
+    if (VK_SUCCESS != result) {
+        // The fence was never handed to the queue, so it would never signal. Destroy it here
+        // rather than returning a handle the caller would wait on until timeout.
+        GR_VK_CALL(this->vkInterface(), DestroyFence(this->device(), fence, nullptr));
+        return 0;
+    }
+    return (GrFence)fence;
+}
+
+/**
+ * Blocks in WaitForFences on the single VkFence wrapped by 'fence'. 'timeout' is forwarded
+ * unchanged to WaitForFences. Returns true only if the wait reports VK_SUCCESS.
+ */
+bool GrVkGpu::waitFence(GrFence fence, uint64_t timeout) const {
+    // Convert by value (as deleteFence does) instead of reinterpreting the address of the
+    // GrFence: VkFence can be wider than GrFence (intptr_t), in which case reading a VkFence
+    // through a pointer to the GrFence would run past the end of the object.
+    VkFence vkFence = (VkFence)fence;
+    VkResult result = GR_VK_CALL(this->vkInterface(), WaitForFences(this->device(), 1,
+                                                                    &vkFence,
+                                                                    VK_TRUE,
+                                                                    timeout));
+    return (VK_SUCCESS == result);
+}
+
+/** Casts 'fence' back to a VkFence and destroys it on this GPU's device via DestroyFence. */
+void GrVkGpu::deleteFence(GrFence fence) const {
+    GR_VK_CALL(this->vkInterface(), DestroyFence(this->device(), (VkFence)fence, nullptr));
+}
+
index eeaacf3..273f28c 100644 (file)
@@ -141,6 +141,10 @@ public:
 
     void finishDrawTarget() override;
 
+    GrFence SK_WARN_UNUSED_RESULT insertFence() const override;
+    bool waitFence(GrFence, uint64_t timeout) const override;
+    void deleteFence(GrFence) const override;
+
     void generateMipmap(GrVkTexture* tex);
 
     bool updateBuffer(GrVkBuffer* buffer, const void* src, VkDeviceSize offset, VkDeviceSize size);
index b99cf06..2e17902 100644 (file)
@@ -318,6 +318,10 @@ public:
 
     void drawDebugWireRect(GrRenderTarget*, const SkIRect&, GrColor) override {}
 
+    GrFence SK_WARN_UNUSED_RESULT insertFence() const override { return 0; }
+    bool waitFence(GrFence, uint64_t) const override { return true; }
+    void deleteFence(GrFence) const override {}
+
 private:
     void onResetContext(uint32_t resetBits) override {}