Batch up draws into triangle fans as large as possible when drawing convex
author senorblanco@chromium.org <senorblanco@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>
Thu, 19 May 2011 17:11:07 +0000 (17:11 +0000)
committer senorblanco@chromium.org <senorblanco@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>
Thu, 19 May 2011 17:11:07 +0000 (17:11 +0000)
edge AA polys, so we minimize state changes and GL calls.  This requires
querying GL for the maximum number of fragment uniforms.  It also makes the
shader generator produce custom shaders for the number of relevant edges.
This gives a ~5X speedup on the "Shapes" SampleApp.

Review URL:  http://codereview.appspot.com/4536070/

git-svn-id: http://skia.googlecode.com/svn/trunk@1380 2bbb7eff-a529-9590-31e7-b0007b416f81

gpu/include/GrDrawTarget.h
gpu/include/GrGLDefines.h
gpu/src/GrDrawTarget.cpp
gpu/src/GrGLProgram.cpp
gpu/src/GrGLProgram.h
gpu/src/GrGpuGL.cpp
gpu/src/GrGpuGL.h
gpu/src/GrGpuGLShaders.cpp
gpu/src/GrTesselatedPathRenderer.cpp
src/gpu/GrPrintf_skia.cpp

index 985cca7..e89a273 100644 (file)
@@ -54,6 +54,17 @@ public:
         kMaxTexCoords = kNumStages
     };
 
+
+    /**
+     * The absolute maximum number of edges that may be specified for
+     * a single draw call when performing edge antialiasing.  This is used for
+     * the size of several static buffers, so implementations of getMaxEdges()
+     * (below) should clamp to this value.
+     */
+    enum {
+        kMaxEdges = 32
+    };
+
     /**
      *  Bitfield used to indicate which stages are in use.
      */
@@ -78,9 +89,6 @@ public:
         kNoColorWrites_StateBit = 0x08, //<! If set it disables writing colors.
                                         //   Useful while performing stencil
                                         //   ops.
-        kEdgeAA_StateBit        = 0x10, //<! Perform edge anti-aliasing.
-                                        //   Requires the edges to be passed in
-                                        //   setEdgeAAData().
 
         // subclass may use additional bits internally
         kDummyStateBit,
@@ -128,6 +136,20 @@ public:
         fCurrDrawState.fStencilSettings.setDisabled();
     }
 
+    class Edge {  // One edge, stored as line-equation coefficients (fX, fY, fZ).
+      public:
+        Edge() {}  // Leaves fX, fY and fZ uninitialized.
+        Edge(float x, float y, float z) : fX(x), fY(y), fZ(z) {}
+        GrPoint intersect(const Edge& other) {  // Point satisfying fX*x + fY*y + fZ = 0 for both edges.
+            return GrPoint::Make(  // No guard: parallel edges make both divisors zero.
+                (fY * other.fZ - other.fY * fZ) /
+                  (fX * other.fY - other.fX * fY),
+                (fX * other.fZ - other.fX * fZ) /
+                  (other.fX * fY - fX * other.fY));
+        }
+        float fX, fY, fZ;  // Coefficient of x, coefficient of y, constant term.
+    };
+
 protected:
 
     struct DrState {
@@ -164,7 +186,8 @@ protected:
 
         GrStencilSettings       fStencilSettings;
         GrMatrix                fViewMatrix;
-        float                   fEdgeAAEdges[18];
+        Edge                    fEdgeAAEdges[kMaxEdges];
+        int                     fEdgeAANumEdges;
         bool operator ==(const DrState& s) const {
             return 0 == memcmp(this, &s, sizeof(DrState));
         }
@@ -536,7 +559,7 @@ public:
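-     * @param edges       3 * 6 float values, representing the edge
-     *                    equations in Ax + By + C form
+     * @param edges       numEdges edge equations in Ax + By + C form
+     * @param numEdges    number of edges; at most kMaxEdges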
      */
-     void setEdgeAAData(const float edges[18]);
+     void setEdgeAAData(const Edge* edges, int numEdges);
 
 private:
     static const int TEX_COORD_BIT_CNT = kNumStages*kMaxTexCoords;
@@ -804,6 +827,15 @@ public:
      */
     virtual void clear(const GrIRect* rect, GrColor color) = 0;
 
+    /**
+     * Returns the maximum number of edges that may be specified in a single
+     * draw call when performing edge antialiasing.  This is usually limited
+     * by the number of fragment uniforms which may be uploaded.  Must be a
+     * minimum of six, since a triangle's vertices each belong to two boundary
+     * edges which may be distinct.
+     */
+    virtual int getMaxEdges() const { return 6; }
+
     ///////////////////////////////////////////////////////////////////////////
 
     class AutoStateRestore : ::GrNoncopyable {
index 29e56f3..2e22803 100644 (file)
 #define GR_GL_ACTIVE_ATTRIBUTE_MAX_LENGTH      0x8B8A
 #define GR_GL_SHADING_LANGUAGE_VERSION         0x8B8C
 #define GR_GL_CURRENT_PROGRAM                  0x8B8D
+#define GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS  0x8B49
+#define GR_GL_MAX_VERTEX_UNIFORM_COMPONENTS    0x8B4A
 
 /* StencilFunction */
 #define GR_GL_NEVER                          0x0200
index efee91e..250fbe1 100644 (file)
@@ -483,7 +483,7 @@ void GrDrawTarget::setIndexSourceToBuffer(const GrIndexBuffer* buffer) {
 
 bool GrDrawTarget::canDisableBlend() const {
     // If we're using edge antialiasing, we can't force blend off.
-    if (fCurrDrawState.fFlagBits & kEdgeAA_StateBit) {
+    if (fCurrDrawState.fEdgeAANumEdges > 0) {
         return false;
     }
 
@@ -535,8 +535,16 @@ bool GrDrawTarget::canDisableBlend() const {
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-void GrDrawTarget::setEdgeAAData(const float edges[18]) {
-    memcpy(fCurrDrawState.fEdgeAAEdges, edges, sizeof(fCurrDrawState.fEdgeAAEdges));
+// Stores numEdges edge equations in the current draw state.  Passing
+// (NULL, 0) clears the edge list.
+void GrDrawTarget::setEdgeAAData(const Edge* edges, int numEdges) {
+    GrAssert(numEdges >= 0 && numEdges <= kMaxEdges);
+    // memcpy() requires valid pointers even for a zero-byte copy, so skip it
+    // when there is nothing to store.
+    if (numEdges > 0) {
+        memcpy(fCurrDrawState.fEdgeAAEdges, edges, numEdges * sizeof(Edge));
+    }
+    fCurrDrawState.fEdgeAANumEdges = numEdges;
 }
 
 
index 0f82d2a..9a9e3c2 100644 (file)
@@ -361,11 +361,6 @@ bool GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const {
         }
     }
 
-    if (fProgramDesc.fUsesEdgeAA) {
-        segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[6];\n");
-        programData->fUniLocations.fEdgesUni = kUseUniform;
-    }
-
     if (fProgramDesc.fEmitsPointSize){
         segments.fVSCode.append("\tgl_PointSize = 1.0;\n");
     }
@@ -457,17 +452,36 @@ bool GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const {
     // we will want to compute coverage for some blend when there is no
     // color (when dual source blending is enabled). But for now we have this if
     if (!wroteFragColorZero) {
-        if (fProgramDesc.fUsesEdgeAA) {
-            // FIXME:  put the a's in a loop
+        if (fProgramDesc.fEdgeAANumEdges > 0) {
+            segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[");
+            segments.fFSUnis.appendS32(fProgramDesc.fEdgeAANumEdges);
+            segments.fFSUnis.append("];\n");
+            programData->fUniLocations.fEdgesUni = kUseUniform;
+            int count = fProgramDesc.fEdgeAANumEdges;
             segments.fFSCode.append(
-                "\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n"
-                "\tfloat a0 = clamp(dot(uEdges[0], pos), 0.0, 1.0);\n"
-                "\tfloat a1 = clamp(dot(uEdges[1], pos), 0.0, 1.0);\n"
-                "\tfloat a2 = clamp(dot(uEdges[2], pos), 0.0, 1.0);\n"
-                "\tfloat a3 = clamp(dot(uEdges[3], pos), 0.0, 1.0);\n"
-                "\tfloat a4 = clamp(dot(uEdges[4], pos), 0.0, 1.0);\n"
-                "\tfloat a5 = clamp(dot(uEdges[5], pos), 0.0, 1.0);\n"
-                "\tfloat edgeAlpha = min(min(a0 * a1, a2 * a3), a4 * a5);\n");
+                "\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n");
+            for (int i = 0; i < count; i++) {
+                segments.fFSCode.append("\tfloat a");
+                segments.fFSCode.appendS32(i);
+                segments.fFSCode.append(" = clamp(dot(" EDGES_UNI_NAME "[");
+                segments.fFSCode.appendS32(i);
+                segments.fFSCode.append("], pos), 0.0, 1.0);\n");
+            }
+            segments.fFSCode.append("\tfloat edgeAlpha = ");
+            for (int i = 0; i < count - 1; i++) {
+                segments.fFSCode.append("min(a");
+                segments.fFSCode.appendS32(i);
+                segments.fFSCode.append(" * a");
+                segments.fFSCode.appendS32(i + 1);
+                segments.fFSCode.append(", ");
+            }
+            segments.fFSCode.append("a");
+            segments.fFSCode.appendS32(count - 1);
+            segments.fFSCode.append(" * a0");
+            for (int i = 0; i < count - 1; i++) {
+                segments.fFSCode.append(")");
+            }
+            segments.fFSCode.append(";\n");
             inCoverage = "edgeAlpha";
             coverageIsScalar = true;
         }
index e02d15b..d4a6406 100644 (file)
@@ -96,7 +96,7 @@ private:
 
         int  fFirstCoverageStage;
         bool fEmitsPointSize;
-        bool fUsesEdgeAA;
+        int fEdgeAANumEdges;
 
         SkXfermode::Mode fColorFilterXfermode;
 
index e8c7afb..5a2d2bd 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "GrGpuGL.h"
 #include "GrMemory.h"
+#include "GrTypes.h"
 
 static const GrGLuint GR_MAX_GLUINT = ~0;
 static const GrGLint  GR_INVAL_GLINT = ~0;
@@ -201,6 +202,16 @@ GrGpuGL::GrGpuGL() {
         GR_GL_GetIntegerv(GR_GL_MAX_TEXTURE_UNITS, &maxTextureUnits);
         GrAssert(maxTextureUnits > kNumStages);
     }
+    if (GR_GL_SUPPORT_ES2) {
+        GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_VECTORS,
+                          &fMaxFragmentUniformVectors);
+    } else if (GR_GL_SUPPORT_DESKTOP) {
+        GrGLint max;
+        GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &max);
+        fMaxFragmentUniformVectors = max / 4;
+    } else {
+        fMaxFragmentUniformVectors = 16;
+    }
 
     ////////////////////////////////////////////////////////////////////////////
     // Check for supported features.
@@ -2064,3 +2075,9 @@ void GrGpuGL::setBuffers(bool indexed,
         }
     }
 }
+
+int GrGpuGL::getMaxEdges() const {  // Edge budget derived from the fragment uniform vector limit.
+    // FIXME:  Holding back 8 uniform vectors is a pessimistic estimate of how
+    // many other things want uniforms.  This should be centralized somewhere.
+    return GR_CT_MIN(fMaxFragmentUniformVectors - 8, kMaxEdges);
+}
index da955cf..d48d69e 100644 (file)
@@ -107,6 +107,7 @@ protected:
     virtual void flushScissor(const GrIRect* rect);
     void clearStencil(uint32_t value, uint32_t mask);
     virtual void clearStencilClip(const GrIRect& rect);
+    virtual int getMaxEdges() const;
 
     // binds texture unit in GL
     void setTextureUnit(int unitIdx);
@@ -189,6 +190,9 @@ private:
     // Do we have stencil wrap ops.
     bool fHasStencilWrap;
 
+    // The maximum number of fragment uniform vectors (GLES has min. 16).
+    int fMaxFragmentUniformVectors;
+
     // ES requires an extension to support RGBA8 in RenderBufferStorage
     bool fRGBA8Renderbuffer;
 
index 08845a9..bbf9719 100644 (file)
@@ -193,7 +193,7 @@ void GrGpuGLShaders::ProgramUnitTest() {
         idx = (int)(random.nextF() * (kNumStages+1));
         pdesc.fFirstCoverageStage = idx;
 
-        pdesc.fUsesEdgeAA = (random.nextF() > .5f);
+        pdesc.fEdgeAANumEdges = (random.nextF() * (getMaxEdges() + 1));
 
         for (int s = 0; s < kNumStages; ++s) {
             // enable the stage?
@@ -442,16 +442,17 @@ void GrGpuGLShaders::flushTexelSize(int s) {
 void GrGpuGLShaders::flushEdgeAAData() {
     const int& uni = fProgramData->fUniLocations.fEdgesUni;
     if (GrGLProgram::kUnusedUniform != uni) {
-        float edges[18];
-        memcpy(edges, fCurrDrawState.fEdgeAAEdges, sizeof(edges));
+        int count = fCurrDrawState.fEdgeAANumEdges;  // Edge count recorded by setEdgeAAData().
+        Edge edges[kMaxEdges];  // Local copy, so the draw state's edges stay unflipped.
         // Flip the edges in Y
         float height = fCurrDrawState.fRenderTarget->height();
-        for (int i = 0; i < 6; ++i) {
-            float b = edges[i * 3 + 1];
-            edges[i * 3 + 1] = -b;
-            edges[i * 3 + 2] += b * height;
+        for (int i = 0; i < count; ++i) {
+            edges[i] = fCurrDrawState.fEdgeAAEdges[i];
+            float b = edges[i].fY;
+            edges[i].fY = -b;
+            edges[i].fZ += b * height;  // Together with fY = -b, substitutes (height - y) for y.
         }
-        GR_GL(Uniform3fv(uni, 6, edges));
+        GR_GL(Uniform3fv(uni, count, &edges[0].fX));  // Relies on each Edge being three consecutive floats.
     }
 }
 
@@ -701,7 +702,7 @@ void GrGpuGLShaders::buildProgram(GrPrimitiveType type) {
         desc.fColorType = GrGLProgram::ProgramDesc::kAttribute_ColorType;
     }
 
-    desc.fUsesEdgeAA = fCurrDrawState.fFlagBits & kEdgeAA_StateBit;
+    desc.fEdgeAANumEdges = fCurrDrawState.fEdgeAANumEdges;
 
     for (int s = 0; s < kNumStages; ++s) {
         GrGLProgram::ProgramDesc::StageDesc& stage = desc.fStages[s];
index da6da5c..8a33012 100644 (file)
@@ -85,19 +85,7 @@ static unsigned fill_type_to_glu_winding_rule(GrPathFill fill) {
 GrTesselatedPathRenderer::GrTesselatedPathRenderer() {
 }
 
-class Edge {
-  public:
-    Edge() {}
-    Edge(float x, float y, float z) : fX(x), fY(y), fZ(z) {}
-    GrPoint intersect(const Edge& other) {
-        return GrPoint::Make(
-            (fY * other.fZ - other.fY * fZ) / (fX * other.fY - other.fX * fY),
-            (fX * other.fZ - other.fX * fZ) / (other.fX * fY - fX * other.fY));
-    }
-    float fX, fY, fZ;
-};
-
-typedef GrTDArray<Edge> EdgeArray;
+typedef GrTDArray<GrDrawTarget::Edge> EdgeArray;
 
 bool isCCW(const GrPoint* pts)
 {
@@ -121,15 +109,15 @@ static size_t computeEdgesAndOffsetVertices(const GrMatrix& matrix,
         GrVec tangent = GrVec::Make(p.fY - q.fY, q.fX - p.fX);
         float scale = sign / tangent.length();
         float cross2 = p.fX * q.fY - q.fX * p.fY;
-        Edge edge(tangent.fX * scale,
+        GrDrawTarget::Edge edge(tangent.fX * scale,
                   tangent.fY * scale,
                   cross2 * scale + 0.5f);
         *edges->append() = edge;
         p = q;
     }
-    Edge prev_edge = *edges->back();
+    GrDrawTarget::Edge prev_edge = *edges->back();
     for (size_t i = 0; i < edges->count(); ++i) {
-        Edge edge = edges->at(i);
+        GrDrawTarget::Edge edge = edges->at(i);
         vertices[i] = prev_edge.intersect(edge);
         inverse.mapPoints(&vertices[i], 1);
         prev_edge = edge;
@@ -262,29 +250,30 @@ FINISHED:
 
     if (subpathCnt == 1 && !inverted && path.isConvex()) {
         if (target->isAntialiasState()) {
-            target->enableState(GrDrawTarget::kEdgeAA_StateBit);
             EdgeArray edges;
             GrMatrix inverse, matrix = target->getViewMatrix();
             target->getViewInverse(&inverse);
 
             count = computeEdgesAndOffsetVertices(matrix, inverse, base, count, &edges);
-            GrPoint triangle[3];
-            triangle[0] = base[0];
-            Edge triangleEdges[6];
-            triangleEdges[0] = *edges.back();
-            triangleEdges[1] = edges[0];
-            for (size_t i = 1; i < count - 1; i++) {
-                triangle[1] = base[i];
-                triangle[2] = base[i + 1];
-                triangleEdges[2] = edges[i - 1];
-                triangleEdges[3] = edges[i];
-                triangleEdges[4] = edges[i];
-                triangleEdges[5] = edges[i + 1];
-                target->setVertexSourceToArray(layout, triangle, 3);
-                target->setEdgeAAData(&triangleEdges[0].fX);
-                target->drawNonIndexed(kTriangles_PrimitiveType, 0, 3);
+            int maxEdges = target->getMaxEdges();
+            if (count <= maxEdges) {
+                // All edges fit; upload all edges and draw all verts as a fan
+                target->setVertexSourceToArray(layout, base, count);
+                target->setEdgeAAData(&edges[0], count);
+                target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count);
+            } else {
+                // Upload "maxEdges" edges and verts at a time, and draw as
+                // separate fans
+                for (size_t i = 0; i < count - 2; i += maxEdges - 2) {
+                    edges[i] = edges[0];
+                    base[i] = base[0];
+                    int size = GR_CT_MIN(count - i, maxEdges);
+                    target->setVertexSourceToArray(layout, &base[i], size);
+                    target->setEdgeAAData(&edges[i], size);
+                    target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, size);
+                }
             }
-            target->disableState(GrDrawTarget::kEdgeAA_StateBit);
+            target->setEdgeAAData(NULL, 0);
         } else {
             target->setVertexSourceToArray(layout, base, count);
             target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count);
index fa8b6a7..6da8822 100644 (file)
@@ -23,7 +23,7 @@
 #include "SkTypes.h"
 
 void GrPrintf(const char format[], ...) {
-    const size_t MAX_BUFFER_SIZE = 512;
+    const size_t MAX_BUFFER_SIZE = 2048;
 
     char buffer[MAX_BUFFER_SIZE + 1];
     va_list args;