Manually unroll loop in convolution (blur) - most GL drivers do, but some
author tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Mon, 11 Jun 2012 12:42:24 +0000 (12:42 +0000)
committer tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Mon, 11 Jun 2012 12:42:24 +0000 (12:42 +0000)
don't, and this leads to 20-30% benchmark speedup.

http://codereview.appspot.com/6308057/

git-svn-id: http://skia.googlecode.com/svn/trunk@4218 2bbb7eff-a529-9590-31e7-b0007b416f81

src/gpu/effects/GrConvolutionEffect.cpp

index 14e817f..5aaa185 100644 (file)
@@ -84,29 +84,25 @@ void GrGLConvolutionEffect::emitFS(GrGLShaderBuilder* state,
                                    const char* inputColor,
                                    const char* samplerName) {
     GrStringBuilder* code = &state->fFSCode;
- // const char* texFunc = "texture2D";
- // bool complexCoord = false;
 
-    state->fFSCode.appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n");
+    code->appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n");
 
     code->appendf("\t\tvec2 coord = %s;\n", state->fSampleCoords.c_str());
-    code->appendf("\t\tfor (int i = 0; i < %d; i++) {\n", this->width());
 
-    // Creates the string "kernel[i]" with workarounds for
-    // possible driver bugs
-    GrStringBuilder kernelIndex;
-    fKernelVar->appendArrayAccess("i", &kernelIndex);
-    state->fFSCode.appendf("\t\t\tsum += ");
-    state->emitTextureLookup(samplerName, "coord");
-    state->fFSCode.appendf(" * %s;\n", kernelIndex.c_str());
-
-    code->appendf("\t\t\tcoord += %s;\n",
-                  fImageIncrementVar->getName().c_str());
-    code->appendf("\t\t}\n");
+    // Manually unroll loop because some drivers don't; yields 20-30% speedup.
+    for (int i = 0; i < this->width(); i++) {
+        GrStringBuilder index;
+        GrStringBuilder kernelIndex;
+        index.appendS32(i);
+        fKernelVar->appendArrayAccess(index.c_str(), &kernelIndex);
+        code->appendf("\t\tsum += ");
+        state->emitTextureLookup(samplerName, "coord");
+        code->appendf(" * %s;\n", kernelIndex.c_str());
+        code->appendf("\t\tcoord += %s;\n",
+                      fImageIncrementVar->getName().c_str());
+    }
 
-    state->fFSCode.appendf("\t\t%s = sum%s;\n",
-                           outputColor,
-                           state->fModulate.c_str());
+    code->appendf("\t\t%s = sum%s;\n", outputColor, state->fModulate.c_str());
 }
 
 void GrGLConvolutionEffect::initUniforms(const GrGLInterface* gl,