GPU device preserves pixel values across read/write/read of unpremul pixel values
authorbsalomon@google.com <bsalomon@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Thu, 23 Feb 2012 15:39:54 +0000 (15:39 +0000)
committerbsalomon@google.com <bsalomon@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Thu, 23 Feb 2012 15:39:54 +0000 (15:39 +0000)
Review URL: http://codereview.appspot.com/5695047/

git-svn-id: http://skia.googlecode.com/svn/trunk@3237 2bbb7eff-a529-9590-31e7-b0007b416f81

gyp/tests.gyp
src/core/SkConfig8888.cpp
src/core/SkConfig8888.h
src/gpu/GrContext.cpp
src/gpu/GrGpu.h
src/gpu/gl/GrGLProgram.cpp
src/gpu/gl/GrGLProgram.h
src/gpu/gl/GrGpuGL.cpp
src/gpu/gl/GrGpuGL.h
src/gpu/gl/GrGpuGLShaders.cpp
tests/PremulAlphaRoundTripTest.cpp [new file with mode: 0644]

index 1a8f0cd..cf793ab 100644 (file)
@@ -52,6 +52,7 @@
         '../tests/PathTest.cpp',
         '../tests/PDFPrimitivesTest.cpp',
         '../tests/PointTest.cpp',
+        '../tests/PremulAlphaRoundTripTest.cpp',
         '../tests/QuickRejectTest.cpp',
         '../tests/Reader32Test.cpp',
         '../tests/ReadPixelsTest.cpp',
index 67a9e41..10a1b36 100644 (file)
@@ -255,3 +255,27 @@ void SkConvertConfig8888Pixels(uint32_t* dstPixels,
             break;
     }
 }
+
+// Packs a/r/g/b into a single 32-bit pixel laid out per 'config'. Premul and
+// unpremul variants of a config share a byte order, so they pack the same.
+uint32_t SkPackConfig8888(SkCanvas::Config8888 config,
+                          uint32_t a, uint32_t r, uint32_t g, uint32_t b) {
+    const bool nativeOrder =
+        SkCanvas::kNative_Premul_Config8888 == config ||
+        SkCanvas::kNative_Unpremul_Config8888 == config;
+    if (nativeOrder) {
+        return pack_config8888<SK_NATIVE_A_IDX, SK_NATIVE_R_IDX,
+                               SK_NATIVE_G_IDX, SK_NATIVE_B_IDX>(a, r, g, b);
+    }
+    if (SkCanvas::kBGRA_Premul_Config8888 == config ||
+        SkCanvas::kBGRA_Unpremul_Config8888 == config) {
+        return pack_config8888<3, 2, 1, 0>(a, r, g, b);
+    }
+    if (SkCanvas::kRGBA_Premul_Config8888 == config ||
+        SkCanvas::kRGBA_Unpremul_Config8888 == config) {
+        return pack_config8888<3, 0, 1, 2>(a, r, g, b);
+    }
+    // Any other value is not a Config8888 this function knows how to pack.
+    SkDEBUGFAIL("Unexpected config8888");
+    return 0;
+}
index d4ca764..a891370 100644 (file)
@@ -22,6 +22,15 @@ void SkConvertConfig8888Pixels(uint32_t* dstPixels,
                                int width,
                                int height);
 
+/**
+ * Packs the a, r, g, b values into the byte order specified by config.
+ */
+uint32_t SkPackConfig8888(SkCanvas::Config8888 config,
+                          uint32_t a,
+                          uint32_t r,
+                          uint32_t g,
+                          uint32_t b);
+
 namespace {
 
 /**
index dd1b276..5f2c43b 100644 (file)
@@ -1787,6 +1787,35 @@ bool GrContext::internalReadTexturePixels(GrTexture* texture,
     }
 }
 
+#include "SkConfig8888.h"
+
+namespace {
+/**
+ * Translates a GrPixelConfig into the corresponding SkCanvas::Config8888.
+ * Only the four 8888 configs handled below (RGBA/BGRA, premul/unpremul)
+ * have a Config8888 counterpart.
+ *
+ * Returns true and stores the result in *config8888 on success. Returns
+ * false and leaves *config8888 unmodified for every other GrPixelConfig.
+ */
+bool grconfig_to_config8888(GrPixelConfig config,
+                            SkCanvas::Config8888* config8888) {
+    if (kRGBA_8888_PM_GrPixelConfig == config) {
+        *config8888 = SkCanvas::kRGBA_Premul_Config8888;
+    } else if (kRGBA_8888_UPM_GrPixelConfig == config) {
+        *config8888 = SkCanvas::kRGBA_Unpremul_Config8888;
+    } else if (kBGRA_8888_PM_GrPixelConfig == config) {
+        *config8888 = SkCanvas::kBGRA_Premul_Config8888;
+    } else if (kBGRA_8888_UPM_GrPixelConfig == config) {
+        *config8888 = SkCanvas::kBGRA_Unpremul_Config8888;
+    } else {
+        // no byte-per-channel equivalent; the out param is left untouched
+        return false;
+    }
+    return true;
+}
+}
+
 bool GrContext::internalReadRenderTargetPixels(GrRenderTarget* target,
                                                int left, int top,
                                                int width, int height,
@@ -1803,19 +1832,34 @@ bool GrContext::internalReadRenderTargetPixels(GrRenderTarget* target,
             return false;
         }
     }
-    
-    // PM <-> UPM conversion requires a draw. Currently we only support drawing
-    // into a UPM target, not reading from a UPM texture. Thus, UPM->PM is not
-    // not supported at this time.
-    if (GrPixelConfigIsUnpremultiplied(target->config()) && 
-        !GrPixelConfigIsUnpremultiplied(config)) {
-        return false;
-    }
 
     if (!(kDontFlush_PixelOpsFlag & flags)) {
         this->flush();
     }
 
+    if (!GrPixelConfigIsUnpremultiplied(target->config()) &&
+        GrPixelConfigIsUnpremultiplied(config) &&
+        !fGpu->canPreserveReadWriteUnpremulPixels()) {
+        SkCanvas::Config8888 srcConfig8888, dstConfig8888;
+        if (!grconfig_to_config8888(target->config(), &srcConfig8888) ||
+            !grconfig_to_config8888(config, &dstConfig8888)) {
+            return false;
+        }
+        // Read back in the target's own config, propagating a failed read
+        // instead of converting a possibly unfilled buffer and reporting success.
+        if (!this->internalReadRenderTargetPixels(
+                    target, left, top, width, height, target->config(),
+                    buffer, rowBytes, kDontFlush_PixelOpsFlag)) {
+            return false;
+        }
+        // sw convert the pixels, in place, to the requested unpremul config
+        uint32_t* pixels = reinterpret_cast<uint32_t*>(buffer);
+        SkConvertConfig8888Pixels(pixels, rowBytes, dstConfig8888,
+                                  pixels, rowBytes, srcConfig8888,
+                                  width, height);
+        return true;
+    }
+
     GrTexture* src = target->asTexture();
     bool swapRAndB = NULL != src &&
                      fGpu->preferredReadPixelsConfig(config) ==
@@ -1968,6 +2012,28 @@ void GrContext::internalWriteRenderTargetPixels(GrRenderTarget* target,
         return;
     }
 #endif
+    if (!GrPixelConfigIsUnpremultiplied(target->config()) &&
+        GrPixelConfigIsUnpremultiplied(config) &&
+        !fGpu->canPreserveReadWriteUnpremulPixels()) {
+        SkCanvas::Config8888 srcConfig8888, dstConfig8888;
+        if (!grconfig_to_config8888(config, &srcConfig8888) ||
+            !grconfig_to_config8888(target->config(), &dstConfig8888)) {
+            return;
+        }
+        // allocate a tmp buffer and sw convert the pixels to premul
+        SkAutoSTMalloc<128 * 128, uint32_t> tmpPixels(width * height);
+        const uint32_t* src = reinterpret_cast<const uint32_t*>(buffer);
+        SkConvertConfig8888Pixels(tmpPixels.get(), 4 * width, dstConfig8888,
+                                  src, rowBytes, srcConfig8888,
+                                  width, height);
+        // upload the already premul pixels
+        this->internalWriteRenderTargetPixels(target,
+                                             left, top,
+                                             width, height,
+                                             target->config(),
+                                             tmpPixels, 4 * width, flags);
+        return;
+    }
 
     bool swapRAndB = fGpu->preferredReadPixelsConfig(config) ==
                      GrPixelConfigSwapRAndB(config);
index 28a0063..52282ed 100644 (file)
@@ -184,6 +184,13 @@ public:
     void forceRenderTargetFlush();
 
     /**
+     * If this returns true then a sequence that reads unpremultiplied pixels
+     * from a surface, writes back the same values, and reads them again will
+     * give the same pixel values back in both reads.
+     */
+    virtual bool canPreserveReadWriteUnpremulPixels() = 0;
+
+    /**
      * readPixels with some configs may be slow. Given a desired config this
      * function returns a fast-path config. The returned config must have the
      * same components, component sizes, and not require conversion between
index 868149f..7eecf91 100644 (file)
@@ -953,13 +953,22 @@ bool GrGLProgram::genProgram(const GrGLContextInfo& gl,
                             inCoverage.c_str(),
                             &segments.fFSCode);
         }
-        if (ProgramDesc::kNo_OutputPM == fProgramDesc.fOutputPM) {
-            segments.fFSCode.appendf("\t%s = %s.a <= 0.0 ? vec4(0,0,0,0) : vec4(%s.rgb / %s.a, %s.a);\n",
-                                     colorOutput.getName().c_str(),
-                                     colorOutput.getName().c_str(),
-                                     colorOutput.getName().c_str(),
-                                     colorOutput.getName().c_str(),
-                                     colorOutput.getName().c_str());
+        if (ProgramDesc::kUnpremultiplied_RoundDown_OutputConfig ==
+            fProgramDesc.fOutputConfig) {
+            segments.fFSCode.appendf("\t%s = %s.a <= 0.0 ? vec4(0,0,0,0) : vec4(floor(%s.rgb / %s.a * 255.0)/255.0, %s.a);\n",
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str());
+        } else if (ProgramDesc::kUnpremultiplied_RoundUp_OutputConfig ==
+                   fProgramDesc.fOutputConfig) {
+            segments.fFSCode.appendf("\t%s = %s.a <= 0.0 ? vec4(0,0,0,0) : vec4(ceil(%s.rgb / %s.a * 255.0)/255.0, %s.a);\n",
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str(),
+                                        colorOutput.getName().c_str());
         }
     }
 
@@ -1809,13 +1818,16 @@ void GrGLProgram::genStageCode(const GrGLContextInfo& gl,
 
     };
 
+    static const uint32_t kMulByAlphaMask =
+        (StageDesc::kMulRGBByAlpha_RoundUp_InConfigFlag |
+         StageDesc::kMulRGBByAlpha_RoundDown_InConfigFlag);
+
     const char* swizzle = "";
     if (desc.fInConfigFlags & StageDesc::kSwapRAndB_InConfigFlag) {
         GrAssert(!(desc.fInConfigFlags & StageDesc::kSmearAlpha_InConfigFlag));
         swizzle = ".bgra";
     } else if (desc.fInConfigFlags & StageDesc::kSmearAlpha_InConfigFlag) {
-        GrAssert(!(desc.fInConfigFlags &
-                   StageDesc::kMulRGBByAlpha_InConfigFlag));
+        GrAssert(!(desc.fInConfigFlags & kMulByAlphaMask));
         swizzle = ".aaaa";
     } 
 
@@ -1843,30 +1855,37 @@ void GrGLProgram::genStageCode(const GrGLContextInfo& gl,
 
     switch (desc.fFetchMode) {
     case StageDesc::k2x2_FetchMode:
-        GrAssert(!(desc.fInConfigFlags &
-                   StageDesc::kMulRGBByAlpha_InConfigFlag));
+        GrAssert(!(desc.fInConfigFlags & kMulByAlphaMask));
         gen2x2FS(stageNum, segments, locations, &sampleCoords,
             samplerName, texelSizeName, swizzle, fsOutColor,
             texFunc, modulate, complexCoord, coordDims);
         break;
     case StageDesc::kConvolution_FetchMode:
-        GrAssert(!(desc.fInConfigFlags &
-                   StageDesc::kMulRGBByAlpha_InConfigFlag));
+        GrAssert(!(desc.fInConfigFlags & kMulByAlphaMask));
         genConvolutionFS(stageNum, desc, segments,
             samplerName, kernel, swizzle, imageIncrementName, fsOutColor,
             sampleCoords, texFunc, modulate);
         break;
     default:
-        if (desc.fInConfigFlags & StageDesc::kMulRGBByAlpha_InConfigFlag) {
+        if (desc.fInConfigFlags & kMulByAlphaMask) {
+            // only one of the mul by alpha flags should be set
+            GrAssert(GrIsPow2(kMulByAlphaMask & desc.fInConfigFlags));
             GrAssert(!(desc.fInConfigFlags & 
                        StageDesc::kSmearAlpha_InConfigFlag));
             segments->fFSCode.appendf("\t%s = %s(%s, %s)%s;\n",
                                       fsOutColor, texFunc.c_str(), 
                                       samplerName, sampleCoords.c_str(),
                                       swizzle);
-            segments->fFSCode.appendf("\t%s = vec4(%s.rgb*%s.a,%s.a)%s;\n",
-                                      fsOutColor, fsOutColor, fsOutColor,
-                                      fsOutColor, modulate.c_str());
+            if (desc.fInConfigFlags &
+                StageDesc::kMulRGBByAlpha_RoundUp_InConfigFlag) {
+                segments->fFSCode.appendf("\t%s = vec4(ceil(%s.rgb*%s.a*255.0)/255.0,%s.a)%s;\n",
+                                          fsOutColor, fsOutColor, fsOutColor,
+                                          fsOutColor, modulate.c_str());
+            } else {
+                segments->fFSCode.appendf("\t%s = vec4(floor(%s.rgb*%s.a*255.0)/255.0,%s.a)%s;\n",
+                                          fsOutColor, fsOutColor, fsOutColor,
+                                          fsOutColor, modulate.c_str());
+            }
         } else {
             segments->fFSCode.appendf("\t%s = %s(%s, %s)%s%s;\n",
                                       fsOutColor, texFunc.c_str(), 
index 5c13679..e9030bc 100644 (file)
@@ -86,13 +86,17 @@ public:
             memset(this, 0, sizeof(ProgramDesc));
         }
 
-        enum OutputPM {
+        enum OutputConfig {
             // PM-color OR color with no alpha channel
-            kYes_OutputPM,
-            // nonPM-color with alpha channel
-            kNo_OutputPM,
-
-            kOutputPMCnt
+            kPremultiplied_OutputConfig,
+            // nonPM-color with alpha channel. Round components up after
+            // dividing by alpha. Assumes output is 8 bits for r, g, and b
+            kUnpremultiplied_RoundUp_OutputConfig,
+            // nonPM-color with alpha channel. Round components down after
+            // dividing by alpha. Assumes output is 8 bits for r, g, and b
+            kUnpremultiplied_RoundDown_OutputConfig,
+
+            kOutputConfigCnt
         };
 
         struct StageDesc {
@@ -114,7 +118,7 @@ public:
               described are performed after reading a texel.
              */
             enum InConfigFlags {
-                kNone_InConfigFlag              = 0x0,
+                kNone_InConfigFlag                      = 0x0,
 
                 /**
                   Swap the R and B channels. This is incompatible with
@@ -122,21 +126,27 @@ public:
                   the shader using GL_ARB_texture_swizzle if possible rather
                   than setting this flag.
                  */
-                kSwapRAndB_InConfigFlag         = 0x1,
+                kSwapRAndB_InConfigFlag                 = 0x1,
 
                 /**
                  Smear alpha across all four channels. This is incompatible with
-                 kSwapRAndB and kPremul.  It is prefereable to perform the
-                 smear outside the shader using GL_ARB_texture_swizzle if
+                 kSwapRAndB and kMulRGBByAlpha*. It is preferable to perform
+                 the smear outside the shader using GL_ARB_texture_swizzle if
                  possible rather than setting this flag.
                 */
-                kSmearAlpha_InConfigFlag        = 0x2,
+                kSmearAlpha_InConfigFlag                = 0x2,
 
                 /**
                  Multiply r,g,b by a after texture reads. This flag incompatible
                  with kSmearAlpha and may only be used with FetchMode kSingle.
+
+                 It is assumed the src texture has 8bit color components. After
+                 reading the texture one version rounds up to the next multiple
+                 of 1/255.0 and the other rounds down. At most one of these
+                 flags may be set.
                  */
-                kMulRGBByAlpha_InConfigFlag     =  0x4,
+                kMulRGBByAlpha_RoundUp_InConfigFlag     =  0x4,
+                kMulRGBByAlpha_RoundDown_InConfigFlag   =  0x8,
 
                 kDummyInConfigFlag,
                 kInConfigBitMask = (kDummyInConfigFlag-1) |
@@ -211,7 +221,7 @@ public:
 
         uint8_t fColorInput;        // casts to enum ColorInput
         uint8_t fCoverageInput;     // casts to enum CoverageInput
-        uint8_t fOutputPM;          // cases to enum OutputPM
+        uint8_t fOutputConfig;      // casts to enum OutputConfig
         uint8_t fDualSrcOutput;     // casts to enum DualSrcOutput
         int8_t fFirstCoverageStage;
         SkBool8 fEmitsPointSize;
index 68f33b1..bee2017 100644 (file)
@@ -207,6 +207,7 @@ GrGpuGL::GrGpuGL(const GrGLContextInfo& ctxInfo) : fGLContextInfo(ctxInfo) {
     this->initCaps();
 
     fLastSuccessfulStencilFmtIdx = 0;
+    fCanPreserveUnpremulRoundtrip = kUnknown_CanPreserveUnpremulRoundtrip;
 }
 
 GrGpuGL::~GrGpuGL() {
@@ -292,6 +293,85 @@ void GrGpuGL::initCaps() {
     fCaps.fFSAASupport = GrGLCaps::kNone_MSFBOType != this->glCaps().msFBOType();
 }
 
+// Reports whether unpremul pixels that are read from a surface, written
+// back, and read again keep their values. Probed once, then cached.
+bool GrGpuGL::canPreserveReadWriteUnpremulPixels() {
+    if (kUnknown_CanPreserveUnpremulRoundtrip !=
+        fCanPreserveUnpremulRoundtrip) {
+        return kYes_CanPreserveUnpremulRoundtrip ==
+               fCanPreserveUnpremulRoundtrip;
+    }
+
+    static const int kSize = 256;
+    static const int kPixelCnt = kSize * kSize;
+    SkAutoTMalloc<uint32_t> storage(3 * kPixelCnt);
+    uint32_t* srcData = storage.get();
+    uint32_t* firstRead = srcData + kPixelCnt;
+    uint32_t* secondRead = firstRead + kPixelCnt;
+
+    // The row index fills byte 3 and the column index fills bytes 0-2, so
+    // each pairing of the two values occurs exactly once in the source.
+    for (int row = 0; row < kSize; ++row) {
+        for (int col = 0; col < kSize; ++col) {
+            uint8_t* bytes =
+                reinterpret_cast<uint8_t*>(&srcData[kSize * row + col]);
+            bytes[0] = col;
+            bytes[1] = col;
+            bytes[2] = col;
+            bytes[3] = row;
+        }
+    }
+
+    // We have broader support for read/write pixels on render targets
+    // than on textures.
+    GrTextureDesc desc;
+    desc.fFlags = kRenderTarget_GrTextureFlagBit |
+                  kNoStencil_GrTextureFlagBit;
+    desc.fWidth = kSize;
+    desc.fHeight = kSize;
+    desc.fConfig = kRGBA_8888_GrPixelConfig;
+    desc.fSampleCnt = 0;
+
+    SkAutoTUnref<GrTexture> tex(this->createTexture(desc, NULL, 0));
+    if (NULL == tex.get()) {
+        // nothing is cached here, so the next call probes again
+        return false;
+    }
+    GrRenderTarget* rt = tex.get()->asRenderTarget();
+    GrAssert(NULL != rt);
+
+    static const UnpremulConversion gMethods[] = {
+        kUpOnWrite_DownOnRead_UnpremulConversion,
+        kDownOnWrite_UpOnRead_UnpremulConversion,
+    };
+
+    // pretend that we can do the roundtrip to avoid recursive calls to
+    // this function
+    fCanPreserveUnpremulRoundtrip = kYes_CanPreserveUnpremulRoundtrip;
+    bool preserved = false;
+    for (size_t m = 0; !preserved && m < GR_ARRAY_COUNT(gMethods); ++m) {
+        fUnpremulConversion = gMethods[m];
+        rt->writePixels(0, 0, kSize, kSize,
+                        kRGBA_8888_UPM_GrPixelConfig, srcData, 0);
+        rt->readPixels(0, 0, kSize, kSize,
+                       kRGBA_8888_UPM_GrPixelConfig, firstRead, 0);
+        rt->writePixels(0, 0, kSize, kSize,
+                        kRGBA_8888_UPM_GrPixelConfig, firstRead, 0);
+        rt->readPixels(0, 0, kSize, kSize,
+                       kRGBA_8888_UPM_GrPixelConfig, secondRead, 0);
+        // this method works only if the two reads agree on every pixel
+        int p = 0;
+        while (p < kPixelCnt && firstRead[p] == secondRead[p]) {
+            ++p;
+        }
+        preserved = (kPixelCnt == p);
+    }
+    fCanPreserveUnpremulRoundtrip = preserved ?
+                    kYes_CanPreserveUnpremulRoundtrip :
+                    kNo_CanPreserveUnpremulRoundtrip;
+    return preserved;
+}
+
 GrPixelConfig GrGpuGL::preferredReadPixelsConfig(GrPixelConfig config) const {
     if (GR_GL_RGBA_8888_PIXEL_OPS_SLOW && GrPixelConfigIsRGBA8888(config)) {
         return GrPixelConfigSwapRAndB(config);
index eac8d4f..398a2fc 100644 (file)
@@ -46,6 +46,8 @@ public:
                                     size_t rowBytes) const SK_OVERRIDE;
     virtual bool fullReadPixelsIsFasterThanPartial() const SK_OVERRIDE;
 
+    virtual bool canPreserveReadWriteUnpremulPixels() SK_OVERRIDE;
+
 protected:
     GrGpuGL(const GrGLContextInfo& ctxInfo);
 
@@ -62,6 +64,11 @@ protected:
         bool fSmoothLineEnabled;
     } fHWAAState;
 
+    enum UnpremulConversion {  // which rounding pair premul<->unpremul uses
+        kUpOnWrite_DownOnRead_UnpremulConversion,
+        kDownOnWrite_UpOnRead_UnpremulConversion
+    } fUnpremulConversion;  // consulted when shader programs are built
+
     GrDrawState fHWDrawState;
     bool        fHWStencilClip;
 
@@ -246,6 +253,11 @@ private:
     // from our loop that tries stencil formats and calls check fb status.
     int fLastSuccessfulStencilFmtIdx;
 
+    enum CanPreserveUnpremulRoundtrip {
+        kUnknown_CanPreserveUnpremulRoundtrip,  // set by the constructor
+        kNo_CanPreserveUnpremulRoundtrip,       // every probed method failed
+        kYes_CanPreserveUnpremulRoundtrip,      // a probed method round-trips
+    } fCanPreserveUnpremulRoundtrip;  // cached probe result
 
     bool fPrintedCaps;
 
index 4093a0d..a0a2df5 100644 (file)
@@ -173,8 +173,9 @@ bool GrGpuGLShaders::programUnitTest() {
     static const int IN_CONFIG_FLAGS[] = {
         StageDesc::kNone_InConfigFlag,
         StageDesc::kSwapRAndB_InConfigFlag,
-        StageDesc::kSwapRAndB_InConfigFlag | StageDesc::kMulRGBByAlpha_InConfigFlag,
-        StageDesc::kMulRGBByAlpha_InConfigFlag,
+        StageDesc::kSwapRAndB_InConfigFlag |
+        StageDesc::kMulRGBByAlpha_RoundUp_InConfigFlag,
+        StageDesc::kMulRGBByAlpha_RoundDown_InConfigFlag,
         StageDesc::kSmearAlpha_InConfigFlag,
     };
     GrGLProgram program;
@@ -210,7 +211,7 @@ bool GrGpuGLShaders::programUnitTest() {
         pdesc.fExperimentalGS = this->getCaps().fGeometryShaderSupport &&
                                 random_bool(&random);
 #endif
-        pdesc.fOutputPM =  random_int(&random, ProgramDesc::kOutputPMCnt);
+        pdesc.fOutputConfig =  random_int(&random, ProgramDesc::kOutputConfigCnt);
 
         bool edgeAA = random_bool(&random);
         if (edgeAA) {
@@ -264,17 +265,20 @@ bool GrGpuGLShaders::programUnitTest() {
                 stage.fOptFlags |= StageDesc::kNoPerspective_OptFlagBit;
             }
             stage.setEnabled(VertexUsesStage(s, pdesc.fVertexLayout));
+            static const uint32_t kMulByAlphaMask =
+                StageDesc::kMulRGBByAlpha_RoundUp_InConfigFlag |
+                StageDesc::kMulRGBByAlpha_RoundDown_InConfigFlag;
             switch (stage.fFetchMode) {
                 case StageDesc::kSingle_FetchMode:
                     stage.fKernelWidth = 0;
                     break;
                 case StageDesc::kConvolution_FetchMode:
                     stage.fKernelWidth = random_int(&random, 2, 8);
-                    stage.fInConfigFlags &= ~StageDesc::kMulRGBByAlpha_InConfigFlag;
+                    stage.fInConfigFlags &= ~kMulByAlphaMask;
                     break;
                 case StageDesc::k2x2_FetchMode:
                     stage.fKernelWidth = 0;
-                    stage.fInConfigFlags &= ~StageDesc::kMulRGBByAlpha_InConfigFlag;
+                    stage.fInConfigFlags &= ~kMulByAlphaMask;
                     break;
             }
         }
@@ -1102,7 +1106,17 @@ void GrGpuGLShaders::buildProgram(GrPrimitiveType type,
                 }
             }
             if (GrPixelConfigIsUnpremultiplied(texture->config())) {
-                stage.fInConfigFlags |= StageDesc::kMulRGBByAlpha_InConfigFlag;
+                // The shader generator assumes that color channels are bytes
+                // when rounding.
+                GrAssert(4 == GrBytesPerPixel(texture->config()));
+                if (kUpOnWrite_DownOnRead_UnpremulConversion ==
+                    fUnpremulConversion) {
+                    stage.fInConfigFlags |=
+                        StageDesc::kMulRGBByAlpha_RoundDown_InConfigFlag;
+                } else {
+                    stage.fInConfigFlags |=
+                        StageDesc::kMulRGBByAlpha_RoundUp_InConfigFlag;
+                }
             }
 
             if (sampler.getFilter() == GrSamplerState::kConvolution_Filter) {
@@ -1120,9 +1134,18 @@ void GrGpuGLShaders::buildProgram(GrPrimitiveType type,
     }
 
     if (GrPixelConfigIsUnpremultiplied(drawState.getRenderTarget()->config())) {
-        desc.fOutputPM = ProgramDesc::kNo_OutputPM;
+        // The shader generator assumes that color channels are bytes
+        // when rounding.
+        GrAssert(4 == GrBytesPerPixel(drawState.getRenderTarget()->config()));
+        if (kUpOnWrite_DownOnRead_UnpremulConversion == fUnpremulConversion) {
+            desc.fOutputConfig =
+                ProgramDesc::kUnpremultiplied_RoundUp_OutputConfig;
+        } else {
+            desc.fOutputConfig =
+                ProgramDesc::kUnpremultiplied_RoundDown_OutputConfig;
+        }
     } else {
-        desc.fOutputPM = ProgramDesc::kYes_OutputPM;
+        desc.fOutputConfig = ProgramDesc::kPremultiplied_OutputConfig;
     }
 
     desc.fDualSrcOutput = ProgramDesc::kNone_DualSrcOutput;
diff --git a/tests/PremulAlphaRoundTripTest.cpp b/tests/PremulAlphaRoundTripTest.cpp
new file mode 100644 (file)
index 0000000..c4ec6ab
--- /dev/null
@@ -0,0 +1,106 @@
+
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "Test.h"
+#include "SkCanvas.h"
+#include "SkConfig8888.h"
+#include "SkGpuDevice.h"
+
+
+namespace {
+
+// Writes a 256x256 grid to the canvas in which row a, column r holds alpha a
+// and red r (green and blue zero), packed per unpremulConfig.
+void fillCanvas(SkCanvas* canvas, SkCanvas::Config8888 unpremulConfig) {
+    SkBitmap bmp;
+    bmp.setConfig(SkBitmap::kARGB_8888_Config, 256, 256);
+    bmp.allocPixels();
+    SkAutoLockPixels alp(bmp);
+    uint32_t* dst = reinterpret_cast<uint32_t*>(bmp.getPixels());
+    for (int i = 0; i < 256 * 256; ++i) {
+        // row index (i / 256) is alpha, column index (i % 256) is red
+        *dst++ = SkPackConfig8888(unpremulConfig, i / 256, i % 256, 0, 0);
+    }
+    canvas->writePixels(bmp, 0, 0, unpremulConfig);
+}
+
+static const SkCanvas::Config8888 gUnpremulConfigs[] = {
+    SkCanvas::kNative_Unpremul_Config8888,
+/**
+ * There is a bug in Ganesh (http://code.google.com/p/skia/issues/detail?id=438)
+ * that causes the readback of pixels from a BGRA canvas to an RGBA bitmap to
+ * fail, so BGRA is skipped on Android. Remove this guard once that is fixed.
+ */
+#if !defined(SK_BUILD_FOR_ANDROID)
+    SkCanvas::kBGRA_Unpremul_Config8888,
+#endif
+    SkCanvas::kRGBA_Unpremul_Config8888,
+};
+
+// For an SkDevice backed and then an SkGpuDevice backed canvas, and for each
+// config in gUnpremulConfigs: fill the canvas, read it, write that result
+// back, read again, and require the two reads to match pixel for pixel.
+void PremulAlphaRoundTripTest(skiatest::Reporter* reporter,
+                              GrContext* context) {
+    static const int kDim = 256;
+    SkCanvas canvas;
+    for (int dtype = 0; dtype < 2; ++dtype) {
+        if (0 != dtype) {
+#if SK_SCALAR_IS_FIXED
+            // GPU device known not to work in the fixed pt build.
+            continue;
+#endif
+            canvas.setDevice(new SkGpuDevice(context,
+                                             SkBitmap::kARGB_8888_Config,
+                                             kDim, kDim))->unref();
+        } else {
+            canvas.setDevice(new SkDevice(SkBitmap::kARGB_8888_Config,
+                                          kDim, kDim, false))->unref();
+        }
+
+        SkBitmap firstRead;
+        firstRead.setConfig(SkBitmap::kARGB_8888_Config, kDim, kDim);
+        firstRead.allocPixels();
+        SkBitmap secondRead;
+        secondRead.setConfig(SkBitmap::kARGB_8888_Config, kDim, kDim);
+        secondRead.allocPixels();
+
+        for (size_t c = 0; c < SK_ARRAY_COUNT(gUnpremulConfigs); ++c) {
+            const SkCanvas::Config8888 config = gUnpremulConfigs[c];
+            fillCanvas(&canvas, config);
+            {
+                // start both destination bitmaps from all-zero pixels
+                SkAutoLockPixels lock1(firstRead);
+                SkAutoLockPixels lock2(secondRead);
+                sk_bzero(firstRead.getPixels(), firstRead.getSafeSize());
+                sk_bzero(secondRead.getPixels(), secondRead.getSafeSize());
+            }
+
+            // read, write what was read back, then read again
+            canvas.readPixels(&firstRead, 0, 0, config);
+            canvas.writePixels(firstRead, 0, 0, config);
+            canvas.readPixels(&secondRead, 0, 0, config);
+
+            // the write-back must not have altered any pixel
+            SkAutoLockPixels lock1(firstRead);
+            SkAutoLockPixels lock2(secondRead);
+            uint32_t* before =
+                reinterpret_cast<uint32_t*>(firstRead.getPixels());
+            uint32_t* after =
+                reinterpret_cast<uint32_t*>(secondRead.getPixels());
+            for (int i = 0; i < kDim * kDim; ++i) {
+                REPORTER_ASSERT(reporter, before[i] == after[i]);
+            }
+        }
+    }
+}
+}
+
+#include "TestClassDef.h"
+DEFINE_GPUTESTCLASS("PremulAlphaRoundTripTest", PremulAlphaRoundTripTestClass, PremulAlphaRoundTripTest)
+