// used for cheap 2x2 dithering when the colors are opaque
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ *  Widen a 5-bit value (0..31) to the 0..32 range used as a blend scale:
+ *  inputs of 16 and above are bumped by one, so 31 becomes exactly 32.
+ */
+static inline int SkUpscale31To32(int value) {
+    SkASSERT((unsigned)value <= 31);
+    const int bump = value >> 4;    // 1 for 16..31, 0 for 0..15
+    return value + bump;
+}
+
+/**
+ *  Move dst toward src by scale/32. A scale of 0 yields dst and a scale of
+ *  32 yields src. src and dst are 8-bit components; scale is 0..32.
+ */
+static inline int SkBlend32(int src, int dst, int scale) {
+    SkASSERT((unsigned)src <= 0xFF);
+    SkASSERT((unsigned)dst <= 0xFF);
+    SkASSERT((unsigned)scale <= 32);
+    const int weightedDelta = (src - dst) * scale;
+    return dst + (weightedDelta >> 5);
+}
+
+/**
+ *  Blend one source color into a single dst pixel through an LCD16 mask
+ *  value, where each mask channel is the coverage for that color channel.
+ *
+ *  srcA must already be upscaled to 0..256; srcR/srcG/srcB are 8-bit
+ *  components. A zero mask returns dst untouched; otherwise the result is
+ *  packed with an alpha of 0xFF.
+ */
+static SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
+                              SkPMColor dst, uint16_t mask) {
+    // No coverage on any channel: nothing to blend.
+    if (0 == mask) {
+        return dst;
+    }
+
+    // Reduce every mask channel to 5 bits (green may be stored with 6),
+    // then widen 0..31 to 0..32 so the value can drive SkBlend32.
+    const int covR = SkUpscale31To32(SkGetPackedR16(mask) >> (SK_R16_BITS - 5));
+    const int covG = SkUpscale31To32(SkGetPackedG16(mask) >> (SK_G16_BITS - 5));
+    const int covB = SkUpscale31To32(SkGetPackedB16(mask) >> (SK_B16_BITS - 5));
+
+    // Fold the source alpha (0..256) into the per-channel coverage.
+    const int scaleR = covR * srcA >> 8;
+    const int scaleG = covG * srcA >> 8;
+    const int scaleB = covB * srcA >> 8;
+
+    const int outR = SkBlend32(srcR, SkGetPackedR32(dst), scaleR);
+    const int outG = SkBlend32(srcG, SkGetPackedG32(dst), scaleG);
+    const int outB = SkBlend32(srcB, SkGetPackedB32(dst), scaleB);
+
+    // LCD blitting is only supported if the dst is known/required
+    // to be opaque, hence the constant alpha.
+    return SkPackARGB32(0xFF, outR, outG, outB);
+}
+
+/**
+ *  Variant of the LCD16 blend for a source color that needs no alpha
+ *  scaling: the mask coverage is used directly as the blend scale.
+ *
+ *  A zero mask returns dst untouched and a mask of 0xFFFF returns
+ *  opaqueDst as-is; any other mask blends srcR/srcG/srcB into dst per
+ *  channel and packs the result with an alpha of 0xFF.
+ */
+static SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
+                                    SkPMColor dst, uint16_t mask,
+                                    SkPMColor opaqueDst) {
+    switch (mask) {
+        case 0:         // no coverage
+            return dst;
+        case 0xFFFF:    // full coverage on every channel
+            return opaqueDst;
+        default:
+            break;
+    }
+
+    // Reduce every mask channel to 5 bits (green may be stored with 6),
+    // then widen 0..31 to 0..32 so the value can drive SkBlend32.
+    const int scaleR = SkUpscale31To32(SkGetPackedR16(mask) >> (SK_R16_BITS - 5));
+    const int scaleG = SkUpscale31To32(SkGetPackedG16(mask) >> (SK_G16_BITS - 5));
+    const int scaleB = SkUpscale31To32(SkGetPackedB16(mask) >> (SK_B16_BITS - 5));
+
+    const int outR = SkBlend32(srcR, SkGetPackedR32(dst), scaleR);
+    const int outG = SkBlend32(srcG, SkGetPackedG32(dst), scaleG);
+    const int outB = SkBlend32(srcB, SkGetPackedB32(dst), scaleB);
+
+    // LCD blitting is only supported if the dst is known/required
+    // to be opaque, hence the constant alpha.
+    return SkPackARGB32(0xFF, outR, outG, outB);
+}
+
+/**
+ *  Portable row proc: blend `color` into `width` dst pixels through the
+ *  matching LCD16 mask values in src. The trailing SkPMColor argument is
+ *  unused by this variant.
+ */
+static void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
+                           SkColor color, int width, SkPMColor) {
+    // SkBlendLCD16 expects alpha in 0..256 rather than 0..255.
+    const int alpha256 = SkAlpha255To256(SkColorGetA(color));
+    const int red = SkColorGetR(color);
+    const int green = SkColorGetG(color);
+    const int blue = SkColorGetB(color);
+
+    for (int x = 0; x < width; ++x) {
+        dst[x] = SkBlendLCD16(alpha256, red, green, blue, dst[x], src[x]);
+    }
+}
+
+/**
+ *  Portable row proc for the opaque case: blend `color` into `width` dst
+ *  pixels through the matching LCD16 mask values in src. opaqueDst is
+ *  handed to SkBlendLCD16Opaque, which returns it for fully covered
+ *  (0xFFFF) mask values.
+ */
+static void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
+                                 SkColor color, int width,
+                                 SkPMColor opaqueDst) {
+    const int red = SkColorGetR(color);
+    const int green = SkColorGetG(color);
+    const int blue = SkColorGetB(color);
+
+    for (int x = 0; x < width; ++x) {
+        dst[x] = SkBlendLCD16Opaque(red, green, blue, dst[x], src[x],
+                                    opaqueDst);
+    }
+}
+
#endif
typedef void (*ColorProc)(void* dst, size_t dstRB,
const void* mask, size_t maskRB,
SkColor color, int width, int height);
+
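+ /**
+ * Function pointer that blits a row of mask(lcd16) into a row of dst
+ * colorized by a single color. The number of pixels to blit is specified
+ * by width. opaqueDst is the premultiplied form of color: the opaque row
+ * procs store it wherever the mask is fully covered (0xFFFF), and the
+ * non-opaque row procs ignore it.
+ */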
+ typedef void (*BlitLCD16RowProc)(SkPMColor dst[], const uint16_t src[],
+ SkColor color, int width,
+ SkPMColor opaqueDst);
/**
* Function pointer that blits a row of src colors through a row of a mask
* or NULL if no optimized routine is available.
*/
static ColorProc PlatformColorProcs(SkBitmap::Config, SkMask::Format, SkColor);
+
+ /**
+ * Public entry-point to return a blitcolor BlitLCD16RowProc.
+ */
+ static BlitLCD16RowProc BlitLCD16RowFactory(bool isOpaque);
+
+ /**
+ * Return either platform specific optimized blitcolor BlitLCD16RowProc,
+ * or NULL if no optimized routine is available.
+ */
+ static BlitLCD16RowProc PlatformBlitRowProcs16(bool isOpaque);
enum RowFlags {
kSrcIsOpaque_RowFlag = 1 << 0
} while (--height != 0);
}
-///////////////////////////////////////////////////////////////////////////////
-
-static inline int upscale31To32(int value) {
- SkASSERT((unsigned)value <= 31);
- return value + (value >> 4);
-}
-
-static inline int blend32(int src, int dst, int scale) {
- SkASSERT((unsigned)src <= 0xFF);
- SkASSERT((unsigned)dst <= 0xFF);
- SkASSERT((unsigned)scale <= 32);
- return dst + ((src - dst) * scale >> 5);
-}
-
-static void blit_lcd16_row(SkPMColor dst[], const uint16_t src[],
- SkColor color, int width, SkPMColor) {
- int srcA = SkColorGetA(color);
- int srcR = SkColorGetR(color);
- int srcG = SkColorGetG(color);
- int srcB = SkColorGetB(color);
-
- srcA = SkAlpha255To256(srcA);
-
- for (int i = 0; i < width; i++) {
- uint16_t mask = src[i];
- if (0 == mask) {
- continue;
- }
-
- SkPMColor d = dst[i];
-
- /* We want all of these in 5bits, hence the shifts in case one of them
- * (green) is 6bits.
- */
- int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
- int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
- int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
-
- // Now upscale them to 0..32, so we can use blend32
- maskR = upscale31To32(maskR);
- maskG = upscale31To32(maskG);
- maskB = upscale31To32(maskB);
-
- maskR = maskR * srcA >> 8;
- maskG = maskG * srcA >> 8;
- maskB = maskB * srcA >> 8;
-
- int dstR = SkGetPackedR32(d);
- int dstG = SkGetPackedG32(d);
- int dstB = SkGetPackedB32(d);
-
- // LCD blitting is only supported if the dst is known/required
- // to be opaque
- dst[i] = SkPackARGB32(0xFF,
- blend32(srcR, dstR, maskR),
- blend32(srcG, dstG, maskG),
- blend32(srcB, dstB, maskB));
+SkBlitMask::BlitLCD16RowProc SkBlitMask::BlitLCD16RowFactory(bool isOpaque) {
+ BlitLCD16RowProc proc = PlatformBlitRowProcs16(isOpaque);
+ if (proc) {
+ return proc;
}
-}
-
-static void blit_lcd16_opaque_row(SkPMColor dst[], const uint16_t src[],
- SkColor color, int width, SkPMColor opaqueDst) {
- int srcR = SkColorGetR(color);
- int srcG = SkColorGetG(color);
- int srcB = SkColorGetB(color);
- for (int i = 0; i < width; i++) {
- uint16_t mask = src[i];
- if (0 == mask) {
- continue;
- }
- if (0xFFFF == mask) {
- dst[i] = opaqueDst;
- continue;
- }
-
- SkPMColor d = dst[i];
-
- /* We want all of these in 5bits, hence the shifts in case one of them
- * (green) is 6bits.
- */
- int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
- int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
- int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);
-
- // Now upscale them to 0..32, so we can use blend32
- maskR = upscale31To32(maskR);
- maskG = upscale31To32(maskG);
- maskB = upscale31To32(maskB);
-
- int dstR = SkGetPackedR32(d);
- int dstG = SkGetPackedG32(d);
- int dstB = SkGetPackedB32(d);
-
- // LCD blitting is only supported if the dst is known/required
- // to be opaque
- dst[i] = SkPackARGB32(0xFF,
- blend32(srcR, dstR, maskR),
- blend32(srcG, dstG, maskG),
- blend32(srcB, dstB, maskB));
+ if (isOpaque) {
+ return SkBlitLCD16OpaqueRow;
+ } else {
+ return SkBlitLCD16Row;
}
}
const uint16_t* srcRow = (const uint16_t*)mask;
SkPMColor opaqueDst;
- void (*proc)(SkPMColor dst[], const uint16_t src[],
- SkColor color, int width, SkPMColor);
- if (0xFF == SkColorGetA(color)) {
- proc = blit_lcd16_opaque_row;
+ SkBlitMask::BlitLCD16RowProc proc = NULL;
+ bool isOpaque = (0xFF == SkColorGetA(color));
+ proc = SkBlitMask::BlitLCD16RowFactory(isOpaque);
+ SkASSERT(proc != NULL);
+
+ if (isOpaque) {
opaqueDst = SkPreMultiplyColor(color);
} else {
- proc = blit_lcd16_row;
opaqueDst = 0; // ignored
}
int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
// Now upscale them to 0..32, so we can use blend32
- maskR = upscale31To32(maskR);
- maskG = upscale31To32(maskG);
- maskB = upscale31To32(maskB);
+ maskR = SkUpscale31To32(maskR);
+ maskG = SkUpscale31To32(maskG);
+ maskB = SkUpscale31To32(maskB);
int dstR = SkGetPackedR32(d);
int dstG = SkGetPackedG32(d);
// LCD blitting is only supported if the dst is known/required
// to be opaque
dst[i] = SkPackARGB32(0xFF,
- blend32(srcR, dstR, maskR),
- blend32(srcG, dstG, maskG),
- blend32(srcB, dstB, maskB));
+ SkBlend32(srcR, dstR, maskR),
+ SkBlend32(srcG, dstG, maskG),
+ SkBlend32(srcB, dstB, maskB));
}
}
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
size_t maskRB, SkColor origColor,
- int width, int height)
-{
+ int width, int height) {
SkPMColor color = SkPreMultiplyColor(origColor);
size_t dstOffset = dstRB - (width << 2);
size_t maskOffset = maskRB - width;
mask += maskOffset;
} while (--height != 0);
}
+
+/**
+ *  Blend four 32-bit dst pixels toward the source color through four LCD16
+ *  mask values, with the coverage additionally scaled by the source alpha.
+ *  SIMD counterpart of SkBlendLCD16.
+ *
+ *  @param srci   source color unpacked to one 16-bit lane per channel
+ *                (the same pixel in both halves of the register)
+ *  @param dst    four packed 32-bit destination pixels
+ *  @param mask   four LCD16 mask values, each zero-extended to 32 bits;
+ *                overwritten with the repacked 5-bit coverage
+ *  @param scale  source alpha in 0..256 replicated into every 16-bit lane
+ *  @return the four blended pixels. Lanes whose mask was zero are returned
+ *          unchanged; every other lane has its alpha forced to 0xFF, the
+ *          same result SkBlendLCD16 produces for a single pixel.
+ */
+static __m128i SkBlendLCD16_SSE2(__m128i &srci, __m128i &dst,
+                                 __m128i &mask, __m128i &scale) {
+    const __m128i zero = _mm_setzero_si128();
+
+    // Remember which pixels have no coverage at all before the mask is
+    // repacked (repacking drops the low green bit, so the test has to be
+    // made on the original 16-bit value).
+    const __m128i noCoverage = _mm_cmpeq_epi32(mask, zero);
+
+    // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
+    // They are placed at bits 16..20, 8..12 and 0..4 of each 32-bit lane.
+    __m128i r = _mm_and_si128(_mm_slli_epi32(mask,
+                                  16-SK_R16_SHIFT-(SK_R16_BITS-5)),
+                              _mm_set1_epi32(0x001F0000));
+
+    __m128i g = _mm_and_si128(_mm_slli_epi32(mask,
+                                  8-SK_G16_SHIFT-(SK_G16_BITS-5)),
+                              _mm_set1_epi32(0x00001F00));
+
+    __m128i b = _mm_and_si128(_mm_slli_epi32(mask,
+                                  SK_B16_BITS-5),
+                              _mm_set1_epi32(0x0000001F));
+
+    // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
+    mask = _mm_or_si128(_mm_or_si128(r, g), b);
+
+    // Interleave R,G,B into the lower byte of word.
+    __m128i maskLo = _mm_unpacklo_epi8(mask, zero);
+    __m128i maskHi = _mm_unpackhi_epi8(mask, zero);
+
+    // Upscale to 0..32
+    maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
+    maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
+
+    // Scale the coverage by the source alpha (0..256).
+    maskLo = _mm_srli_epi16(_mm_mullo_epi16(maskLo, scale), 8);
+    maskHi = _mm_srli_epi16(_mm_mullo_epi16(maskHi, scale), 8);
+
+    // Interleave R,G,B into the lower byte of the word.
+    __m128i dstLo = _mm_unpacklo_epi8(dst, zero);
+    __m128i dstHi = _mm_unpackhi_epi8(dst, zero);
+
+    // (src - dst) * coverage >> 5; the shift is arithmetic because the
+    // difference may be negative.
+    maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(srci, dstLo));
+    maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(srci, dstHi));
+
+    maskLo = _mm_srai_epi16(maskLo, 5);
+    maskHi = _mm_srai_epi16(maskHi, 5);
+
+    // Add two pixels into result.
+    __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
+    __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
+
+    // Pack into 4 32bit dst pixels
+    __m128i result = _mm_packus_epi16(resultLo, resultHi);
+
+    // The coverage has no alpha component, so the blend above leaves the
+    // dst alpha as it was, while the scalar path writes 0xFF for every
+    // pixel with a non-zero mask. Force the top byte (the one byte not
+    // used by the R/G/B positions above) in exactly those lanes so both
+    // paths agree.
+    const __m128i alphaByte = _mm_slli_epi32(_mm_set1_epi32(0xFF), 24);
+    return _mm_or_si128(result, _mm_andnot_si128(noCoverage, alphaByte));
+}
+
+/**
+ *  Blend four 32-bit dst pixels toward an opaque source color through four
+ *  LCD16 mask values. SIMD counterpart of SkBlendLCD16Opaque.
+ *
+ *  @param srci  source color unpacked to one 16-bit lane per channel
+ *               (the same pixel in both halves of the register)
+ *  @param dst   four packed 32-bit destination pixels
+ *  @param mask  four LCD16 mask values, each zero-extended to 32 bits;
+ *               overwritten with the repacked 5-bit coverage
+ *  @return the four blended pixels. Lanes whose mask was zero are returned
+ *          unchanged; every other lane has its alpha forced to 0xFF, the
+ *          same result SkBlendLCD16Opaque produces for a single pixel.
+ */
+static __m128i SkBlendLCD16Opaque_SSE2(__m128i &srci, __m128i &dst,
+                                       __m128i &mask) {
+    const __m128i zero = _mm_setzero_si128();
+
+    // Remember which pixels have no coverage at all before the mask is
+    // repacked (repacking drops the low green bit, so the test has to be
+    // made on the original 16-bit value).
+    const __m128i noCoverage = _mm_cmpeq_epi32(mask, zero);
+
+    // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
+    // They are placed at bits 16..20, 8..12 and 0..4 of each 32-bit lane.
+    __m128i r = _mm_and_si128(_mm_slli_epi32(mask,
+                                  16-SK_R16_SHIFT-(SK_R16_BITS-5)),
+                              _mm_set1_epi32(0x001F0000));
+
+    __m128i g = _mm_and_si128(_mm_slli_epi32(mask,
+                                  8-SK_G16_SHIFT-(SK_G16_BITS-5)),
+                              _mm_set1_epi32(0x00001F00));
+
+    __m128i b = _mm_and_si128(_mm_slli_epi32(mask, SK_B16_BITS-5),
+                              _mm_set1_epi32(0x0000001F));
+
+    // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
+    mask = _mm_or_si128(_mm_or_si128(r, g), b);
+
+    // Interleave R,G,B into the lower byte of word.
+    __m128i maskLo = _mm_unpacklo_epi8(mask, zero);
+    __m128i maskHi = _mm_unpackhi_epi8(mask, zero);
+
+    // Upscale to 0..32
+    maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
+    maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
+
+    // Interleave R,G,B into the lower byte of the word.
+    __m128i dstLo = _mm_unpacklo_epi8(dst, zero);
+    __m128i dstHi = _mm_unpackhi_epi8(dst, zero);
+
+    // (src - dst) * coverage >> 5; the shift is arithmetic because the
+    // difference may be negative.
+    maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(srci, dstLo));
+    maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(srci, dstHi));
+
+    maskLo = _mm_srai_epi16(maskLo, 5);
+    maskHi = _mm_srai_epi16(maskHi, 5);
+
+    // Add two pixels into result.
+    __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
+    __m128i resultHi = _mm_add_epi16(dstHi, maskHi);
+
+    // Pack into 4 32bit dst pixels
+    __m128i result = _mm_packus_epi16(resultLo, resultHi);
+
+    // The coverage has no alpha component, so the blend above leaves the
+    // dst alpha as it was, while the scalar path writes 0xFF for every
+    // pixel with a non-zero mask. Force the top byte (the one byte not
+    // used by the R/G/B positions above) in exactly those lanes so both
+    // paths agree.
+    const __m128i alphaByte = _mm_slli_epi32(_mm_set1_epi32(0xFF), 24);
+    return _mm_or_si128(result, _mm_andnot_si128(noCoverage, alphaByte));
+}
+
+/**
+ *  SSE2 row proc: blend `color` into `width` dst pixels through the LCD16
+ *  mask values in src. Pixels are handled one at a time until dst is
+ *  16-byte aligned, then four at a time, then one at a time for the
+ *  remainder. The trailing SkPMColor argument is unused by this variant.
+ */
+void SkBlitLCD16Row_SSE2(SkPMColor dst[], const uint16_t src[],
+                         SkColor color, int width, SkPMColor) {
+    if (width <= 0) {
+        return;
+    }
+
+    const int srcR = SkColorGetR(color);
+    const int srcG = SkColorGetG(color);
+    const int srcB = SkColorGetB(color);
+    // The blend helpers expect alpha in 0..256 rather than 0..255.
+    const int srcA = SkAlpha255To256(SkColorGetA(color));
+
+    if (width >= 4) {
+        SkASSERT(((size_t)dst & 0x03) == 0);
+
+        // Scalar lead-in until dst sits on a 16-byte boundary.
+        for (; ((size_t)dst & 0x0F) != 0; ++src, ++dst, --width) {
+            *dst = SkBlendLCD16(srcA, srcR, srcG, srcB, *dst, *src);
+        }
+
+        const __m128i zero = _mm_setzero_si128();
+
+        // Source color widened to 16 bits per channel, and the alpha scale
+        // replicated into every 16-bit lane.
+        __m128i srci = _mm_unpacklo_epi8(
+                _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB)), zero);
+        __m128i scale = _mm_set1_epi16(srcA);
+
+        __m128i* d = reinterpret_cast<__m128i*>(dst);
+        for (; width >= 4; width -= 4, src += 4, ++d) {
+            __m128i dst_pixel = _mm_load_si128(d);
+            __m128i mask_pixel = _mm_loadl_epi64(
+                    reinterpret_cast<const __m128i*>(src));
+
+            // Compare every 16-bit mask value with zero and collect the top
+            // bit of each result byte; 0xFFFF means all four mask pixels
+            // are zero and the dst pixels can be left alone.
+            const int zeroBits = _mm_movemask_epi8(
+                    _mm_cmpeq_epi16(mask_pixel, zero));
+            if (0xFFFF == zeroBits) {
+                continue;
+            }
+
+            // Unpack 4 16bit mask pixels to
+            // (p0, 0, p1, 0, p2, 0, p3, 0)
+            mask_pixel = _mm_unpacklo_epi16(mask_pixel, zero);
+
+            // Process 4 32bit dst pixels
+            _mm_store_si128(d, SkBlendLCD16_SSE2(srci, dst_pixel,
+                                                 mask_pixel, scale));
+        }
+
+        dst = reinterpret_cast<SkPMColor*>(d);
+    }
+
+    // Scalar tail for whatever is left (fewer than four pixels, or the
+    // whole row when it was shorter than four to begin with).
+    for (; width > 0; --width, ++src, ++dst) {
+        *dst = SkBlendLCD16(srcA, srcR, srcG, srcB, *dst, *src);
+    }
+}
+
+/**
+ *  SSE2 row proc for the opaque case: blend `color` into `width` dst
+ *  pixels through the LCD16 mask values in src. Pixels are handled one at
+ *  a time until dst is 16-byte aligned, then four at a time, then one at a
+ *  time for the remainder. opaqueDst is only consumed by the scalar
+ *  head/tail via SkBlendLCD16Opaque.
+ */
+void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t src[],
+                               SkColor color, int width, SkPMColor opaqueDst) {
+    if (width <= 0) {
+        return;
+    }
+
+    const int srcR = SkColorGetR(color);
+    const int srcG = SkColorGetG(color);
+    const int srcB = SkColorGetB(color);
+
+    if (width >= 4) {
+        SkASSERT(((size_t)dst & 0x03) == 0);
+
+        // Scalar lead-in until dst sits on a 16-byte boundary.
+        for (; ((size_t)dst & 0x0F) != 0; ++src, ++dst, --width) {
+            *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *src, opaqueDst);
+        }
+
+        const __m128i zero = _mm_setzero_si128();
+
+        // Source color widened to 16 bits per channel.
+        __m128i srci = _mm_unpacklo_epi8(
+                _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB)), zero);
+
+        __m128i* d = reinterpret_cast<__m128i*>(dst);
+        for (; width >= 4; width -= 4, src += 4, ++d) {
+            __m128i dst_pixel = _mm_load_si128(d);
+            __m128i mask_pixel = _mm_loadl_epi64(
+                    reinterpret_cast<const __m128i*>(src));
+
+            // Compare every 16-bit mask value with zero and collect the top
+            // bit of each result byte; 0xFFFF means all four mask pixels
+            // are zero and the dst pixels can be left alone.
+            const int zeroBits = _mm_movemask_epi8(
+                    _mm_cmpeq_epi16(mask_pixel, zero));
+            if (0xFFFF == zeroBits) {
+                continue;
+            }
+
+            // Unpack 4 16bit mask pixels to
+            // (p0, 0, p1, 0, p2, 0, p3, 0)
+            mask_pixel = _mm_unpacklo_epi16(mask_pixel, zero);
+
+            // Process 4 32bit dst pixels
+            _mm_store_si128(d, SkBlendLCD16Opaque_SSE2(srci, dst_pixel,
+                                                       mask_pixel));
+        }
+
+        dst = reinterpret_cast<SkPMColor*>(d);
+    }
+
+    // Scalar tail for whatever is left (fewer than four pixels, or the
+    // whole row when it was shorter than four to begin with).
+    for (; width > 0; --width, ++src, ++dst) {
+        *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *src, opaqueDst);
+    }
+}
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* mask,
size_t maskRB, SkColor color,
int width, int height);
+
+void SkBlitLCD16Row_SSE2(SkPMColor dst[], const uint16_t src[],
+ SkColor color, int width, SkPMColor);
+void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t src[],
+ SkColor color, int width, SkPMColor opaqueDst);
return NULL;
}
+SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
+ return NULL;  // no optimized LCD16 row proc in this build, whatever isOpaque is; BlitLCD16RowFactory then uses the portable procs
+}
+
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
SkMask::Format maskFormat,
RowFlags flags) {
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
SkMask::Format maskFormat,
SkColor color) {
- return NULL;
+ return NULL;
+}
+
+SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
+ return NULL;  // no optimized LCD16 row proc in this build, whatever isOpaque is; BlitLCD16RowFactory then uses the portable procs
 }
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
return proc;
}
+/**
+ *  Pick the SSE2 LCD16 row proc matching isOpaque, or return NULL when
+ *  cachedHasSSE2() reports that SSE2 is unavailable.
+ */
+SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
+    if (!cachedHasSSE2()) {
+        return NULL;
+    }
+    return isOpaque ? SkBlitLCD16OpaqueRow_SSE2 : SkBlitLCD16Row_SSE2;
+}
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
SkMask::Format maskFormat,
RowFlags flags) {