{
sint16 y, cb, cr;
sint16 r, g, b;
-
int i;
+
+ /**
+ * The encoded YCbCr coeffectients are represented as 11.5 fixed-point numbers:
+ *
+ * 1 sign bit + 10 integer bits + 5 fractional bits
+ *
+ * However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
+ * In other words, the encoded coeffectients is scaled by << 5 when intepreted as sint16.
+ * It will be scaled down to original during the last RLGR encoding phase.
+ */
for (i = 0; i < 4096; i++)
{
r = y_r_buf[i];
g = cb_g_buf[i];
b = cr_b_buf[i];
- y = ((r >> 2) + (r >> 5) + (r >> 6)) + ((g >> 1) + (g >> 4) + (g >> 6) + (g >> 7)) +
- ((b >> 4) + (b >> 5) + (b >> 6) + (b >> 7));
- y_r_buf[i] = MINMAX(y, 0, 255) - 128;
- cb = 0 - ((r >> 3) + (r >> 5) + (r >> 6)) - ((g >> 2) + (g >> 4) + (g >> 6)) + (b >> 1);
- cb_g_buf[i] = MINMAX(cb, -128, 127);
- cr = (r >> 1) - ((g >> 2) + (g >> 3) + (g >> 5) + (g >> 6)) - ((b >> 4) + (b >> 6));
- cr_b_buf[i] = MINMAX(cr, -128, 127);
+ /* 0.299 << 5 = 1001.10010001(b), 0.587 << 5 = 10010.11001000(b), 0.114 << 5 = 11.10100101(b) */
+ y = ((r << 3) + (r) + (r >> 1) + (r >> 4) + (r >> 7)) +
+ ((g << 4) + (g << 1) + (g >> 1) + (g >> 2) + (g >> 5)) +
+ ((b << 1) + (b) + (b >> 1) + (b >> 3) + (b >> 6) + (b >> 7));
+ y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5);
+ /* 0.168935 << 5 = 101.01100111(b), 0.331665 << 5 = 1010.10011100(b), 0.50059 << 5 = 10000.00000100(b) */
+ cb = 0 - ((r << 2) + (r) + (r >> 2) + (r >> 3) + (r >> 5)) -
+ ((g << 3) + (g << 1) + (g >> 1) + (g >> 4) + (g >> 5) + (g >> 6)) +
+ ((b << 4) + (b >> 6));
+ cb_g_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5));
+ /* 0.499813 << 5 = 1111.11111110(b), 0.418531 << 5 = 1101.01100100(b), 0.081282 << 5 = 10.10011001(b) */
+ cr = ((r << 4) - (r >> 7)) -
+ ((g << 3) + (g << 2) + (g) + (g >> 2) + (g >> 3) + (g >> 6)) -
+ ((b << 1) + (b >> 1) + (b >> 4) + (b >> 5) + (b >> 7));
+ cr_b_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5));
}
}
}
}
+/* The encodec YCbCr coeffectients are represented as 11.5 fixed-point numbers. See rfx_encode.c */
static void rfx_encode_rgb_to_ycbcr_sse2(sint16* y_r_buffer, sint16* cb_g_buffer, sint16* cr_b_buffer)
{
- __m128i min = _mm_set1_epi16(-128);
- __m128i max = _mm_set1_epi16(127);
+ __m128i min = _mm_set1_epi16(-128 << 5);
+ __m128i max = _mm_set1_epi16(127 << 5);
__m128i* y_r_buf = (__m128i*) y_r_buffer;
__m128i* cb_g_buf = (__m128i*) cb_g_buffer;
/* b = cr_b_buf[i]; */
b = _mm_load_si128(&cr_b_buf[i]);
- /* y = ((r >> 2) + (r >> 5) + (r >> 6)) + ((g >> 1) + (g >> 4) + (g >> 6) + (g >> 7)) +
- ((b >> 4) + (b >> 5) + (b >> 6) + (b >> 7)); */
- /* y_r_buf[i] = MINMAX(y, 0, 255) - 128; */
- y = _mm_add_epi16(_mm_srai_epi16(r, 2), _mm_srai_epi16(r, 5));
- y = _mm_add_epi16(y, _mm_srai_epi16(r, 6));
+ /* y = ((r << 3) + (r) + (r >> 1) + (r >> 4) + (r >> 7)) +
+ ((g << 4) + (g << 1) + (g >> 1) + (g >> 2) + (g >> 5)) +
+ ((b << 1) + (b) + (b >> 1) + (b >> 3) + (b >> 6) + (b >> 7)); */
+ /* y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5); */
+ y = _mm_add_epi16(_mm_slli_epi16(r, 3), r);
+ y = _mm_add_epi16(y, _mm_srai_epi16(r, 1));
+ y = _mm_add_epi16(y, _mm_srai_epi16(r, 4));
+ y = _mm_add_epi16(y, _mm_srai_epi16(r, 7));
+ y = _mm_add_epi16(y, _mm_slli_epi16(g, 4));
+ y = _mm_add_epi16(y, _mm_slli_epi16(g, 1));
y = _mm_add_epi16(y, _mm_srai_epi16(g, 1));
- y = _mm_add_epi16(y, _mm_srai_epi16(g, 4));
- y = _mm_add_epi16(y, _mm_srai_epi16(g, 6));
- y = _mm_add_epi16(y, _mm_srai_epi16(g, 7));
- y = _mm_add_epi16(y, _mm_srai_epi16(b, 4));
- y = _mm_add_epi16(y, _mm_srai_epi16(b, 5));
+ y = _mm_add_epi16(y, _mm_srai_epi16(g, 2));
+ y = _mm_add_epi16(y, _mm_srai_epi16(g, 5));
+ y = _mm_add_epi16(y, _mm_slli_epi16(b, 1));
+ y = _mm_add_epi16(y, b);
+ y = _mm_add_epi16(y, _mm_srai_epi16(b, 1));
+ y = _mm_add_epi16(y, _mm_srai_epi16(b, 3));
y = _mm_add_epi16(y, _mm_srai_epi16(b, 6));
y = _mm_add_epi16(y, _mm_srai_epi16(b, 7));
y = _mm_add_epi16(y, min);
_mm_between_epi16(y, min, max);
_mm_store_si128(&y_r_buf[i], y);
- /* cb = 0 - ((r >> 3) + (r >> 5) + (r >> 6)) - ((g >> 2) + (g >> 4) + (g >> 6)) + (b >> 1); */
- /* cb_g_buf[i] = MINMAX(cb, -128, 127); */
- cb = _mm_sub_epi16(_mm_srai_epi16(b, 1), _mm_srai_epi16(r, 3));
+ /* cb = 0 - ((r << 2) + (r) + (r >> 2) + (r >> 3) + (r >> 5)) -
+ ((g << 3) + (g << 1) + (g >> 1) + (g >> 4) + (g >> 5) + (g >> 6)) +
+ ((b << 4) + (b >> 6)); */
+ /* cb_g_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5)); */
+ cb = _mm_add_epi16(_mm_slli_epi16(b, 4), _mm_srai_epi16(b, 6));
+ cb = _mm_sub_epi16(cb, _mm_slli_epi16(r, 2));
+ cb = _mm_sub_epi16(cb, r);
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(r, 2));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(r, 3));
cb = _mm_sub_epi16(cb, _mm_srai_epi16(r, 5));
- cb = _mm_sub_epi16(cb, _mm_srai_epi16(r, 6));
- cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 2));
+ cb = _mm_sub_epi16(cb, _mm_slli_epi16(g, 3));
+ cb = _mm_sub_epi16(cb, _mm_slli_epi16(g, 1));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 1));
cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 4));
+ cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 5));
cb = _mm_sub_epi16(cb, _mm_srai_epi16(g, 6));
_mm_between_epi16(cb, min, max);
_mm_store_si128(&cb_g_buf[i], cb);
- /* cr = (r >> 1) - ((g >> 2) + (g >> 3) + (g >> 5) + (g >> 6)) - ((b >> 4) + (b >> 6)); */
- /* cr_b_buf[i] = MINMAX(cr, -128, 127); */
- cr = _mm_sub_epi16(_mm_srai_epi16(r, 1), _mm_srai_epi16(g, 2));
+ /* cr = ((r << 4) - (r >> 7)) -
+ ((g << 3) + (g << 2) + (g) + (g >> 2) + (g >> 3) + (g >> 6)) -
+ ((b << 1) + (b >> 1) + (b >> 4) + (b >> 5) + (b >> 7)); */
+ /* cr_b_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5)); */
+ cr = _mm_sub_epi16(_mm_slli_epi16(r, 4), _mm_srai_epi16(r, 7));
+ cr = _mm_sub_epi16(cr, _mm_slli_epi16(g, 3));
+ cr = _mm_sub_epi16(cr, _mm_slli_epi16(g, 2));
+ cr = _mm_sub_epi16(cr, g);
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 2));
cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 3));
- cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 5));
cr = _mm_sub_epi16(cr, _mm_srai_epi16(g, 6));
+ cr = _mm_sub_epi16(cr, _mm_slli_epi16(b, 1));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 1));
cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 4));
- cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 6));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 5));
+ cr = _mm_sub_epi16(cr, _mm_srai_epi16(b, 7));
_mm_between_epi16(cr, min, max);
_mm_store_si128(&cr_b_buf[i], cr);
}