--- /dev/null
+/* GStreamer
+ * Copyright (C) <2014> Wim Taymans <wim.taymans@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <string.h>
+
+#include "video-dither.h"
+#include "video-orc.h"
+
+struct _GstVideoDither /* private state of one ditherer, filled in by gst_video_dither_new() */
+{
+ GstVideoDitherMethod method; /* dither algorithm requested at construction */
+ GstVideoDitherFlags flags; /* tested for GST_VIDEO_DITHER_FLAG_QUANTIZE */
+ GstVideoFormat format; /* pixel format the ditherer was created for */
+ guint width; /* line width given at construction */
+
+ guint depth; /* 8 or 16, selected from the format */
+ guint n_comp; /* always set to 4 by the constructor */
+
+ void (*func) (GstVideoDither * dither, gpointer pixels, guint x, guint y,
+ guint width); /* per-line worker; NULL when there is nothing to do */
+ guint8 shift[4]; /* count_power() of each component's quantizer */
+ guint16 mask[4]; /* (1 << shift) - 1 for each component */
+ guint64 orc_mask64; /* the four masks packed as 16-bit lanes for the Orc kernels */
+ guint32 orc_mask32; /* the four masks packed as 8-bit lanes for the Orc kernels */
+
+ gpointer errors; /* error terms, or the precomputed pattern set up by setup_bayer() */
+};
+
+/* Quantize-only worker for 8-bit, 4-component pixels: no dither pattern is
+ * applied, the Orc kernel only clears the bits selected by the packed 32-bit
+ * mask in @width pixels starting at pixel offset @x. @y is not used. */
+static void
+dither_none_u8_mask (GstVideoDither * dither, gpointer pixels, guint x, guint y,
+    guint width)
+{
+  guint8 *first = (guint8 *) pixels + (x * 4);
+
+  video_orc_dither_none_4u8_mask (first, dither->orc_mask32, width);
+}
+
+/* Quantize-only worker for 16-bit, 4-component pixels: the Orc kernel clears
+ * the bits selected by the packed 64-bit mask in @width pixels starting at
+ * pixel offset @x. @y is not used. */
+static void
+dither_none_u16_mask (GstVideoDither * dither, gpointer pixels, guint x,
+    guint y, guint width)
+{
+  guint16 *first = (guint16 *) pixels + (x * 4);
+
+  video_orc_dither_none_4u16_mask (first, dither->orc_mask64, width);
+}
+
+static void
+dither_verterr_u8 (GstVideoDither * dither, gpointer pixels, guint x, guint y,
+ guint width)
+{
+ guint8 *p = pixels;
+ guint16 *e = dither->errors;
+
+ if (y == 0)
+ memset (e + (x * 4), 0, width * 8);
+
+ video_orc_dither_verterr_4u8_mask (p + (x * 4), e + (x * 4),
+ dither->orc_mask64, width);
+}
+
+static void
+dither_verterr_u16 (GstVideoDither * dither, gpointer pixels, guint x, guint y,
+ guint width)
+{
+ guint16 *p = pixels;
+ guint16 *e = dither->errors;
+
+ if (y == 0)
+ memset (e + (x * 4), 0, width * 8);
+
+ {
+ gint i, end;
+ guint16 *m = dither->mask;
+ guint32 v, mp;
+
+ end = (width + x) * 4;
+ for (i = x * 4; i < end; i++) {
+ mp = m[i & 3];
+ v = p[i] + e[i];
+ /* take new error and store */
+ e[i] = v & mp;
+ /* quantize and store */
+ v &= ~mp;
+ p[i] = CLAMP (v, 0, 65535);
+ }
+ }
+}
+
+static void
+dither_floyd_steinberg_u8 (GstVideoDither * dither, gpointer pixels, guint x,
+ guint y, guint width)
+{
+ guint8 *p = pixels;
+ guint16 *e = dither->errors;
+
+ if (y == 0)
+ memset (e + (x * 4), 0, (width + 1) * 8);
+
+ /* add and multiply errors from previous line */
+ video_orc_dither_fs_muladd_u8 (e + x * 4, width * 4);
+#if 1
+ {
+ gint i, end;
+ guint16 *m = dither->mask, mp;
+ guint16 v;
+
+ end = (width + x) * 4;
+
+ for (i = x * 4; i < end; i++) {
+ mp = m[i & 3];
+ v = p[i] + ((7 * e[i] + e[i + 4]) >> 4);
+ /* take new error and store */
+ e[i + 4] = v & mp;
+ /* quantize and store */
+ v &= ~mp;
+ p[i] = CLAMP (v, 0, 255);
+ }
+ }
+#else
+ video_orc_dither_fs_add_4u8 (p, e + x * 4, e + (x + 1) * 4,
+ dither->orc_mask64, width);
+#endif
+}
+
+/* Floyd-Steinberg worker for 16-bit, 4-component pixels, done in plain C.
+ * Each component gets (7 * e[i] + e[i + 4] + 5 * e[i + 8] + 3 * e[i + 12]) >> 4
+ * added, is quantized with its mask, and the removed bits are stored at
+ * e[i + 4]. */
+static void
+dither_floyd_steinberg_u16 (GstVideoDither * dither, gpointer pixels, guint x,
+    guint y, guint width)
+{
+  guint16 *line = pixels;
+  guint16 *err = dither->errors;
+  const guint16 *masks = dither->mask;
+  gint idx, last;
+
+  /* line 0: clear the span plus the one extra pixel slot written below */
+  if (y == 0)
+    memset (err + (x * 4), 0, (width + 1) * 8);
+
+  last = (width + x) * 4;
+  idx = x * 4;
+  while (idx < last) {
+    guint16 mask = masks[idx & 3];
+    /* apply previous errors to pixel */
+    guint32 val = line[idx] + ((7 * err[idx] + err[idx + 4] +
+            5 * err[idx + 8] + 3 * err[idx + 12]) >> 4);
+
+    /* take new error and store */
+    err[idx + 4] = val & mask;
+    /* quantize and store; val is unsigned, only the upper bound matters */
+    val &= ~mask;
+    line[idx] = MIN (val, 65535);
+    idx++;
+  }
+}
+
+/* Sierra-lite worker for 8-bit, 4-component pixels.
+ *
+ * @pixels points to the start of the line; @width pixels starting at pixel
+ * offset @x are processed. Each component gets
+ * (2 * e[i] + e[i + 8] + e[i + 12]) >> 2 added, is quantized with its mask,
+ * and the removed bits are stored at e[i + 4]. On line 0 the error terms of
+ * the span are cleared first. */
+static void
+dither_sierra_lite_u8 (GstVideoDither * dither, gpointer pixels, guint x,
+    guint y, guint width)
+{
+  guint8 *p = pixels;
+  guint16 *e = dither->errors;
+  gint i, end;
+  guint16 *m = dither->mask, mp;
+  guint16 v;
+
+  if (y == 0)
+    memset (e + (x * 4), 0, (width + 4) * 8);
+
+  end = (width + x) * 4;
+  /* indices count components, so pixel @x starts at x * 4; this matches the
+   * memset above and the other error-diffusion workers */
+  for (i = x * 4; i < end; i++) {
+    mp = m[i & 3];
+    /* apply previous errors to pixel */
+    v = p[i] + ((2 * e[i] + e[i + 8] + e[i + 12]) >> 2);
+    /* store new error */
+    e[i + 4] = v & mp;
+    /* quantize and store */
+    v &= ~mp;
+    p[i] = CLAMP (v, 0, 255);
+  }
+}
+
+/* Sierra-lite worker for 16-bit, 4-component pixels.
+ *
+ * @pixels points to the start of the line; @width pixels starting at pixel
+ * offset @x are processed. Each component gets
+ * (2 * e[i] + e[i + 8] + e[i + 12]) >> 2 added, is quantized with its mask,
+ * and the removed bits are stored at e[i + 4]. On line 0 the error terms of
+ * the span are cleared first. */
+static void
+dither_sierra_lite_u16 (GstVideoDither * dither, gpointer pixels, guint x,
+    guint y, guint width)
+{
+  guint16 *p = pixels;
+  guint16 *e = dither->errors;
+  gint i, end;
+  guint16 *m = dither->mask, mp;
+  guint32 v;
+
+  if (y == 0)
+    memset (e + (x * 4), 0, (width + 4) * 8);
+
+  end = (width + x) * 4;
+  /* indices count components, so pixel @x starts at x * 4; this matches the
+   * memset above and the other error-diffusion workers */
+  for (i = x * 4; i < end; i++) {
+    mp = m[i & 3];
+    /* apply previous errors to pixel */
+    v = p[i] + ((2 * e[i] + e[i + 8] + e[i + 12]) >> 2);
+    /* store new error */
+    e[i + 4] = v & mp;
+    /* quantize and store; the mask is applied once, here */
+    v &= ~mp;
+    p[i] = CLAMP (v, 0, 65535);
+  }
+}
+
+/* 16x16 ordered-dither threshold matrix; every value 0..255 occurs exactly
+ * once. Entry [i][j] is the sum over the four bit positions b of
+ * P(bit b of i, bit b of j) * 4^(3 - b), with P(0,0) = 0, P(0,1) = 2,
+ * P(1,0) = 3 and P(1,1) = 1. Columns 1 and 9 of rows 4, 7, 12 and 15 follow
+ * that rule as well (140/142, 124/126, 143/141, 127/125). */
+static const guint16 bayer_map[16][16] = {
+  {0, 128, 32, 160, 8, 136, 40, 168, 2, 130, 34, 162, 10, 138, 42, 170},
+  {192, 64, 224, 96, 200, 72, 232, 104, 194, 66, 226, 98, 202, 74, 234, 106},
+  {48, 176, 16, 144, 56, 184, 24, 152, 50, 178, 18, 146, 58, 186, 26, 154},
+  {240, 112, 208, 80, 248, 120, 216, 88, 242, 114, 210, 82, 250, 122, 218, 90},
+  {12, 140, 44, 172, 4, 132, 36, 164, 14, 142, 46, 174, 6, 134, 38, 166},
+  {204, 76, 236, 108, 196, 68, 228, 100, 206, 78, 238, 110, 198, 70, 230, 102},
+  {60, 188, 28, 156, 52, 180, 20, 148, 62, 190, 30, 158, 54, 182, 22, 150},
+  {252, 124, 220, 92, 244, 116, 212, 84, 254, 126, 222, 94, 246, 118, 214, 86},
+  {3, 131, 35, 163, 11, 139, 43, 171, 1, 129, 33, 161, 9, 137, 41, 169},
+  {195, 67, 227, 99, 203, 75, 235, 107, 193, 65, 225, 97, 201, 73, 233, 105},
+  {51, 179, 19, 147, 59, 187, 27, 155, 49, 177, 17, 145, 57, 185, 25, 153},
+  {243, 115, 211, 83, 251, 123, 219, 91, 241, 113, 209, 81, 249, 121, 217, 89},
+  {15, 143, 47, 175, 7, 135, 39, 167, 13, 141, 45, 173, 5, 133, 37, 165},
+  {207, 79, 239, 111, 199, 71, 231, 103, 205, 77, 237, 109, 197, 69, 229, 101},
+  {63, 191, 31, 159, 55, 183, 23, 151, 61, 189, 29, 157, 53, 181, 21, 149},
+  {255, 127, 223, 95, 247, 119, 215, 87, 253, 125, 221, 93, 245, 117, 213, 85}
+};
+
+/* Ordered-dither worker for 8-bit, 4-component pixels without quantization:
+ * the Orc kernel adds the precomputed 8-bit pattern values to the pixels.
+ *
+ * @pixels points to the start of the line; @width pixels starting at pixel
+ * offset @x are processed. The pattern holds 16 lines of dither->width
+ * pixels each (see setup_bayer()), so the line stride is dither->width, not
+ * the width of this call. */
+static void
+dither_ordered_u8 (GstVideoDither * dither, gpointer pixels, guint x, guint y,
+    guint width)
+{
+  guint8 *p = (guint8 *) pixels + (x * 4);
+  /* pattern line (y & 15), column (x & 15): the pattern repeats every 16
+   * pixels horizontally and every 16 lines vertically */
+  guint8 *c = (guint8 *) dither->errors +
+      ((y & 15) * dither->width + (x & 15)) * 4;
+
+  video_orc_dither_ordered_u8 (p, c, width * 4);
+}
+
+/* Ordered-dither worker for 8-bit, 4-component pixels with quantization: the
+ * Orc kernel adds the precomputed 16-bit pattern values and clears the bits
+ * selected by the packed 64-bit mask.
+ *
+ * @pixels points to the start of the line; @width pixels starting at pixel
+ * offset @x are processed. The pattern holds 16 lines of dither->width
+ * pixels each (see setup_bayer()), so the line stride is dither->width, not
+ * the width of this call. */
+static void
+dither_ordered_u8_mask (GstVideoDither * dither, gpointer pixels, guint x,
+    guint y, guint width)
+{
+  guint8 *p = (guint8 *) pixels + (x * 4);
+  /* pattern line (y & 15), column (x & 15) */
+  guint16 *c = (guint16 *) dither->errors +
+      ((y & 15) * dither->width + (x & 15)) * 4;
+
+  video_orc_dither_ordered_4u8_mask (p, c, dither->orc_mask64, width);
+}
+
+/* Ordered-dither worker for 16-bit, 4-component pixels: the Orc kernel is
+ * given the pixels, the precomputed 16-bit pattern values and the packed
+ * 64-bit mask.
+ *
+ * @pixels points to the start of the line; @width pixels starting at pixel
+ * offset @x are processed. The pattern holds 16 lines of dither->width
+ * pixels each (see setup_bayer()), so the line stride is dither->width, not
+ * the width of this call. */
+static void
+dither_ordered_u16_mask (GstVideoDither * dither, gpointer pixels, guint x,
+    guint y, guint width)
+{
+  guint16 *p = (guint16 *) pixels + (x * 4);
+  /* pattern line (y & 15), column (x & 15) */
+  guint16 *c = (guint16 *) dither->errors +
+      ((y & 15) * dither->width + (x & 15)) * 4;
+
+  video_orc_dither_ordered_4u16_mask (p, c, dither->orc_mask64, width);
+}
+
+/* Allocate the zero-filled buffer behind dither->errors: @lines lines of
+ * (width + 8) pixels, n_comp 16-bit entries per pixel. */
+static void
+alloc_errors (GstVideoDither * dither, guint lines)
+{
+  gsize n_bytes;
+
+  n_bytes = sizeof (guint16) * (dither->width + 8) * dither->n_comp * lines;
+  dither->errors = g_malloc0 (n_bytes);
+}
+
+/* Pick the ordered-dither worker for the configured depth and flags and
+ * precompute 16 lines of pattern values from bayer_map, one value per
+ * component. Values are stored as 8-bit entries for dither_ordered_u8 and as
+ * 16-bit entries for the two mask workers. */
+static void
+setup_bayer (GstVideoDither * dither)
+{
+  const guint line_width = dither->width;
+  const guint comps = dither->n_comp;
+  const guint8 *shifts = dither->shift;
+  gboolean bytes;
+  guint row, col, c;
+
+  if (dither->depth != 8) {
+    dither->func = dither_ordered_u16_mask;
+    bytes = FALSE;
+  } else if (dither->flags & GST_VIDEO_DITHER_FLAG_QUANTIZE) {
+    dither->func = dither_ordered_u8_mask;
+    bytes = FALSE;
+  } else {
+    dither->func = dither_ordered_u8;
+    bytes = TRUE;
+  }
+
+  alloc_errors (dither, 16);
+
+  for (row = 0; row < 16; row++) {
+    /* start of this pattern line, seen with either entry size */
+    guint8 *out8 = (guint8 *) dither->errors + (comps * line_width * row);
+    guint16 *out16 = (guint16 *) dither->errors + (comps * line_width * row);
+
+    for (col = 0; col < line_width; col++) {
+      for (c = 0; c < comps; c++) {
+        guint16 val = bayer_map[row & 15][col & 15];
+
+        /* reduce the 8-bit map value to the component's shift in bits */
+        if (shifts[c] < 8)
+          val = val >> (8 - shifts[c]);
+
+        if (bytes)
+          out8[comps * col + c] = val;
+        else
+          out16[comps * col + c] = val;
+      }
+    }
+  }
+}
+
+/* Number of times @v can be halved before it drops to 1 or below, i.e. the
+ * position of its highest set bit; 0 for @v of 0 or 1. */
+static gint
+count_power (guint v)
+{
+  gint halvings;
+
+  for (halvings = 0; v > 1; v >>= 1)
+    halvings++;
+
+  return halvings;
+}
+
+/**
+ * gst_video_dither_new:
+ * @method: a #GstVideoDitherMethod
+ * @flags: a #GstVideoDitherFlags
+ * @format: a #GstVideoFormat
+ * @quantizer: quantizer
+ * @width: the width of the lines
+ *
+ * Make a new dither object for dithering lines of @format using the
+ * algorithm described by @method.
+ *
+ * Only #GST_VIDEO_FORMAT_AYUV, #GST_VIDEO_FORMAT_ARGB,
+ * #GST_VIDEO_FORMAT_AYUV64 and #GST_VIDEO_FORMAT_ARGB64 are handled; any
+ * other @format is treated as a programming error and %NULL is returned.
+ *
+ * Each component will be quantized to a multiple of @quantizer. Better
+ * performance is achieved when @quantizer is a power of 2. Currently only
+ * power of 2 quantizers are implemented: other values are rounded down to
+ * the nearest power of 2.
+ *
+ * @width is the width of the lines that this ditherer will handle.
+ *
+ * Returns: a new #GstVideoDither. Free with gst_video_dither_free().
+ */
+GstVideoDither *
+gst_video_dither_new (GstVideoDitherMethod method, GstVideoDitherFlags flags,
+    GstVideoFormat format, guint quantizer[GST_VIDEO_MAX_COMPONENTS],
+    guint width)
+{
+  GstVideoDither *dither;
+  gint i;
+
+  dither = g_slice_new0 (GstVideoDither);
+  dither->method = method;
+  dither->flags = flags;
+  dither->format = format;
+  dither->width = width;
+
+  dither->n_comp = 4;
+
+  switch (format) {
+    case GST_VIDEO_FORMAT_AYUV:
+    case GST_VIDEO_FORMAT_ARGB:
+      dither->depth = 8;
+      break;
+    case GST_VIDEO_FORMAT_AYUV64:
+    case GST_VIDEO_FORMAT_ARGB64:
+      dither->depth = 16;
+      break;
+    default:
+      /* release the half-built object, the caller only gets NULL back */
+      g_slice_free (GstVideoDither, dither);
+      g_return_val_if_reached (NULL);
+      break;
+  }
+
+  for (i = 0; i < 4; i++) {
+    /* FIXME, only power of 2 quantizers */
+    /* component slot i takes quantizer[(i + 3) & 3]: slot 0 uses entry 3,
+     * slots 1..3 use entries 0..2 */
+    guint q = quantizer[(i + 3) & 3];
+
+    dither->shift[i] = count_power (q);
+    dither->mask[i] = (1 << dither->shift[i]) - 1;
+    GST_DEBUG ("%d: quant %d shift %d mask %08x", i, q, dither->shift[i],
+        dither->mask[i]);
+    /* pack the per-component masks into the words handed to the Orc
+     * kernels; the FROM_BE conversions give them their in-memory order */
+    dither->orc_mask64 =
+        (dither->orc_mask64 << 16) | GUINT16_FROM_BE (dither->mask[i]);
+    dither->orc_mask32 = (dither->orc_mask32 << 8) | (guint8) dither->mask[i];
+  }
+  dither->orc_mask64 = GUINT64_FROM_BE (dither->orc_mask64);
+  dither->orc_mask32 = GUINT32_FROM_BE (dither->orc_mask32);
+  GST_DEBUG ("mask64 %08llx", (unsigned long long int) dither->orc_mask64);
+  GST_DEBUG ("mask32 %08x", dither->orc_mask32);
+
+  switch (method) {
+    case GST_VIDEO_DITHER_NONE:
+      /* without the quantize flag there is nothing to do per line */
+      if (dither->flags & GST_VIDEO_DITHER_FLAG_QUANTIZE) {
+        if (dither->depth == 8)
+          dither->func = dither_none_u8_mask;
+        else
+          dither->func = dither_none_u16_mask;
+      } else {
+        dither->func = NULL;
+      }
+      break;
+    case GST_VIDEO_DITHER_VERTERR:
+      alloc_errors (dither, 1);
+      if (dither->depth == 8)
+        dither->func = dither_verterr_u8;
+      else
+        dither->func = dither_verterr_u16;
+      break;
+    case GST_VIDEO_DITHER_FLOYD_STEINBERG:
+      alloc_errors (dither, 1);
+      if (dither->depth == 8)
+        dither->func = dither_floyd_steinberg_u8;
+      else
+        dither->func = dither_floyd_steinberg_u16;
+      break;
+    case GST_VIDEO_DITHER_SIERRA_LITE:
+      alloc_errors (dither, 1);
+      if (dither->depth == 8)
+        dither->func = dither_sierra_lite_u8;
+      else
+        dither->func = dither_sierra_lite_u16;
+      break;
+    case GST_VIDEO_DITHER_BAYER:
+      /* selects the worker and precomputes the pattern */
+      setup_bayer (dither);
+      break;
+  }
+  return dither;
+}
+
+/**
+ * gst_video_dither_free:
+ * @dither: a #GstVideoDither
+ *
+ * Release @dither together with the error/pattern buffer it owns.
+ */
+void
+gst_video_dither_free (GstVideoDither * dither)
+{
+  gpointer errors;
+
+  g_return_if_fail (dither != NULL);
+
+  errors = dither->errors;
+  g_slice_free (GstVideoDither, dither);
+  g_free (errors);
+}
+
+/**
+ * gst_video_dither_line:
+ * @dither: a #GstVideoDither
+ * @line: pointer to the pixels of the line
+ * @x: x coordinate
+ * @y: y coordinate
+ * @width: the width
+ *
+ * Dither @width pixels starting from offset @x in @line using @dither.
+ * @x + @width must not exceed the width @dither was created with; a span
+ * that ends exactly at the end of the line is allowed.
+ *
+ * @y is the line number of @line in the output image.
+ */
+void
+gst_video_dither_line (GstVideoDither * dither, gpointer line, guint x, guint y,
+    guint width)
+{
+  g_return_if_fail (dither != NULL);
+  /* same as x + width <= dither->width, written so the sum cannot wrap */
+  g_return_if_fail (x <= dither->width);
+  g_return_if_fail (width <= dither->width - x);
+
+  /* no worker is installed when the configuration needs no processing */
+  if (dither->func)
+    dither->func (dither, line, x, y, width);
+}
void video_orc_chroma_down_v4_u16 (guint16 * ORC_RESTRICT d1,
const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2,
const guint16 * ORC_RESTRICT s3, const guint16 * ORC_RESTRICT s4, int n);
+void video_orc_dither_none_4u8_mask (guint8 * ORC_RESTRICT d1, int p1, int n); /* d1: n 4-byte pixels, modified in place; p1: packed mask */
+void video_orc_dither_none_4u16_mask (guint16 * ORC_RESTRICT d1, orc_int64 p1,
+ int n); /* d1: n 8-byte pixels, modified in place; p1: packed mask */
+void video_orc_dither_verterr_4u8_mask (guint8 * ORC_RESTRICT d1,
+ guint16 * ORC_RESTRICT d2, orc_int64 p1, int n); /* d1: pixels, d2: error terms, both updated; p1: packed mask */
+void video_orc_dither_fs_muladd_u8 (guint16 * ORC_RESTRICT d1, int n); /* d1[i] += 5 * d1[i + 4] + 3 * d1[i + 8] for n entries */
+void video_orc_dither_ordered_u8 (guint8 * ORC_RESTRICT d1,
+ const guint8 * ORC_RESTRICT s1, int n); /* d1: n bytes, modified in place; s1: values added to them */
+void video_orc_dither_ordered_4u8_mask (guint8 * ORC_RESTRICT d1,
+ const guint16 * ORC_RESTRICT s1, orc_int64 p1, int n); /* d1: n 4-byte pixels; s1: values added; p1: packed mask */
+void video_orc_dither_ordered_4u16_mask (guint16 * ORC_RESTRICT d1,
+ const guint16 * ORC_RESTRICT s1, orc_int64 p1, int n); /* d1: pixels; s1: pattern values; p1: packed mask */
/* begin Orc C target preamble */
{
orc_union64 tmp;
tmp.i = p1;
- ex->params[ORC_VAR_P1] = tmp.x2[0];
- ex->params[ORC_VAR_T1] = tmp.x2[1];
+ ex->params[ORC_VAR_P1] = ((orc_uint64) tmp.i) & 0xffffffff;
+ ex->params[ORC_VAR_T1] = ((orc_uint64) tmp.i) >> 32;
}
{
orc_union64 tmp;
tmp.i = p2;
- ex->params[ORC_VAR_P2] = tmp.x2[0];
- ex->params[ORC_VAR_T2] = tmp.x2[1];
+ ex->params[ORC_VAR_P2] = ((orc_uint64) tmp.i) & 0xffffffff;
+ ex->params[ORC_VAR_T2] = ((orc_uint64) tmp.i) >> 32;
}
{
orc_union64 tmp;
tmp.i = p3;
- ex->params[ORC_VAR_P3] = tmp.x2[0];
- ex->params[ORC_VAR_T3] = tmp.x2[1];
+ ex->params[ORC_VAR_P3] = ((orc_uint64) tmp.i) & 0xffffffff;
+ ex->params[ORC_VAR_T3] = ((orc_uint64) tmp.i) >> 32;
}
func = c->exec;
func (ex);
}
#endif
+
+
+/* video_orc_dither_none_4u8_mask
+ *
+ * For each of the n 32-bit elements of d1, every x4 lane is replaced by
+ * (~mask lane) & lane, where the mask is the 32-bit parameter p1. In other
+ * words the bits set in p1 are cleared in every element.
+ *
+ * With DISABLE_ORC defined only the plain C loop is built. Otherwise the
+ * public function creates an OrcProgram from the static bytecode on first
+ * use (under orc_once_mutex_lock(), tracked by p_inited), registers
+ * _backup_video_orc_dither_none_4u8_mask as its backup function, keeps the
+ * compiled OrcCode in a function-local static and runs it through an
+ * OrcExecutor on every call.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_none_4u8_mask (guint8 * ORC_RESTRICT d1, int p1, int n)
+{
+ int i;
+ orc_union32 *ORC_RESTRICT ptr0;
+ orc_union32 var33;
+ orc_union32 var34;
+ orc_union32 var35;
+
+ ptr0 = (orc_union32 *) d1;
+
+ /* 0: loadpl */
+ var35.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadl */
+ var33 = ptr0[i];
+ /* 2: andnb */
+ var34.x4[0] = (~var35.x4[0]) & var33.x4[0];
+ var34.x4[1] = (~var35.x4[1]) & var33.x4[1];
+ var34.x4[2] = (~var35.x4[2]) & var33.x4[2];
+ var34.x4[3] = (~var35.x4[3]) & var33.x4[3];
+ /* 3: storel */
+ ptr0[i] = var34;
+ }
+
+}
+
+#else
+static void
+_backup_video_orc_dither_none_4u8_mask (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n;
+ orc_union32 *ORC_RESTRICT ptr0;
+ orc_union32 var33;
+ orc_union32 var34;
+ orc_union32 var35;
+
+ ptr0 = (orc_union32 *) ex->arrays[0];
+
+ /* 0: loadpl */
+ var35.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadl */
+ var33 = ptr0[i];
+ /* 2: andnb */
+ var34.x4[0] = (~var35.x4[0]) & var33.x4[0];
+ var34.x4[1] = (~var35.x4[1]) & var33.x4[1];
+ var34.x4[2] = (~var35.x4[2]) & var33.x4[2];
+ var34.x4[3] = (~var35.x4[3]) & var33.x4[3];
+ /* 3: storel */
+ ptr0[i] = var34;
+ }
+
+}
+
+void
+video_orc_dither_none_4u8_mask (guint8 * ORC_RESTRICT d1, int p1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 9, 30, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 110, 111, 110, 101, 95, 52, 117, 56, 95, 109, 97,
+ 115,
+ 107, 11, 4, 4, 16, 4, 20, 4, 115, 32, 24, 21, 2, 37, 0, 32,
+ 0, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_none_4u8_mask);
+#else
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_none_4u8_mask");
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_none_4u8_mask);
+ orc_program_add_destination (p, 4, "d1");
+ orc_program_add_parameter (p, 4, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+
+ orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "andnb", 2, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* video_orc_dither_none_4u16_mask
+ *
+ * For each of the n 64-bit elements of d1, every x4 lane is replaced by
+ * (~mask lane) & lane, where the mask is the 64-bit parameter p1.
+ *
+ * With DISABLE_ORC defined only the plain C loop is built. Otherwise the
+ * public function creates an OrcProgram from the static bytecode on first
+ * use (under orc_once_mutex_lock(), tracked by p_inited), registers
+ * _backup_video_orc_dither_none_4u16_mask as its backup function, keeps the
+ * compiled OrcCode in a function-local static and runs it through an
+ * OrcExecutor on every call. The 64-bit parameter is handed over as two
+ * 32-bit halves in ex->params[ORC_VAR_P1] (low) and ex->params[ORC_VAR_T1]
+ * (high); the backup function joins them again.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_none_4u16_mask (guint16 * ORC_RESTRICT d1, orc_int64 p1, int n)
+{
+ int i;
+ orc_union64 *ORC_RESTRICT ptr0;
+ orc_union64 var33;
+ orc_union64 var34;
+ orc_union64 var35;
+
+ ptr0 = (orc_union64 *) d1;
+
+ /* 0: loadpq */
+ var35.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadq */
+ var33 = ptr0[i];
+ /* 2: andnw */
+ var34.x4[0] = (~var35.x4[0]) & var33.x4[0];
+ var34.x4[1] = (~var35.x4[1]) & var33.x4[1];
+ var34.x4[2] = (~var35.x4[2]) & var33.x4[2];
+ var34.x4[3] = (~var35.x4[3]) & var33.x4[3];
+ /* 3: storeq */
+ ptr0[i] = var34;
+ }
+
+}
+
+#else
+static void
+_backup_video_orc_dither_none_4u16_mask (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n;
+ orc_union64 *ORC_RESTRICT ptr0;
+ orc_union64 var33;
+ orc_union64 var34;
+ orc_union64 var35;
+
+ ptr0 = (orc_union64 *) ex->arrays[0];
+
+ /* 0: loadpq */
+ var35.i =
+ (ex->params[24] & 0xffffffff) | ((orc_uint64) (ex->params[24 +
+ (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadq */
+ var33 = ptr0[i];
+ /* 2: andnw */
+ var34.x4[0] = (~var35.x4[0]) & var33.x4[0];
+ var34.x4[1] = (~var35.x4[1]) & var33.x4[1];
+ var34.x4[2] = (~var35.x4[2]) & var33.x4[2];
+ var34.x4[3] = (~var35.x4[3]) & var33.x4[3];
+ /* 3: storeq */
+ ptr0[i] = var34;
+ }
+
+}
+
+void
+video_orc_dither_none_4u16_mask (guint16 * ORC_RESTRICT d1, orc_int64 p1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 9, 31, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 110, 111, 110, 101, 95, 52, 117, 49, 54, 95, 109, 97,
+ 115, 107, 11, 8, 8, 18, 8, 20, 8, 134, 32, 24, 21, 2, 74, 0,
+ 32, 0, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_none_4u16_mask);
+#else
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_none_4u16_mask");
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_none_4u16_mask);
+ orc_program_add_destination (p, 8, "d1");
+ orc_program_add_parameter_int64 (p, 8, "p1");
+ orc_program_add_temporary (p, 8, "t1");
+
+ orc_program_append_2 (p, "loadpq", 0, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "andnw", 2, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+ {
+ orc_union64 tmp;
+ tmp.i = p1;
+ ex->params[ORC_VAR_P1] = ((orc_uint64) tmp.i) & 0xffffffff;
+ ex->params[ORC_VAR_T1] = ((orc_uint64) tmp.i) >> 32;
+ }
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* video_orc_dither_verterr_4u8_mask
+ *
+ * Processes n elements: d1 is read as 32-bit elements, d2 as 64-bit
+ * elements, and p1 is the 64-bit mask. For each of the four lanes of an
+ * element:
+ *   sum = d2 lane + d1 lane (the d1 lane taken as an unsigned byte)
+ *   d2 lane = mask lane & sum
+ *   d1 lane = ORC_CLAMP_UB (~mask lane & sum)
+ *
+ * With DISABLE_ORC defined only the plain C loop is built. Otherwise the
+ * public function creates an OrcProgram from the static bytecode on first
+ * use (under orc_once_mutex_lock(), tracked by p_inited), registers
+ * _backup_video_orc_dither_verterr_4u8_mask as its backup function, keeps
+ * the compiled OrcCode in a function-local static and runs it through an
+ * OrcExecutor on every call. The 64-bit parameter is handed over as two
+ * 32-bit halves in ex->params[ORC_VAR_P1] (low) and ex->params[ORC_VAR_T1]
+ * (high); the backup function joins them again.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_verterr_4u8_mask (guint8 * ORC_RESTRICT d1,
+ guint16 * ORC_RESTRICT d2, orc_int64 p1, int n)
+{
+ int i;
+ orc_union32 *ORC_RESTRICT ptr0;
+ orc_union64 *ORC_RESTRICT ptr1;
+ orc_union32 var34;
+ orc_union64 var35;
+ orc_union64 var36;
+ orc_union32 var37;
+ orc_union64 var38;
+ orc_union64 var39;
+ orc_union64 var40;
+ orc_union64 var41;
+
+ ptr0 = (orc_union32 *) d1;
+ ptr1 = (orc_union64 *) d2;
+
+ /* 0: loadpq */
+ var38.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadl */
+ var34 = ptr0[i];
+ /* 2: convubw */
+ var39.x4[0] = (orc_uint8) var34.x4[0];
+ var39.x4[1] = (orc_uint8) var34.x4[1];
+ var39.x4[2] = (orc_uint8) var34.x4[2];
+ var39.x4[3] = (orc_uint8) var34.x4[3];
+ /* 3: loadq */
+ var35 = ptr1[i];
+ /* 4: addw */
+ var40.x4[0] = var35.x4[0] + var39.x4[0];
+ var40.x4[1] = var35.x4[1] + var39.x4[1];
+ var40.x4[2] = var35.x4[2] + var39.x4[2];
+ var40.x4[3] = var35.x4[3] + var39.x4[3];
+ /* 5: andw */
+ var36.x4[0] = var38.x4[0] & var40.x4[0];
+ var36.x4[1] = var38.x4[1] & var40.x4[1];
+ var36.x4[2] = var38.x4[2] & var40.x4[2];
+ var36.x4[3] = var38.x4[3] & var40.x4[3];
+ /* 6: storeq */
+ ptr1[i] = var36;
+ /* 7: andnw */
+ var41.x4[0] = (~var38.x4[0]) & var40.x4[0];
+ var41.x4[1] = (~var38.x4[1]) & var40.x4[1];
+ var41.x4[2] = (~var38.x4[2]) & var40.x4[2];
+ var41.x4[3] = (~var38.x4[3]) & var40.x4[3];
+ /* 8: convsuswb */
+ var37.x4[0] = ORC_CLAMP_UB (var41.x4[0]);
+ var37.x4[1] = ORC_CLAMP_UB (var41.x4[1]);
+ var37.x4[2] = ORC_CLAMP_UB (var41.x4[2]);
+ var37.x4[3] = ORC_CLAMP_UB (var41.x4[3]);
+ /* 9: storel */
+ ptr0[i] = var37;
+ }
+
+}
+
+#else
+static void
+_backup_video_orc_dither_verterr_4u8_mask (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n;
+ orc_union32 *ORC_RESTRICT ptr0;
+ orc_union64 *ORC_RESTRICT ptr1;
+ orc_union32 var34;
+ orc_union64 var35;
+ orc_union64 var36;
+ orc_union32 var37;
+ orc_union64 var38;
+ orc_union64 var39;
+ orc_union64 var40;
+ orc_union64 var41;
+
+ ptr0 = (orc_union32 *) ex->arrays[0];
+ ptr1 = (orc_union64 *) ex->arrays[1];
+
+ /* 0: loadpq */
+ var38.i =
+ (ex->params[24] & 0xffffffff) | ((orc_uint64) (ex->params[24 +
+ (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadl */
+ var34 = ptr0[i];
+ /* 2: convubw */
+ var39.x4[0] = (orc_uint8) var34.x4[0];
+ var39.x4[1] = (orc_uint8) var34.x4[1];
+ var39.x4[2] = (orc_uint8) var34.x4[2];
+ var39.x4[3] = (orc_uint8) var34.x4[3];
+ /* 3: loadq */
+ var35 = ptr1[i];
+ /* 4: addw */
+ var40.x4[0] = var35.x4[0] + var39.x4[0];
+ var40.x4[1] = var35.x4[1] + var39.x4[1];
+ var40.x4[2] = var35.x4[2] + var39.x4[2];
+ var40.x4[3] = var35.x4[3] + var39.x4[3];
+ /* 5: andw */
+ var36.x4[0] = var38.x4[0] & var40.x4[0];
+ var36.x4[1] = var38.x4[1] & var40.x4[1];
+ var36.x4[2] = var38.x4[2] & var40.x4[2];
+ var36.x4[3] = var38.x4[3] & var40.x4[3];
+ /* 6: storeq */
+ ptr1[i] = var36;
+ /* 7: andnw */
+ var41.x4[0] = (~var38.x4[0]) & var40.x4[0];
+ var41.x4[1] = (~var38.x4[1]) & var40.x4[1];
+ var41.x4[2] = (~var38.x4[2]) & var40.x4[2];
+ var41.x4[3] = (~var38.x4[3]) & var40.x4[3];
+ /* 8: convsuswb */
+ var37.x4[0] = ORC_CLAMP_UB (var41.x4[0]);
+ var37.x4[1] = ORC_CLAMP_UB (var41.x4[1]);
+ var37.x4[2] = ORC_CLAMP_UB (var41.x4[2]);
+ var37.x4[3] = ORC_CLAMP_UB (var41.x4[3]);
+ /* 9: storel */
+ ptr0[i] = var37;
+ }
+
+}
+
+void
+video_orc_dither_verterr_4u8_mask (guint8 * ORC_RESTRICT d1,
+ guint16 * ORC_RESTRICT d2, orc_int64 p1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 9, 33, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 118, 101, 114, 116, 101, 114, 114, 95, 52, 117, 56,
+ 95,
+ 109, 97, 115, 107, 11, 4, 4, 11, 8, 8, 18, 8, 20, 8, 20, 8,
+ 134, 32, 24, 21, 2, 150, 33, 0, 21, 2, 70, 33, 1, 33, 21, 2,
+ 73, 1, 32, 33, 21, 2, 74, 33, 32, 33, 21, 2, 160, 0, 33, 2,
+ 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_verterr_4u8_mask);
+#else
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_verterr_4u8_mask");
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_verterr_4u8_mask);
+ orc_program_add_destination (p, 4, "d1");
+ orc_program_add_destination (p, 8, "d2");
+ orc_program_add_parameter_int64 (p, 8, "p1");
+ orc_program_add_temporary (p, 8, "t1");
+ orc_program_add_temporary (p, 8, "t2");
+
+ orc_program_append_2 (p, "loadpq", 0, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convubw", 2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addw", 2, ORC_VAR_T2, ORC_VAR_D2, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "andw", 2, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "andnw", 2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuswb", 2, ORC_VAR_D1, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->arrays[ORC_VAR_D2] = d2;
+ {
+ orc_union64 tmp;
+ tmp.i = p1;
+ ex->params[ORC_VAR_P1] = ((orc_uint64) tmp.i) & 0xffffffff;
+ ex->params[ORC_VAR_T1] = ((orc_uint64) tmp.i) >> 32;
+ }
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* video_orc_dither_fs_muladd_u8
+ *
+ * For i in [0, n), working on 16-bit elements of d1:
+ *   d1[i] = d1[i] + 5 * d1[i + 4] + 3 * d1[i + 8]
+ * Each product is masked to 16 bits before it is added. The loop reads up
+ * to d1[n - 1 + 8], so the buffer must extend 8 elements past the n
+ * elements being updated.
+ *
+ * With DISABLE_ORC defined only the plain C loop is built. Otherwise the
+ * public function creates an OrcProgram from the static bytecode on first
+ * use (under orc_once_mutex_lock(), tracked by p_inited), registers
+ * _backup_video_orc_dither_fs_muladd_u8 as its backup function, keeps the
+ * compiled OrcCode in a function-local static and runs it through an
+ * OrcExecutor on every call.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_fs_muladd_u8 (guint16 * ORC_RESTRICT d1, int n)
+{
+ int i;
+ orc_union16 *ORC_RESTRICT ptr0;
+#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
+ volatile orc_union16 var34;
+#else
+ orc_union16 var34;
+#endif
+ orc_union16 var35;
+#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
+ volatile orc_union16 var36;
+#else
+ orc_union16 var36;
+#endif
+ orc_union16 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union16 var40;
+ orc_union16 var41;
+ orc_union16 var42;
+
+ ptr0 = (orc_union16 *) d1;
+
+ /* 1: loadpw */
+ var34.i = (int) 0x00000005; /* 5 or 2.47033e-323f */
+ /* 6: loadpw */
+ var36.i = (int) 0x00000003; /* 3 or 1.4822e-323f */
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadoffw */
+ var38 = ptr0[i + 4];
+ /* 2: mullw */
+ var39.i = (var38.i * var34.i) & 0xffff;
+ /* 3: loadw */
+ var35 = ptr0[i];
+ /* 4: addw */
+ var40.i = var39.i + var35.i;
+ /* 5: loadoffw */
+ var41 = ptr0[i + 8];
+ /* 7: mullw */
+ var42.i = (var41.i * var36.i) & 0xffff;
+ /* 8: addw */
+ var37.i = var40.i + var42.i;
+ /* 9: storew */
+ ptr0[i] = var37;
+ }
+
+}
+
+#else
+static void
+_backup_video_orc_dither_fs_muladd_u8 (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n;
+ orc_union16 *ORC_RESTRICT ptr0;
+#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
+ volatile orc_union16 var34;
+#else
+ orc_union16 var34;
+#endif
+ orc_union16 var35;
+#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
+ volatile orc_union16 var36;
+#else
+ orc_union16 var36;
+#endif
+ orc_union16 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union16 var40;
+ orc_union16 var41;
+ orc_union16 var42;
+
+ ptr0 = (orc_union16 *) ex->arrays[0];
+
+ /* 1: loadpw */
+ var34.i = (int) 0x00000005; /* 5 or 2.47033e-323f */
+ /* 6: loadpw */
+ var36.i = (int) 0x00000003; /* 3 or 1.4822e-323f */
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadoffw */
+ var38 = ptr0[i + 4];
+ /* 2: mullw */
+ var39.i = (var38.i * var34.i) & 0xffff;
+ /* 3: loadw */
+ var35 = ptr0[i];
+ /* 4: addw */
+ var40.i = var39.i + var35.i;
+ /* 5: loadoffw */
+ var41 = ptr0[i + 8];
+ /* 7: mullw */
+ var42.i = (var41.i * var36.i) & 0xffff;
+ /* 8: addw */
+ var37.i = var40.i + var42.i;
+ /* 9: storew */
+ ptr0[i] = var37;
+ }
+
+}
+
+void
+video_orc_dither_fs_muladd_u8 (guint16 * ORC_RESTRICT d1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 9, 29, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 102, 115, 95, 109, 117, 108, 97, 100, 100, 95, 117,
+ 56,
+ 11, 2, 2, 14, 4, 4, 0, 0, 0, 14, 2, 5, 0, 0, 0, 14,
+ 4, 8, 0, 0, 0, 14, 2, 3, 0, 0, 0, 20, 2, 20, 2, 83,
+ 33, 0, 16, 89, 33, 33, 17, 70, 32, 33, 0, 83, 33, 0, 18, 89,
+ 33, 33, 19, 70, 0, 32, 33, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_fs_muladd_u8);
+#else
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_fs_muladd_u8");
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_fs_muladd_u8);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_constant (p, 4, 0x00000004, "c1");
+ orc_program_add_constant (p, 2, 0x00000005, "c2");
+ orc_program_add_constant (p, 4, 0x00000008, "c3");
+ orc_program_add_constant (p, 2, 0x00000003, "c4");
+ orc_program_add_temporary (p, 2, "t1");
+ orc_program_add_temporary (p, 2, "t2");
+
+ orc_program_append_2 (p, "loadoffw", 0, ORC_VAR_T2, ORC_VAR_D1,
+ ORC_VAR_C1, ORC_VAR_D1);
+ orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "loadoffw", 0, ORC_VAR_T2, ORC_VAR_D1,
+ ORC_VAR_C3, ORC_VAR_D1);
+ orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C4,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* video_orc_dither_ordered_u8
+ *
+ * For i in [0, n): d1[i] = ORC_CLAMP_UB (d1[i] + s1[i]), with both bytes
+ * taken as unsigned values before the addition.
+ *
+ * With DISABLE_ORC defined only the plain C loop is built. Otherwise the
+ * public function creates an OrcProgram from the static bytecode on first
+ * use (under orc_once_mutex_lock(), tracked by p_inited), registers
+ * _backup_video_orc_dither_ordered_u8 as its backup function, keeps the
+ * compiled OrcCode in a function-local static and runs it through an
+ * OrcExecutor on every call.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_ordered_u8 (guint8 * ORC_RESTRICT d1,
+ const guint8 * ORC_RESTRICT s1, int n)
+{
+ int i;
+ orc_int8 *ORC_RESTRICT ptr0;
+ const orc_int8 *ORC_RESTRICT ptr4;
+ orc_int8 var32;
+ orc_int8 var33;
+ orc_int8 var34;
+
+ ptr0 = (orc_int8 *) d1;
+ ptr4 = (orc_int8 *) s1;
+
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadb */
+ var32 = ptr0[i];
+ /* 1: loadb */
+ var33 = ptr4[i];
+ /* 2: addusb */
+ var34 = ORC_CLAMP_UB ((orc_uint8) var32 + (orc_uint8) var33);
+ /* 3: storeb */
+ ptr0[i] = var34;
+ }
+
+}
+
+#else
+static void
+_backup_video_orc_dither_ordered_u8 (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n;
+ orc_int8 *ORC_RESTRICT ptr0;
+ const orc_int8 *ORC_RESTRICT ptr4;
+ orc_int8 var32;
+ orc_int8 var33;
+ orc_int8 var34;
+
+ ptr0 = (orc_int8 *) ex->arrays[0];
+ ptr4 = (orc_int8 *) ex->arrays[4];
+
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadb */
+ var32 = ptr0[i];
+ /* 1: loadb */
+ var33 = ptr4[i];
+ /* 2: addusb */
+ var34 = ORC_CLAMP_UB ((orc_uint8) var32 + (orc_uint8) var33);
+ /* 3: storeb */
+ ptr0[i] = var34;
+ }
+
+}
+
+void
+video_orc_dither_ordered_u8 (guint8 * ORC_RESTRICT d1,
+ const guint8 * ORC_RESTRICT s1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 111, 114, 100, 101, 114, 101, 100, 95, 117, 56, 11,
+ 1,
+ 1, 12, 1, 1, 35, 0, 0, 4, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p, _backup_video_orc_dither_ordered_u8);
+#else
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_ordered_u8");
+ orc_program_set_backup_function (p, _backup_video_orc_dither_ordered_u8);
+ orc_program_add_destination (p, 1, "d1");
+ orc_program_add_source (p, 1, "s1");
+
+ orc_program_append_2 (p, "addusb", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->arrays[ORC_VAR_S1] = (void *) s1;
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* video_orc_dither_ordered_4u8_mask
+ *
+ * In-place kernel over n elements. Element i is one orc_union32 of d1
+ * (four byte lanes) paired with one orc_union64 of s1 (four lanes). For each
+ * of the four lanes: the d1 byte is converted through orc_uint8, the matching
+ * s1 lane is added, the sum is ANDed with the complement of the matching
+ * lane of p1 (so bits set in p1 are cleared), and the result is passed
+ * through ORC_CLAMP_UB before being stored back into d1.
+ * ORC_CLAMP_UB and the lane types of the unions are defined outside this
+ * chunk - confirm against the Orc headers.
+ *
+ * Two build variants follow. With DISABLE_ORC defined the loop is compiled
+ * directly as C. Otherwise the public function builds and compiles an Orc
+ * program on first use and runs its code, and the same C loop is registered
+ * on that program as its backup function.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_ordered_4u8_mask (guint8 * ORC_RESTRICT d1,
+ const guint16 * ORC_RESTRICT s1, orc_int64 p1, int n)
+{
+ int i;
+ orc_union32 *ORC_RESTRICT ptr0; /* d1 viewed as 4-byte elements */
+ const orc_union64 *ORC_RESTRICT ptr4; /* s1 viewed as 8-byte elements */
+ orc_union32 var34; /* element loaded from d1 */
+ orc_union64 var35; /* element loaded from s1 */
+ orc_union32 var36; /* element stored back to d1 */
+ orc_union64 var37; /* the p1 mask */
+ orc_union64 var38; /* d1 lanes after widening */
+ orc_union64 var39; /* widened lanes plus s1 lanes */
+ orc_union64 var40; /* sum with the p1 bits cleared */
+
+ ptr0 = (orc_union32 *) d1;
+ ptr4 = (orc_union64 *) s1;
+
+ /* 0: loadpq - the mask is loop-invariant, so it is loaded once */
+ var37.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadl - four destination bytes */
+ var34 = ptr0[i];
+ /* 2: convubw - each byte converted through orc_uint8 into a union64 lane */
+ var38.x4[0] = (orc_uint8) var34.x4[0];
+ var38.x4[1] = (orc_uint8) var34.x4[1];
+ var38.x4[2] = (orc_uint8) var34.x4[2];
+ var38.x4[3] = (orc_uint8) var34.x4[3];
+ /* 3: loadq - four source lanes */
+ var35 = ptr4[i];
+ /* 4: addw - lane-wise sum of widened destination and source */
+ var39.x4[0] = var38.x4[0] + var35.x4[0];
+ var39.x4[1] = var38.x4[1] + var35.x4[1];
+ var39.x4[2] = var38.x4[2] + var35.x4[2];
+ var39.x4[3] = var38.x4[3] + var35.x4[3];
+ /* 5: andnw - keep only the bits that are NOT set in the mask lane */
+ var40.x4[0] = (~var37.x4[0]) & var39.x4[0];
+ var40.x4[1] = (~var37.x4[1]) & var39.x4[1];
+ var40.x4[2] = (~var37.x4[2]) & var39.x4[2];
+ var40.x4[3] = (~var37.x4[3]) & var39.x4[3];
+ /* 6: convsuswb - each lane passed through ORC_CLAMP_UB into a byte lane */
+ var36.x4[0] = ORC_CLAMP_UB (var40.x4[0]);
+ var36.x4[1] = ORC_CLAMP_UB (var40.x4[1]);
+ var36.x4[2] = ORC_CLAMP_UB (var40.x4[2]);
+ var36.x4[3] = ORC_CLAMP_UB (var40.x4[3]);
+ /* 7: storel - overwrite the four destination bytes */
+ ptr0[i] = var36;
+ }
+
+}
+
+#else /* !DISABLE_ORC */
+static void
+_backup_video_orc_dither_ordered_4u8_mask (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n; /* element count set by the caller of the executor */
+ orc_union32 *ORC_RESTRICT ptr0;
+ const orc_union64 *ORC_RESTRICT ptr4;
+ orc_union32 var34;
+ orc_union64 var35;
+ orc_union32 var36;
+ orc_union64 var37;
+ orc_union64 var38;
+ orc_union64 var39;
+ orc_union64 var40;
+
+ ptr0 = (orc_union32 *) ex->arrays[0]; /* presumably the ORC_VAR_D1 slot filled by the wrapper - confirm */
+ ptr4 = (orc_union64 *) ex->arrays[4]; /* presumably the ORC_VAR_S1 slot filled by the wrapper - confirm */
+
+ /* 0: loadpq - rebuild the 64-bit mask from two params slots: low 32 bits from params[24], high 32 bits from the slot (ORC_VAR_T1 - ORC_VAR_P1) entries later; the wrapper writes these halves to params[ORC_VAR_P1] and params[ORC_VAR_T1], so 24 is presumably ORC_VAR_P1 - confirm */
+ var37.i =
+ (ex->params[24] & 0xffffffff) | ((orc_uint64) (ex->params[24 +
+ (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadl - four destination bytes */
+ var34 = ptr0[i];
+ /* 2: convubw - each byte converted through orc_uint8 into a union64 lane */
+ var38.x4[0] = (orc_uint8) var34.x4[0];
+ var38.x4[1] = (orc_uint8) var34.x4[1];
+ var38.x4[2] = (orc_uint8) var34.x4[2];
+ var38.x4[3] = (orc_uint8) var34.x4[3];
+ /* 3: loadq - four source lanes */
+ var35 = ptr4[i];
+ /* 4: addw - lane-wise sum of widened destination and source */
+ var39.x4[0] = var38.x4[0] + var35.x4[0];
+ var39.x4[1] = var38.x4[1] + var35.x4[1];
+ var39.x4[2] = var38.x4[2] + var35.x4[2];
+ var39.x4[3] = var38.x4[3] + var35.x4[3];
+ /* 5: andnw - keep only the bits that are NOT set in the mask lane */
+ var40.x4[0] = (~var37.x4[0]) & var39.x4[0];
+ var40.x4[1] = (~var37.x4[1]) & var39.x4[1];
+ var40.x4[2] = (~var37.x4[2]) & var39.x4[2];
+ var40.x4[3] = (~var37.x4[3]) & var39.x4[3];
+ /* 6: convsuswb - each lane passed through ORC_CLAMP_UB into a byte lane */
+ var36.x4[0] = ORC_CLAMP_UB (var40.x4[0]);
+ var36.x4[1] = ORC_CLAMP_UB (var40.x4[1]);
+ var36.x4[2] = ORC_CLAMP_UB (var40.x4[2]);
+ var36.x4[3] = ORC_CLAMP_UB (var40.x4[3]);
+ /* 7: storel - overwrite the four destination bytes */
+ ptr0[i] = var36;
+ }
+
+}
+
+void
+video_orc_dither_ordered_4u8_mask (guint8 * ORC_RESTRICT d1,
+ const guint16 * ORC_RESTRICT s1, orc_int64 p1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex; /* executor lives on this call's stack */
+ static volatile int p_inited = 0; /* becomes TRUE once c has been set up */
+ static OrcCode *c = 0; /* compiled code, kept across calls */
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) { /* checked once without the lock, then again while holding it */
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1 /* this branch is the one compiled: program created from static bytecode */
+ static const orc_uint8 bc[] = { /* the 33 bytes after the value 33 are the ASCII name "video_orc_dither_ordered_4u8_mask" */
+ 1, 9, 33, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 111, 114, 100, 101, 114, 101, 100, 95, 52, 117, 56,
+ 95,
+ 109, 97, 115, 107, 11, 4, 4, 12, 8, 8, 18, 8, 20, 8, 20, 8,
+ 134, 33, 24, 21, 2, 150, 32, 0, 21, 2, 70, 32, 32, 4, 21, 2,
+ 74, 32, 33, 32, 21, 2, 160, 0, 32, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_ordered_4u8_mask);
+#else /* never compiled because of the "#if 1" above: same program built through the Orc API */
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_ordered_4u8_mask");
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_ordered_4u8_mask);
+ orc_program_add_destination (p, 4, "d1");
+ orc_program_add_source (p, 8, "s1");
+ orc_program_add_parameter_int64 (p, 8, "p1");
+ orc_program_add_temporary (p, 8, "t1");
+ orc_program_add_temporary (p, 8, "t2");
+
+ orc_program_append_2 (p, "loadpq", 0, ORC_VAR_T2, ORC_VAR_P1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convubw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addw", 2, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "andnw", 2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuswb", 2, ORC_VAR_D1, ORC_VAR_T1,
+ ORC_VAR_D1, ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p); /* NOTE(review): the return value is not inspected here */
+ c = orc_program_take_code (p); /* keep the code object; the program itself is freed next */
+ orc_program_free (p);
+ }
+ p_inited = TRUE; /* written while the once-mutex is still held */
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c; /* the code object is passed along in the A2 array slot */
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->arrays[ORC_VAR_S1] = (void *) s1; /* cast drops const; the kernel only reads s1 */
+ { /* the 64-bit p1 is split across two params slots; the backup function reassembles it */
+ orc_union64 tmp;
+ tmp.i = p1;
+ ex->params[ORC_VAR_P1] = ((orc_uint64) tmp.i) & 0xffffffff; /* low 32 bits */
+ ex->params[ORC_VAR_T1] = ((orc_uint64) tmp.i) >> 32; /* high 32 bits */
+ }
+
+ func = c->exec; /* entry point recorded in the OrcCode */
+ func (ex);
+}
+#endif
+
+
+/* video_orc_dither_ordered_4u16_mask
+ *
+ * In-place kernel over n elements. Element i is one orc_union64 of d1 paired
+ * with one orc_union64 of s1, each handled as four lanes. For each lane the
+ * d1 and s1 values are added and the sum is ANDed with the complement of the
+ * matching lane of p1 (so bits set in p1 are cleared); the result is stored
+ * back into d1. No clamping step is applied in this variant.
+ * The lane type of orc_union64 is defined outside this chunk - confirm
+ * against the Orc headers.
+ *
+ * Two build variants follow. With DISABLE_ORC defined the loop is compiled
+ * directly as C. Otherwise the public function builds and compiles an Orc
+ * program on first use and runs its code, and the same C loop is registered
+ * on that program as its backup function.
+ */
+#ifdef DISABLE_ORC
+void
+video_orc_dither_ordered_4u16_mask (guint16 * ORC_RESTRICT d1,
+ const guint16 * ORC_RESTRICT s1, orc_int64 p1, int n)
+{
+ int i;
+ orc_union64 *ORC_RESTRICT ptr0; /* d1 viewed as 8-byte elements */
+ const orc_union64 *ORC_RESTRICT ptr4; /* s1 viewed as 8-byte elements */
+ orc_union64 var34; /* element loaded from d1 */
+ orc_union64 var35; /* element loaded from s1 */
+ orc_union64 var36; /* element stored back to d1 */
+ orc_union64 var37; /* the p1 mask */
+ orc_union64 var38; /* lane-wise sum before masking */
+
+ ptr0 = (orc_union64 *) d1;
+ ptr4 = (orc_union64 *) s1;
+
+ /* 0: loadpq - the mask is loop-invariant, so it is loaded once */
+ var37.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadq - four destination lanes */
+ var34 = ptr0[i];
+ /* 2: loadq - four source lanes */
+ var35 = ptr4[i];
+ /* 3: addw - lane-wise sum of destination and source */
+ var38.x4[0] = var34.x4[0] + var35.x4[0];
+ var38.x4[1] = var34.x4[1] + var35.x4[1];
+ var38.x4[2] = var34.x4[2] + var35.x4[2];
+ var38.x4[3] = var34.x4[3] + var35.x4[3];
+ /* 4: andnw - keep only the bits that are NOT set in the mask lane */
+ var36.x4[0] = (~var37.x4[0]) & var38.x4[0];
+ var36.x4[1] = (~var37.x4[1]) & var38.x4[1];
+ var36.x4[2] = (~var37.x4[2]) & var38.x4[2];
+ var36.x4[3] = (~var37.x4[3]) & var38.x4[3];
+ /* 5: storeq - overwrite the four destination lanes */
+ ptr0[i] = var36;
+ }
+
+}
+
+#else /* !DISABLE_ORC */
+static void
+_backup_video_orc_dither_ordered_4u16_mask (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int n = ex->n; /* element count set by the caller of the executor */
+ orc_union64 *ORC_RESTRICT ptr0;
+ const orc_union64 *ORC_RESTRICT ptr4;
+ orc_union64 var34;
+ orc_union64 var35;
+ orc_union64 var36;
+ orc_union64 var37;
+ orc_union64 var38;
+
+ ptr0 = (orc_union64 *) ex->arrays[0]; /* presumably the ORC_VAR_D1 slot filled by the wrapper - confirm */
+ ptr4 = (orc_union64 *) ex->arrays[4]; /* presumably the ORC_VAR_S1 slot filled by the wrapper - confirm */
+
+ /* 0: loadpq - rebuild the 64-bit mask from two params slots: low 32 bits from params[24], high 32 bits from the slot (ORC_VAR_T1 - ORC_VAR_P1) entries later; the wrapper writes these halves to params[ORC_VAR_P1] and params[ORC_VAR_T1], so 24 is presumably ORC_VAR_P1 - confirm */
+ var37.i =
+ (ex->params[24] & 0xffffffff) | ((orc_uint64) (ex->params[24 +
+ (ORC_VAR_T1 - ORC_VAR_P1)]) << 32);
+
+ for (i = 0; i < n; i++) {
+ /* 1: loadq - four destination lanes */
+ var34 = ptr0[i];
+ /* 2: loadq - four source lanes */
+ var35 = ptr4[i];
+ /* 3: addw - lane-wise sum of destination and source */
+ var38.x4[0] = var34.x4[0] + var35.x4[0];
+ var38.x4[1] = var34.x4[1] + var35.x4[1];
+ var38.x4[2] = var34.x4[2] + var35.x4[2];
+ var38.x4[3] = var34.x4[3] + var35.x4[3];
+ /* 4: andnw - keep only the bits that are NOT set in the mask lane */
+ var36.x4[0] = (~var37.x4[0]) & var38.x4[0];
+ var36.x4[1] = (~var37.x4[1]) & var38.x4[1];
+ var36.x4[2] = (~var37.x4[2]) & var38.x4[2];
+ var36.x4[3] = (~var37.x4[3]) & var38.x4[3];
+ /* 5: storeq - overwrite the four destination lanes */
+ ptr0[i] = var36;
+ }
+
+}
+
+void
+video_orc_dither_ordered_4u16_mask (guint16 * ORC_RESTRICT d1,
+ const guint16 * ORC_RESTRICT s1, orc_int64 p1, int n)
+{
+ OrcExecutor _ex, *ex = &_ex; /* executor lives on this call's stack */
+ static volatile int p_inited = 0; /* becomes TRUE once c has been set up */
+ static OrcCode *c = 0; /* compiled code, kept across calls */
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) { /* checked once without the lock, then again while holding it */
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1 /* this branch is the one compiled: program created from static bytecode */
+ static const orc_uint8 bc[] = { /* the 34 bytes after the value 34 are the ASCII name "video_orc_dither_ordered_4u16_mask" */
+ 1, 9, 34, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 100, 105, 116,
+ 104, 101, 114, 95, 111, 114, 100, 101, 114, 101, 100, 95, 52, 117, 49,
+ 54,
+ 95, 109, 97, 115, 107, 11, 8, 8, 12, 8, 8, 18, 8, 20, 8, 20,
+ 8, 134, 33, 24, 21, 2, 70, 32, 0, 4, 21, 2, 74, 0, 33, 32,
+ 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_ordered_4u16_mask);
+#else /* never compiled because of the "#if 1" above: same program built through the Orc API */
+ p = orc_program_new ();
+ orc_program_set_name (p, "video_orc_dither_ordered_4u16_mask");
+ orc_program_set_backup_function (p,
+ _backup_video_orc_dither_ordered_4u16_mask);
+ orc_program_add_destination (p, 8, "d1");
+ orc_program_add_source (p, 8, "s1");
+ orc_program_add_parameter_int64 (p, 8, "p1");
+ orc_program_add_temporary (p, 8, "t1");
+ orc_program_add_temporary (p, 8, "t2");
+
+ orc_program_append_2 (p, "loadpq", 0, ORC_VAR_T2, ORC_VAR_P1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addw", 2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_S1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "andnw", 2, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p); /* NOTE(review): the return value is not inspected here */
+ c = orc_program_take_code (p); /* keep the code object; the program itself is freed next */
+ orc_program_free (p);
+ }
+ p_inited = TRUE; /* written while the once-mutex is still held */
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c; /* the code object is passed along in the A2 array slot */
+ ex->program = 0;
+
+ ex->n = n;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->arrays[ORC_VAR_S1] = (void *) s1; /* cast drops const; the kernel only reads s1 */
+ { /* the 64-bit p1 is split across two params slots; the backup function reassembles it */
+ orc_union64 tmp;
+ tmp.i = p1;
+ ex->params[ORC_VAR_P1] = ((orc_uint64) tmp.i) & 0xffffffff; /* low 32 bits */
+ ex->params[ORC_VAR_T1] = ((orc_uint64) tmp.i) >> 32; /* high 32 bits */
+ }
+
+ func = c->exec; /* entry point recorded in the OrcCode */
+ func (ex);
+}
+#endif