A32_COLOR (vuya, 0, 8, 16, 24);
/* Y444, Y42B, I420, YV12, Y41B */
-#define PLANAR_YUV_BLEND(format_name,format_enum,x_round,y_round,MEMCPY,BLENDLOOP) \
+#define PLANAR_YUV_BLEND(format_name,x_round,y_round,MEMCPY,BLENDLOOP,n_bits) \
inline static void \
_blend_##format_name (const guint8 * src, guint8 * dest, \
- gint src_stride, gint dest_stride, gint src_width, gint src_height, \
+ gint src_stride, gint dest_stride, gint pstride, gint src_width, gint src_height, \
gdouble src_alpha, GstCompositorBlendMode mode) \
{ \
gint i; \
gint b_alpha; \
+ gint range; \
\
/* in source mode we just have to copy over things */ \
if (mode == COMPOSITOR_BLEND_MODE_SOURCE) { \
\
/* If it's completely opaque, we do a fast copy */ \
if (G_UNLIKELY (src_alpha == 1.0)) { \
+ gint width_in_bytes = src_width * pstride; \
GST_LOG ("Fast copy (alpha == 1.0)"); \
for (i = 0; i < src_height; i++) { \
- MEMCPY (dest, src, src_width); \
+ MEMCPY (dest, src, width_in_bytes); \
src += src_stride; \
dest += dest_stride; \
} \
return; \
} \
\
- b_alpha = CLAMP ((gint) (src_alpha * 255), 0, 255); \
+ range = (1 << n_bits) - 1; \
+ b_alpha = CLAMP ((gint) (src_alpha * range), 0, range); \
\
BLENDLOOP(dest, dest_stride, src, src_stride, b_alpha, src_width, src_height);\
} \
gint dest_width, dest_height; \
const GstVideoFormatInfo *info; \
gint src_width, src_height; \
+ gint pstride; \
\
src_width = GST_VIDEO_FRAME_WIDTH (srcframe); \
src_height = GST_VIDEO_FRAME_HEIGHT (srcframe); \
dest_comp_rowstride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 0); \
src_comp_width = GST_VIDEO_FORMAT_INFO_SCALE_WIDTH(info, 0, b_src_width); \
src_comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, b_src_height); \
+ pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (info, 0); \
comp_xpos = (xpos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 0, xpos); \
comp_ypos = (ypos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, ypos); \
comp_xoffset = (xoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 0, xoffset); \
comp_yoffset = (yoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, yoffset); \
- _blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \
- b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \
+ _blend_##format_name (b_src + comp_xoffset * pstride + comp_yoffset * src_comp_rowstride, \
+ b_dest + comp_xpos * pstride + comp_ypos * dest_comp_rowstride, \
src_comp_rowstride, \
- dest_comp_rowstride, src_comp_width, src_comp_height, \
+ dest_comp_rowstride, pstride, src_comp_width, src_comp_height, \
src_alpha, mode); \
\
b_src = GST_VIDEO_FRAME_COMP_DATA (srcframe, 1); \
dest_comp_rowstride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 1); \
src_comp_width = GST_VIDEO_FORMAT_INFO_SCALE_WIDTH(info, 1, b_src_width); \
src_comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, b_src_height); \
+ pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (info, 1); \
comp_xpos = (xpos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 1, xpos); \
comp_ypos = (ypos == 0) ? 0 : ypos >> info->h_sub[1]; \
comp_xoffset = (xoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 1, xoffset); \
comp_yoffset = (yoffset == 0) ? 0 : yoffset >> info->h_sub[1]; \
- _blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \
- b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \
+ _blend_##format_name (b_src + comp_xoffset * pstride + comp_yoffset * src_comp_rowstride, \
+ b_dest + comp_xpos * pstride + comp_ypos * dest_comp_rowstride, \
src_comp_rowstride, \
- dest_comp_rowstride, src_comp_width, src_comp_height, \
+ dest_comp_rowstride, pstride, src_comp_width, src_comp_height, \
src_alpha, mode); \
\
b_src = GST_VIDEO_FRAME_COMP_DATA (srcframe, 2); \
dest_comp_rowstride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 2); \
src_comp_width = GST_VIDEO_FORMAT_INFO_SCALE_WIDTH(info, 2, b_src_width); \
src_comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, b_src_height); \
+ pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (info, 2); \
comp_xpos = (xpos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 2, xpos); \
comp_ypos = (ypos == 0) ? 0 : ypos >> info->h_sub[2]; \
comp_xoffset = (xoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 2, xoffset); \
comp_yoffset = (yoffset == 0) ? 0 : yoffset >> info->h_sub[2]; \
- _blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \
- b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \
+ _blend_##format_name (b_src + comp_xoffset * pstride + comp_yoffset * src_comp_rowstride, \
+ b_dest + comp_xpos * pstride + comp_ypos * dest_comp_rowstride, \
src_comp_rowstride, \
- dest_comp_rowstride, src_comp_width, src_comp_height, \
+ dest_comp_rowstride, pstride, src_comp_width, src_comp_height, \
src_alpha, mode); \
}
} \
}
+/* PLANAR_YUV_HIGH_FILL_CHECKER (format_name, nbits, endian, MEMSET)
+ *
+ * Defines fill_checker_<format_name> (frame, y_start, y_end), which paints a
+ * checkerboard into a three-plane frame whose samples are written as 16-bit
+ * words.
+ *
+ *   format_name  suffix of the generated function name
+ *   nbits        sample depth; the 8-bit levels 80 and 160 are shifted left
+ *                by (nbits - 8) and the chroma fill value is 1 << (nbits - 1)
+ *   endian       LE or BE, pasted into GST_WRITE_UINT16_<endian> and
+ *                GUINT16_TO_<endian>
+ *   MEMSET       2-D fill, called as (ptr, stride, value, width, height)
+ *
+ * Component 0 receives the pattern, which flips every 8 rows and every 8
+ * columns; components 1 and 2 are each filled with a single constant.
+ */
+#define PLANAR_YUV_HIGH_FILL_CHECKER(format_name, nbits, endian, MEMSET) \
+static void \
+fill_checker_##format_name (GstVideoFrame * frame, guint y_start, guint y_end) \
+{ \
+  gint i, j; \
+  static const int tab[] = { 80 << (nbits - 8), 160 << (nbits - 8), 80 << (nbits - 8), 160 << (nbits - 8),}; \
+  guint8 *p; \
+  gint comp_width, comp_height; \
+  gint rowstride, comp_yoffset; \
+  gint pstride; \
+  gint uv; \
+  const GstVideoFormatInfo *info; \
+  \
+  /* Component 0: write the checker pattern sample by sample. */ \
+  info = frame->info.finfo; \
+  p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \
+  comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
+  comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \
+  rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
+  pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (frame, 0); \
+  comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \
+  p += comp_yoffset * rowstride; \
+  \
+  for (i = 0; i < comp_height; i++) { \
+    for (j = 0; j < comp_width; j++) { \
+      /* Bit 3 of the absolute row and of the column select the level. */ \
+      GST_WRITE_UINT16_##endian (p, tab[(((i + y_start) & 0x8) >> 3) + ((j & 0x8) >> 3)]); \
+      p += pstride; \
+    } \
+    /* Skip whatever lies between the last sample and the next row. */ \
+    p += rowstride - comp_width * pstride; \
+  } \
+  \
+  /* Components 1 and 2: constant fill with the mid-scale value. */ \
+  uv = GUINT16_TO_##endian (1 << (nbits - 1)); \
+  p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \
+  comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \
+  comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \
+  rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \
+  pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (frame, 1); \
+  comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[1]; \
+  p += comp_yoffset * rowstride; \
+  MEMSET (p, rowstride, uv, comp_width, comp_height); \
+  \
+  p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \
+  comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \
+  comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \
+  rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \
+  /* Query component 2 itself (was component 1); MEMSET does not take it. */ \
+  pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (frame, 2); \
+  comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[2]; \
+  p += comp_yoffset * rowstride; \
+  MEMSET (p, rowstride, uv, comp_width, comp_height); \
+}
+
+#define PLANAR_YUV_HIGH_FILL_COLOR(format_name,endian,MEMSET) \
+static void \
+fill_color_##format_name (GstVideoFrame * frame, \
+ guint y_start, guint y_end, gint colY, gint colU, gint colV) \
+{ /* Generated fill_color_<format_name>: fills the row range derived from \
+ * y_start / y_end in components 0, 1 and 2 with colY, colU and colV \
+ * respectively. Each value goes through GUINT16_TO_<endian> and is then \
+ * handed to MEMSET as (ptr, stride, value, width, height). */ \
+ guint8 *p; \
+ gint comp_width, comp_height; \
+ gint rowstride, comp_yoffset; \
+ const GstVideoFormatInfo *info; \
+ \
+ info = frame->info.finfo; \
+ p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \
+ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
+ comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \
+ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
+ comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \
+ p += comp_yoffset * rowstride; /* advance to the first row to fill */ \
+ MEMSET (p, rowstride, GUINT16_TO_##endian (colY), comp_width, comp_height); \
+ \
+ p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \
+ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \
+ comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \
+ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \
+ comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[1]; /* start row shifted by this component's h_sub */ \
+ p += comp_yoffset * rowstride; \
+ MEMSET (p, rowstride, GUINT16_TO_##endian (colU), comp_width, comp_height); \
+ \
+ p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \
+ comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \
+ comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \
+ rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \
+ comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[2]; \
+ p += comp_yoffset * rowstride; \
+ MEMSET (p, rowstride, GUINT16_TO_##endian (colV), comp_width, comp_height); \
+}
+
#define GST_ROUND_UP_1(x) (x)
-PLANAR_YUV_BLEND (i420, GST_VIDEO_FORMAT_I420, GST_ROUND_UP_2,
- GST_ROUND_UP_2, memcpy, compositor_orc_blend_u8);
+PLANAR_YUV_BLEND (i420, GST_ROUND_UP_2,
+ GST_ROUND_UP_2, memcpy, compositor_orc_blend_u8, 8);
PLANAR_YUV_FILL_CHECKER (i420, GST_VIDEO_FORMAT_I420, memset);
PLANAR_YUV_FILL_COLOR (i420, GST_VIDEO_FORMAT_I420, memset);
PLANAR_YUV_FILL_COLOR (yv12, GST_VIDEO_FORMAT_YV12, memset);
-PLANAR_YUV_BLEND (y444, GST_VIDEO_FORMAT_Y444, GST_ROUND_UP_1,
- GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8);
+PLANAR_YUV_BLEND (y444, GST_ROUND_UP_1,
+ GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8, 8);
PLANAR_YUV_FILL_CHECKER (y444, GST_VIDEO_FORMAT_Y444, memset);
PLANAR_YUV_FILL_COLOR (y444, GST_VIDEO_FORMAT_Y444, memset);
-PLANAR_YUV_BLEND (y42b, GST_VIDEO_FORMAT_Y42B, GST_ROUND_UP_2,
- GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8);
+PLANAR_YUV_BLEND (y42b, GST_ROUND_UP_2,
+ GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8, 8);
PLANAR_YUV_FILL_CHECKER (y42b, GST_VIDEO_FORMAT_Y42B, memset);
PLANAR_YUV_FILL_COLOR (y42b, GST_VIDEO_FORMAT_Y42B, memset);
-PLANAR_YUV_BLEND (y41b, GST_VIDEO_FORMAT_Y41B, GST_ROUND_UP_4,
- GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8);
+PLANAR_YUV_BLEND (y41b, GST_ROUND_UP_4,
+ GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8, 8);
PLANAR_YUV_FILL_CHECKER (y41b, GST_VIDEO_FORMAT_Y41B, memset);
PLANAR_YUV_FILL_COLOR (y41b, GST_VIDEO_FORMAT_Y41B, memset);
+#define BLEND_HIGH(format_name) /* expands to the ORC kernel compositor_orc_blend_<format_name> */ \
+compositor_orc_blend_##format_name
+
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN /* in this branch the *le formats get the plain u10/u12 kernels and the *be formats the _swap kernels; the #else branch reverses the pairing */
+PLANAR_YUV_BLEND (i420_10le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u10), 10);
+PLANAR_YUV_BLEND (i420_10be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u10_swap), 10);
+
+PLANAR_YUV_BLEND (i420_12le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u12), 12);
+PLANAR_YUV_BLEND (i420_12be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u12_swap), 12);
+
+PLANAR_YUV_BLEND (i422_10le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u10), 10);
+PLANAR_YUV_BLEND (i422_10be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u10_swap), 10);
+
+PLANAR_YUV_BLEND (i422_12le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u12), 12);
+PLANAR_YUV_BLEND (i422_12be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u12_swap), 12);
+#else /* G_BYTE_ORDER == G_LITTLE_ENDIAN */
+PLANAR_YUV_BLEND (i420_10le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u10_swap), 10);
+PLANAR_YUV_BLEND (i420_10be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u10), 10);
+
+PLANAR_YUV_BLEND (i420_12le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u12_swap), 12);
+PLANAR_YUV_BLEND (i420_12be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy,
+ BLEND_HIGH (u12), 12);
+
+PLANAR_YUV_BLEND (i422_10le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u10_swap), 10);
+PLANAR_YUV_BLEND (i422_10be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u10), 10);
+
+PLANAR_YUV_BLEND (i422_12le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u12_swap), 12);
+PLANAR_YUV_BLEND (i422_12be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy,
+ BLEND_HIGH (u12), 12);
+#endif /* G_BYTE_ORDER == G_LITTLE_ENDIAN */
+
+PLANAR_YUV_HIGH_FILL_CHECKER (i420_10le, 10, LE, compositor_orc_memset_u16_2d);
+PLANAR_YUV_HIGH_FILL_COLOR (i420_10le, LE, compositor_orc_memset_u16_2d);
+PLANAR_YUV_HIGH_FILL_CHECKER (i420_10be, 10, BE, compositor_orc_memset_u16_2d);
+PLANAR_YUV_HIGH_FILL_COLOR (i420_10be, BE, compositor_orc_memset_u16_2d);
+
+PLANAR_YUV_HIGH_FILL_CHECKER (i420_12le, 12, LE, compositor_orc_memset_u16_2d);
+PLANAR_YUV_HIGH_FILL_COLOR (i420_12le, LE, compositor_orc_memset_u16_2d);
+PLANAR_YUV_HIGH_FILL_CHECKER (i420_12be, 12, BE, compositor_orc_memset_u16_2d);
+PLANAR_YUV_HIGH_FILL_COLOR (i420_12be, BE, compositor_orc_memset_u16_2d);
+
+
/* NV12, NV21 */
#define NV_YUV_BLEND(format_name,MEMCPY,BLENDLOOP) \
inline static void \
/* BGRx, xRGB, xBGR are equal to RGBx */
BlendFunction gst_compositor_blend_yuy2;
/* YVYU and UYVY are equal to YUY2 */
+BlendFunction gst_compositor_blend_i420_10le; /* this and the seven pointers below are assigned in gst_compositor_init_blend () */
+BlendFunction gst_compositor_blend_i420_10be;
+BlendFunction gst_compositor_blend_i420_12le;
+BlendFunction gst_compositor_blend_i420_12be;
+BlendFunction gst_compositor_blend_i422_10le;
+BlendFunction gst_compositor_blend_i422_10be;
+BlendFunction gst_compositor_blend_i422_12le;
+BlendFunction gst_compositor_blend_i422_12be;
FillCheckerFunction gst_compositor_fill_checker_argb;
FillCheckerFunction gst_compositor_fill_checker_bgra;
FillCheckerFunction gst_compositor_fill_checker_yuy2;
/* YVYU is equal to YUY2 */
FillCheckerFunction gst_compositor_fill_checker_uyvy;
+FillCheckerFunction gst_compositor_fill_checker_i420_10le; /* only i420 variants get a checker filler here; no i422 pointer is defined in this hunk */
+FillCheckerFunction gst_compositor_fill_checker_i420_10be;
+FillCheckerFunction gst_compositor_fill_checker_i420_12le;
+FillCheckerFunction gst_compositor_fill_checker_i420_12be;
FillColorFunction gst_compositor_fill_color_argb;
FillColorFunction gst_compositor_fill_color_bgra;
FillColorFunction gst_compositor_fill_color_yuy2;
FillColorFunction gst_compositor_fill_color_yvyu;
FillColorFunction gst_compositor_fill_color_uyvy;
+FillColorFunction gst_compositor_fill_color_i420_10le; /* likewise i420 only for the colour fillers */
+FillColorFunction gst_compositor_fill_color_i420_10be;
+FillColorFunction gst_compositor_fill_color_i420_12le;
+FillColorFunction gst_compositor_fill_color_i420_12be;
void
gst_compositor_init_blend (void)
gst_compositor_blend_rgb = GST_DEBUG_FUNCPTR (blend_rgb);
gst_compositor_blend_xrgb = GST_DEBUG_FUNCPTR (blend_xrgb);
gst_compositor_blend_yuy2 = GST_DEBUG_FUNCPTR (blend_yuy2);
+ gst_compositor_blend_i420_10le = GST_DEBUG_FUNCPTR (blend_i420_10le);
+ gst_compositor_blend_i420_10be = GST_DEBUG_FUNCPTR (blend_i420_10be);
+ gst_compositor_blend_i420_12le = GST_DEBUG_FUNCPTR (blend_i420_12le);
+ gst_compositor_blend_i420_12be = GST_DEBUG_FUNCPTR (blend_i420_12be);
+ gst_compositor_blend_i422_10le = GST_DEBUG_FUNCPTR (blend_i422_10le);
+ gst_compositor_blend_i422_10be = GST_DEBUG_FUNCPTR (blend_i422_10be);
+ gst_compositor_blend_i422_12le = GST_DEBUG_FUNCPTR (blend_i422_12le);
+ gst_compositor_blend_i422_12be = GST_DEBUG_FUNCPTR (blend_i422_12be);
gst_compositor_fill_checker_argb = GST_DEBUG_FUNCPTR (fill_checker_argb_c);
gst_compositor_fill_checker_bgra = GST_DEBUG_FUNCPTR (fill_checker_bgra_c);
gst_compositor_fill_checker_rgbx = GST_DEBUG_FUNCPTR (fill_checker_rgbx_c);
gst_compositor_fill_checker_yuy2 = GST_DEBUG_FUNCPTR (fill_checker_yuy2_c);
gst_compositor_fill_checker_uyvy = GST_DEBUG_FUNCPTR (fill_checker_uyvy_c);
+ gst_compositor_fill_checker_i420_10le =
+ GST_DEBUG_FUNCPTR (fill_checker_i420_10le);
+ gst_compositor_fill_checker_i420_10be =
+ GST_DEBUG_FUNCPTR (fill_checker_i420_10be);
+ gst_compositor_fill_checker_i420_12le =
+ GST_DEBUG_FUNCPTR (fill_checker_i420_12le);
+ gst_compositor_fill_checker_i420_12be =
+ GST_DEBUG_FUNCPTR (fill_checker_i420_12be);
gst_compositor_fill_color_argb = GST_DEBUG_FUNCPTR (fill_color_argb);
gst_compositor_fill_color_bgra = GST_DEBUG_FUNCPTR (fill_color_bgra);
gst_compositor_fill_color_yuy2 = GST_DEBUG_FUNCPTR (fill_color_yuy2);
gst_compositor_fill_color_yvyu = GST_DEBUG_FUNCPTR (fill_color_yvyu);
gst_compositor_fill_color_uyvy = GST_DEBUG_FUNCPTR (fill_color_uyvy);
+ gst_compositor_fill_color_i420_10le =
+ GST_DEBUG_FUNCPTR (fill_color_i420_10le);
+ gst_compositor_fill_color_i420_10be =
+ GST_DEBUG_FUNCPTR (fill_color_i420_10be);
+ gst_compositor_fill_color_i420_12le =
+ GST_DEBUG_FUNCPTR (fill_color_i420_12le);
+ gst_compositor_fill_color_i420_12be =
+ GST_DEBUG_FUNCPTR (fill_color_i420_12be);
}
void compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
void compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
 const guint32 * ORC_RESTRICT s1, int n);
+void compositor_orc_memset_u16_2d (guint8 * ORC_RESTRICT d1, int d1_stride,
+ int p1, int n, int m); /* 2-D fill: p1 stored into n 16-bit elements on each of m rows */
void compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
+void compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); /* 16-bit elements, fixed-point blend with a shift of 10 */
+void compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); /* same, shift of 12 */
+void compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); /* same, shift of 16 */
+void compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); /* NOTE(review): _swap bodies are not visible here; presumably byte-swapped variants - confirm */
+void compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
+void compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
void compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
void compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
#endif
+/* compositor_orc_memset_u16_2d
+ *
+ * Stores the 16-bit value taken from p1 into n consecutive 16-bit elements
+ * on each of m rows; row j starts at ORC_PTR_OFFSET (d1, d1_stride * j).
+ * With DISABLE_ORC defined this is the plain C loop below. Otherwise an ORC
+ * program is created from static bytecode and compiled once (guarded by
+ * p_inited and the orc_once mutex), with
+ * _backup_compositor_orc_memset_u16_2d registered as its backup function.
+ */
+#ifdef DISABLE_ORC
+void
+compositor_orc_memset_u16_2d (guint8 * ORC_RESTRICT d1, int d1_stride, int p1,
+ int n, int m)
+{
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ orc_union16 var32;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+
+ /* 0: loadpw */
+ var32.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 1: storew */
+ ptr0[i] = var32;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_memset_u16_2d (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int j;
+ int n = ex->n;
+ int m = ex->params[ORC_VAR_A1];
+ orc_union16 *ORC_RESTRICT ptr0;
+ orc_union16 var32;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+
+ /* 0: loadpw */
+ var32.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 1: storew */
+ ptr0[i] = var32;
+ }
+ }
+
+}
+
+void
+compositor_orc_memset_u16_2d (guint8 * ORC_RESTRICT d1, int d1_stride, int p1,
+ int n, int m)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 7, 9, 28, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 109, 101, 109, 115, 101, 116, 95, 117, 49, 54, 95, 50, 100,
+ 11, 2, 2, 16, 2, 97, 0, 24, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p, _backup_compositor_orc_memset_u16_2d);
+#else
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_memset_u16_2d");
+ orc_program_set_backup_function (p, _backup_compositor_orc_memset_u16_2d);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_parameter (p, 2, "p1");
+
+ orc_program_append_2 (p, "storew", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
/* compositor_orc_blend_u8 */
#ifdef DISABLE_ORC
void
#endif
+/* compositor_orc_blend_u10
+ *
+ * Blends m rows of n 16-bit elements from s1 into d1 in place. Per element,
+ * with d and s widened to 32 bits:
+ *   d = ((d << 10) + (s - d) * p1) >> 10
+ * and the result is narrowed back to 16 bits through ORC_CLAMP_UW.
+ * Row j of each buffer starts d1_stride * j / s1_stride * j past its base
+ * (via ORC_PTR_OFFSET). With DISABLE_ORC defined the C loop below is the
+ * implementation; otherwise an ORC program is compiled once from static
+ * bytecode and _backup_compositor_orc_blend_u10 is set as its backup.
+ */
+#ifdef DISABLE_ORC
+void
+compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var34;
+ orc_union16 var35;
+ orc_union32 var36;
+ orc_union16 var37;
+ orc_union32 var38;
+ orc_union32 var39;
+ orc_union32 var40;
+ orc_union32 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+ ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
+
+ /* 5: loadpl */
+ var36.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw */
+ var34 = ptr0[i];
+ /* 1: convuwl */
+ var38.i = (orc_uint16) var34.i;
+ /* 2: loadw */
+ var35 = ptr4[i];
+ /* 3: convuwl */
+ var39.i = (orc_uint16) var35.i;
+ /* 4: subl */
+ var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i);
+ /* 6: mulll */
+ var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff;
+ /* 7: shll */
+ var42.i = ((orc_uint32) var38.i) << 10;
+ /* 8: addl */
+ var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i);
+ /* 9: shrul */
+ var44.i = ((orc_uint32) var43.i) >> 10;
+ /* 10: convsuslw */
+ var37.i = ORC_CLAMP_UW (var44.i);
+ /* 11: storew */
+ ptr0[i] = var37;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_blend_u10 (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int j;
+ int n = ex->n;
+ int m = ex->params[ORC_VAR_A1];
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var34;
+ orc_union16 var35;
+ orc_union32 var36;
+ orc_union16 var37;
+ orc_union32 var38;
+ orc_union32 var39;
+ orc_union32 var40;
+ orc_union32 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+ ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
+
+ /* 5: loadpl */
+ var36.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw */
+ var34 = ptr0[i];
+ /* 1: convuwl */
+ var38.i = (orc_uint16) var34.i;
+ /* 2: loadw */
+ var35 = ptr4[i];
+ /* 3: convuwl */
+ var39.i = (orc_uint16) var35.i;
+ /* 4: subl */
+ var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i);
+ /* 6: mulll */
+ var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff;
+ /* 7: shll */
+ var42.i = ((orc_uint32) var38.i) << 10;
+ /* 8: addl */
+ var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i);
+ /* 9: shrul */
+ var44.i = ((orc_uint32) var43.i) >> 10;
+ /* 10: convsuslw */
+ var37.i = ORC_CLAMP_UW (var44.i);
+ /* 11: storew */
+ ptr0[i] = var37;
+ }
+ }
+
+}
+
+void
+compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 7, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 48, 11, 2, 2, 12,
+ 2, 2, 14, 1, 10, 0, 0, 0, 16, 2, 20, 4, 20, 4, 154, 32,
+ 0, 154, 33, 4, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16,
+ 103, 33, 32, 33, 126, 33, 33, 16, 166, 0, 33, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p, _backup_compositor_orc_blend_u10);
+#else
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_blend_u10");
+ orc_program_set_backup_function (p, _backup_compositor_orc_blend_u10);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_source (p, 2, "s1");
+ orc_program_add_constant (p, 1, 0x0000000a, "c1");
+ orc_program_add_parameter (p, 2, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+ orc_program_add_temporary (p, 4, "t2");
+
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_D1, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->arrays[ORC_VAR_S1] = (void *) s1;
+ ex->params[ORC_VAR_S1] = s1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* compositor_orc_blend_u12
+ *
+ * Blends m rows of n 16-bit elements from s1 into d1 in place. Per element,
+ * with d and s widened to 32 bits:
+ *   d = ((d << 12) + (s - d) * p1) >> 12
+ * and the result is narrowed back to 16 bits through ORC_CLAMP_UW.
+ * Row j of each buffer starts d1_stride * j / s1_stride * j past its base
+ * (via ORC_PTR_OFFSET). With DISABLE_ORC defined the C loop below is the
+ * implementation; otherwise an ORC program is compiled once from static
+ * bytecode and _backup_compositor_orc_blend_u12 is set as its backup.
+ */
+#ifdef DISABLE_ORC
+void
+compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var34;
+ orc_union16 var35;
+ orc_union32 var36;
+ orc_union16 var37;
+ orc_union32 var38;
+ orc_union32 var39;
+ orc_union32 var40;
+ orc_union32 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+ ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
+
+ /* 5: loadpl */
+ var36.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw */
+ var34 = ptr0[i];
+ /* 1: convuwl */
+ var38.i = (orc_uint16) var34.i;
+ /* 2: loadw */
+ var35 = ptr4[i];
+ /* 3: convuwl */
+ var39.i = (orc_uint16) var35.i;
+ /* 4: subl */
+ var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i);
+ /* 6: mulll */
+ var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff;
+ /* 7: shll */
+ var42.i = ((orc_uint32) var38.i) << 12;
+ /* 8: addl */
+ var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i);
+ /* 9: shrul */
+ var44.i = ((orc_uint32) var43.i) >> 12;
+ /* 10: convsuslw */
+ var37.i = ORC_CLAMP_UW (var44.i);
+ /* 11: storew */
+ ptr0[i] = var37;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_blend_u12 (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int j;
+ int n = ex->n;
+ int m = ex->params[ORC_VAR_A1];
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var34;
+ orc_union16 var35;
+ orc_union32 var36;
+ orc_union16 var37;
+ orc_union32 var38;
+ orc_union32 var39;
+ orc_union32 var40;
+ orc_union32 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+ ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
+
+ /* 5: loadpl */
+ var36.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw */
+ var34 = ptr0[i];
+ /* 1: convuwl */
+ var38.i = (orc_uint16) var34.i;
+ /* 2: loadw */
+ var35 = ptr4[i];
+ /* 3: convuwl */
+ var39.i = (orc_uint16) var35.i;
+ /* 4: subl */
+ var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i);
+ /* 6: mulll */
+ var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff;
+ /* 7: shll */
+ var42.i = ((orc_uint32) var38.i) << 12;
+ /* 8: addl */
+ var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i);
+ /* 9: shrul */
+ var44.i = ((orc_uint32) var43.i) >> 12;
+ /* 10: convsuslw */
+ var37.i = ORC_CLAMP_UW (var44.i);
+ /* 11: storew */
+ ptr0[i] = var37;
+ }
+ }
+
+}
+
+void
+compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0;
+ static OrcCode *c = 0;
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) {
+ orc_once_mutex_lock ();
+ if (!p_inited) {
+ OrcProgram *p;
+
+#if 1
+ static const orc_uint8 bc[] = {
+ 1, 7, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 50, 11, 2, 2, 12,
+ 2, 2, 14, 1, 12, 0, 0, 0, 16, 2, 20, 4, 20, 4, 154, 32,
+ 0, 154, 33, 4, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16,
+ 103, 33, 32, 33, 126, 33, 33, 16, 166, 0, 33, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p, _backup_compositor_orc_blend_u12);
+#else
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_blend_u12");
+ orc_program_set_backup_function (p, _backup_compositor_orc_blend_u12);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_source (p, 2, "s1");
+ orc_program_add_constant (p, 1, 0x0000000c, "c1");
+ orc_program_add_parameter (p, 2, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+ orc_program_add_temporary (p, 4, "t2");
+
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_D1, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p);
+ c = orc_program_take_code (p);
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c;
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->arrays[ORC_VAR_S1] = (void *) s1;
+ ex->params[ORC_VAR_S1] = s1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec;
+ func (ex);
+}
+#endif
+
+
+/* compositor_orc_blend_u16
+ *
+ * Blends m rows of n 16-bit elements from s1 into d1 in place. Per element,
+ * with d and s widened to 32 bits:
+ *   d = ((d << 16) + (s - d) * p1) >> 16
+ * and the result is narrowed back to 16 bits through ORC_CLAMP_UW.
+ * Row j of each buffer starts d1_stride * j / s1_stride * j past its base
+ * (via ORC_PTR_OFFSET). The DISABLE_ORC variant and the backup function
+ * below run the same per-element sequence.
+ */
+#ifdef DISABLE_ORC
+void
+compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var34;
+ orc_union16 var35;
+ orc_union32 var36;
+ orc_union16 var37;
+ orc_union32 var38;
+ orc_union32 var39;
+ orc_union32 var40;
+ orc_union32 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+ ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
+
+ /* 5: loadpl */
+ var36.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw */
+ var34 = ptr0[i];
+ /* 1: convuwl */
+ var38.i = (orc_uint16) var34.i;
+ /* 2: loadw */
+ var35 = ptr4[i];
+ /* 3: convuwl */
+ var39.i = (orc_uint16) var35.i;
+ /* 4: subl */
+ var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i);
+ /* 6: mulll */
+ var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff;
+ /* 7: shll */
+ var42.i = ((orc_uint32) var38.i) << 16;
+ /* 8: addl */
+ var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i);
+ /* 9: shrul */
+ var44.i = ((orc_uint32) var43.i) >> 16;
+ /* 10: convsuslw */
+ var37.i = ORC_CLAMP_UW (var44.i);
+ /* 11: storew */
+ ptr0[i] = var37;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_blend_u16 (OrcExecutor * ORC_RESTRICT ex)
+{
+ int i;
+ int j;
+ int n = ex->n;
+ int m = ex->params[ORC_VAR_A1];
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var34;
+ orc_union16 var35;
+ orc_union32 var36;
+ orc_union16 var37;
+ orc_union32 var38;
+ orc_union32 var39;
+ orc_union32 var40;
+ orc_union32 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+ ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
+
+ /* 5: loadpl */
+ var36.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw */
+ var34 = ptr0[i];
+ /* 1: convuwl */
+ var38.i = (orc_uint16) var34.i;
+ /* 2: loadw */
+ var35 = ptr4[i];
+ /* 3: convuwl */
+ var39.i = (orc_uint16) var35.i;
+ /* 4: subl */
+ var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i);
+ /* 6: mulll */
+ var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff;
+ /* 7: shll */
+ var42.i = ((orc_uint32) var38.i) << 16;
+ /* 8: addl */
+ var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i);
+ /* 9: shrul */
+ var44.i = ((orc_uint32) var43.i) >> 16;
+ /* 10: convsuslw */
+ var37.i = ORC_CLAMP_UW (var44.i);
+ /* 11: storew */
+ ptr0[i] = var37;
+ }
+ }
+
+}
+
+void
+compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Entry point used when DISABLE_ORC is not defined. The first call
+ * builds and compiles the Orc program and keeps the resulting OrcCode in
+ * a function-local static; every call then copies its arguments into an
+ * on-stack OrcExecutor and invokes the code's exec pointer.
+ * d1/d1_stride: destination and its row stride, s1/s1_stride: source and
+ * its row stride, p1: blend weight, n: elements per row, m: rows. */
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0; /* becomes TRUE once c is set */
+ static OrcCode *c = 0; /* compiled code, reused by later calls */
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) { /* first check, made without the lock */
+ orc_once_mutex_lock ();
+ if (!p_inited) { /* second check, made while holding the lock */
+ OrcProgram *p;
+
+#if 1 /* active branch: program comes from the pre-encoded bytes below */
+ static const orc_uint8 bc[] = { /* 99, 111, 109, ... spell the program name in ASCII */
+ 1, 7, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 54, 11, 2, 2, 12,
+ 2, 2, 14, 1, 16, 0, 0, 0, 16, 2, 20, 4, 20, 4, 154, 32,
+ 0, 154, 33, 4, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16,
+ 103, 33, 32, 33, 126, 33, 33, 16, 166, 0, 33, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p, _backup_compositor_orc_blend_u16);
+#else /* not compiled (#if 1 above): builds the program call by call */
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_blend_u16");
+ orc_program_set_backup_function (p, _backup_compositor_orc_blend_u16);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_source (p, 2, "s1");
+ orc_program_add_constant (p, 1, 0x00000010, "c1"); /* shift amount: 16 */
+ orc_program_add_parameter (p, 2, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+ orc_program_add_temporary (p, 4, "t2");
+
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_D1, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p); /* NOTE(review): return value is not inspected here */
+ c = orc_program_take_code (p); /* keep the code; the program itself is freed next */
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c; /* hand the compiled code to the executor */
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->arrays[ORC_VAR_S1] = (void *) s1; /* cast drops const to fit the arrays slot */
+ ex->params[ORC_VAR_S1] = s1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec; /* function pointer stored in the OrcCode */
+ func (ex);
+}
+#endif
+
+
+/* compositor_orc_blend_u10_swap */
+#ifdef DISABLE_ORC
+void
+compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Plain-C build of the swapped 10-bit blend (this copy sits under
+ * #ifdef DISABLE_ORC). For each of the m rows and each of the n 16-bit
+ * elements per row, with D = ORC_SWAP_W (d) and S = ORC_SWAP_W (s), it
+ * rewrites d1 in place as
+ * d = ORC_SWAP_W (ORC_CLAMP_UW (((D << 10) + (S - D) * p1) >> 10)).
+ * Row j of d1 / s1 starts at ORC_PTR_OFFSET (base, stride * j).
+ * p1 is the blend weight; presumably alpha scaled to 10 bits by the
+ * caller - TODO confirm. */
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var35;
+ orc_union16 var36;
+ orc_union32 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union32 var40;
+ orc_union16 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+ orc_union32 var45;
+ orc_union32 var46;
+ orc_union32 var47;
+ orc_union16 var48;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+ ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
+
+ /* 7: loadpl - blend weight p1, fetched once per row */
+ var37.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw - current destination element */
+ var35 = ptr0[i];
+ /* 1: swapw - ORC_SWAP_W on the destination (presumably a byte swap) */
+ var39.i = ORC_SWAP_W (var35.i);
+ /* 2: convuwl - widen the swapped destination through an orc_uint16 cast */
+ var40.i = (orc_uint16) var39.i;
+ /* 3: loadw - source element at the same index */
+ var36 = ptr4[i];
+ /* 4: swapw - ORC_SWAP_W on the source */
+ var41.i = ORC_SWAP_W (var36.i);
+ /* 5: convuwl - widen the swapped source through an orc_uint16 cast */
+ var42.i = (orc_uint16) var41.i;
+ /* 6: subl - source minus destination, computed through orc_uint32 casts */
+ var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i);
+ /* 8: mulll - difference times blend weight, masked to 32 bits */
+ var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff;
+ /* 9: shll - destination shifted up by 10 bits */
+ var45.i = ((orc_uint32) var40.i) << 10;
+ /* 10: addl - shifted destination plus weighted difference */
+ var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i);
+ /* 11: shrul - shift the sum back down by the same 10 bits */
+ var47.i = ((orc_uint32) var46.i) >> 10;
+ /* 12: convsuslw - narrow to a 16-bit value via ORC_CLAMP_UW */
+ var48.i = ORC_CLAMP_UW (var47.i);
+ /* 13: swapw - ORC_SWAP_W again before storing */
+ var38.i = ORC_SWAP_W (var48.i);
+ /* 14: storew - write the blended element back over the destination */
+ ptr0[i] = var38;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_blend_u10_swap (OrcExecutor * ORC_RESTRICT ex)
+{ /* Scalar C version of the swapped 10-bit blend that takes its
+ * arguments from an OrcExecutor. compositor_orc_blend_u10_swap registers
+ * it through orc_program_set_backup_function. Slots 0, 4 and 24 read
+ * below are presumably ORC_VAR_D1, ORC_VAR_S1 and ORC_VAR_P1, i.e. the
+ * slots the wrapper fills in - TODO confirm against the Orc headers. */
+ int i;
+ int j;
+ int n = ex->n; /* elements per row */
+ int m = ex->params[ORC_VAR_A1]; /* number of rows */
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var35;
+ orc_union16 var36;
+ orc_union32 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union32 var40;
+ orc_union16 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+ orc_union32 var45;
+ orc_union32 var46;
+ orc_union32 var47;
+ orc_union16 var48;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+ ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
+
+ /* 7: loadpl - blend weight from executor slot 24, fetched once per row */
+ var37.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw - current destination element */
+ var35 = ptr0[i];
+ /* 1: swapw - ORC_SWAP_W on the destination (presumably a byte swap) */
+ var39.i = ORC_SWAP_W (var35.i);
+ /* 2: convuwl - widen the swapped destination through an orc_uint16 cast */
+ var40.i = (orc_uint16) var39.i;
+ /* 3: loadw - source element at the same index */
+ var36 = ptr4[i];
+ /* 4: swapw - ORC_SWAP_W on the source */
+ var41.i = ORC_SWAP_W (var36.i);
+ /* 5: convuwl - widen the swapped source through an orc_uint16 cast */
+ var42.i = (orc_uint16) var41.i;
+ /* 6: subl - source minus destination, computed through orc_uint32 casts */
+ var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i);
+ /* 8: mulll - difference times blend weight, masked to 32 bits */
+ var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff;
+ /* 9: shll - destination shifted up by 10 bits */
+ var45.i = ((orc_uint32) var40.i) << 10;
+ /* 10: addl - shifted destination plus weighted difference */
+ var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i);
+ /* 11: shrul - shift the sum back down by the same 10 bits */
+ var47.i = ((orc_uint32) var46.i) >> 10;
+ /* 12: convsuslw - narrow to a 16-bit value via ORC_CLAMP_UW */
+ var48.i = ORC_CLAMP_UW (var47.i);
+ /* 13: swapw - ORC_SWAP_W again before storing */
+ var38.i = ORC_SWAP_W (var48.i);
+ /* 14: storew - write the blended element back over the destination */
+ ptr0[i] = var38;
+ }
+ }
+
+}
+
+void
+compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Entry point used when DISABLE_ORC is not defined. The first call
+ * builds and compiles the Orc program and keeps the resulting OrcCode in
+ * a function-local static; every call then copies its arguments into an
+ * on-stack OrcExecutor and invokes the code's exec pointer.
+ * d1/d1_stride: destination and its row stride, s1/s1_stride: source and
+ * its row stride, p1: blend weight, n: elements per row, m: rows. */
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0; /* becomes TRUE once c is set */
+ static OrcCode *c = 0; /* compiled code, reused by later calls */
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) { /* first check, made without the lock */
+ orc_once_mutex_lock ();
+ if (!p_inited) { /* second check, made while holding the lock */
+ OrcProgram *p;
+
+#if 1 /* active branch: program comes from the pre-encoded bytes below */
+ static const orc_uint8 bc[] = { /* 99, 111, 109, ... spell the program name in ASCII */
+ 1, 7, 9, 29, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 48, 95, 115, 119, 97,
+ 112, 11, 2, 2, 12, 2, 2, 14, 1, 10, 0, 0, 0, 16, 2, 20,
+ 4, 20, 4, 20, 2, 183, 34, 0, 154, 32, 34, 183, 34, 4, 154, 33,
+ 34, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, 103, 33, 32,
+ 33, 126, 33, 33, 16, 166, 34, 33, 183, 0, 34, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_compositor_orc_blend_u10_swap);
+#else /* not compiled (#if 1 above): builds the program call by call */
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_blend_u10_swap");
+ orc_program_set_backup_function (p,
+ _backup_compositor_orc_blend_u10_swap);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_source (p, 2, "s1");
+ orc_program_add_constant (p, 1, 0x0000000a, "c1"); /* shift amount: 10 */
+ orc_program_add_parameter (p, 2, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+ orc_program_add_temporary (p, 4, "t2");
+ orc_program_add_temporary (p, 2, "t3");
+
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_T3, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p); /* NOTE(review): return value is not inspected here */
+ c = orc_program_take_code (p); /* keep the code; the program itself is freed next */
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c; /* hand the compiled code to the executor */
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->arrays[ORC_VAR_S1] = (void *) s1; /* cast drops const to fit the arrays slot */
+ ex->params[ORC_VAR_S1] = s1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec; /* function pointer stored in the OrcCode */
+ func (ex);
+}
+#endif
+
+
+/* compositor_orc_blend_u12_swap */
+#ifdef DISABLE_ORC
+void
+compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Plain-C build of the swapped 12-bit blend (this copy sits under
+ * #ifdef DISABLE_ORC). For each of the m rows and each of the n 16-bit
+ * elements per row, with D = ORC_SWAP_W (d) and S = ORC_SWAP_W (s), it
+ * rewrites d1 in place as
+ * d = ORC_SWAP_W (ORC_CLAMP_UW (((D << 12) + (S - D) * p1) >> 12)).
+ * Row j of d1 / s1 starts at ORC_PTR_OFFSET (base, stride * j).
+ * p1 is the blend weight; presumably alpha scaled to 12 bits by the
+ * caller - TODO confirm. */
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var35;
+ orc_union16 var36;
+ orc_union32 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union32 var40;
+ orc_union16 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+ orc_union32 var45;
+ orc_union32 var46;
+ orc_union32 var47;
+ orc_union16 var48;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+ ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
+
+ /* 7: loadpl - blend weight p1, fetched once per row */
+ var37.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw - current destination element */
+ var35 = ptr0[i];
+ /* 1: swapw - ORC_SWAP_W on the destination (presumably a byte swap) */
+ var39.i = ORC_SWAP_W (var35.i);
+ /* 2: convuwl - widen the swapped destination through an orc_uint16 cast */
+ var40.i = (orc_uint16) var39.i;
+ /* 3: loadw - source element at the same index */
+ var36 = ptr4[i];
+ /* 4: swapw - ORC_SWAP_W on the source */
+ var41.i = ORC_SWAP_W (var36.i);
+ /* 5: convuwl - widen the swapped source through an orc_uint16 cast */
+ var42.i = (orc_uint16) var41.i;
+ /* 6: subl - source minus destination, computed through orc_uint32 casts */
+ var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i);
+ /* 8: mulll - difference times blend weight, masked to 32 bits */
+ var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff;
+ /* 9: shll - destination shifted up by 12 bits */
+ var45.i = ((orc_uint32) var40.i) << 12;
+ /* 10: addl - shifted destination plus weighted difference */
+ var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i);
+ /* 11: shrul - shift the sum back down by the same 12 bits */
+ var47.i = ((orc_uint32) var46.i) >> 12;
+ /* 12: convsuslw - narrow to a 16-bit value via ORC_CLAMP_UW */
+ var48.i = ORC_CLAMP_UW (var47.i);
+ /* 13: swapw - ORC_SWAP_W again before storing */
+ var38.i = ORC_SWAP_W (var48.i);
+ /* 14: storew - write the blended element back over the destination */
+ ptr0[i] = var38;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_blend_u12_swap (OrcExecutor * ORC_RESTRICT ex)
+{ /* Scalar C version of the swapped 12-bit blend that takes its
+ * arguments from an OrcExecutor. compositor_orc_blend_u12_swap registers
+ * it through orc_program_set_backup_function. Slots 0, 4 and 24 read
+ * below are presumably ORC_VAR_D1, ORC_VAR_S1 and ORC_VAR_P1, i.e. the
+ * slots the wrapper fills in - TODO confirm against the Orc headers. */
+ int i;
+ int j;
+ int n = ex->n; /* elements per row */
+ int m = ex->params[ORC_VAR_A1]; /* number of rows */
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var35;
+ orc_union16 var36;
+ orc_union32 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union32 var40;
+ orc_union16 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+ orc_union32 var45;
+ orc_union32 var46;
+ orc_union32 var47;
+ orc_union16 var48;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+ ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
+
+ /* 7: loadpl - blend weight from executor slot 24, fetched once per row */
+ var37.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw - current destination element */
+ var35 = ptr0[i];
+ /* 1: swapw - ORC_SWAP_W on the destination (presumably a byte swap) */
+ var39.i = ORC_SWAP_W (var35.i);
+ /* 2: convuwl - widen the swapped destination through an orc_uint16 cast */
+ var40.i = (orc_uint16) var39.i;
+ /* 3: loadw - source element at the same index */
+ var36 = ptr4[i];
+ /* 4: swapw - ORC_SWAP_W on the source */
+ var41.i = ORC_SWAP_W (var36.i);
+ /* 5: convuwl - widen the swapped source through an orc_uint16 cast */
+ var42.i = (orc_uint16) var41.i;
+ /* 6: subl - source minus destination, computed through orc_uint32 casts */
+ var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i);
+ /* 8: mulll - difference times blend weight, masked to 32 bits */
+ var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff;
+ /* 9: shll - destination shifted up by 12 bits */
+ var45.i = ((orc_uint32) var40.i) << 12;
+ /* 10: addl - shifted destination plus weighted difference */
+ var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i);
+ /* 11: shrul - shift the sum back down by the same 12 bits */
+ var47.i = ((orc_uint32) var46.i) >> 12;
+ /* 12: convsuslw - narrow to a 16-bit value via ORC_CLAMP_UW */
+ var48.i = ORC_CLAMP_UW (var47.i);
+ /* 13: swapw - ORC_SWAP_W again before storing */
+ var38.i = ORC_SWAP_W (var48.i);
+ /* 14: storew - write the blended element back over the destination */
+ ptr0[i] = var38;
+ }
+ }
+
+}
+
+void
+compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Entry point used when DISABLE_ORC is not defined. The first call
+ * builds and compiles the Orc program and keeps the resulting OrcCode in
+ * a function-local static; every call then copies its arguments into an
+ * on-stack OrcExecutor and invokes the code's exec pointer.
+ * d1/d1_stride: destination and its row stride, s1/s1_stride: source and
+ * its row stride, p1: blend weight, n: elements per row, m: rows. */
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0; /* becomes TRUE once c is set */
+ static OrcCode *c = 0; /* compiled code, reused by later calls */
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) { /* first check, made without the lock */
+ orc_once_mutex_lock ();
+ if (!p_inited) { /* second check, made while holding the lock */
+ OrcProgram *p;
+
+#if 1 /* active branch: program comes from the pre-encoded bytes below */
+ static const orc_uint8 bc[] = { /* 99, 111, 109, ... spell the program name in ASCII */
+ 1, 7, 9, 29, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 50, 95, 115, 119, 97,
+ 112, 11, 2, 2, 12, 2, 2, 14, 1, 12, 0, 0, 0, 16, 2, 20,
+ 4, 20, 4, 20, 2, 183, 34, 0, 154, 32, 34, 183, 34, 4, 154, 33,
+ 34, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, 103, 33, 32,
+ 33, 126, 33, 33, 16, 166, 34, 33, 183, 0, 34, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_compositor_orc_blend_u12_swap);
+#else /* not compiled (#if 1 above): builds the program call by call */
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_blend_u12_swap");
+ orc_program_set_backup_function (p,
+ _backup_compositor_orc_blend_u12_swap);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_source (p, 2, "s1");
+ orc_program_add_constant (p, 1, 0x0000000c, "c1"); /* shift amount: 12 */
+ orc_program_add_parameter (p, 2, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+ orc_program_add_temporary (p, 4, "t2");
+ orc_program_add_temporary (p, 2, "t3");
+
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_T3, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p); /* NOTE(review): return value is not inspected here */
+ c = orc_program_take_code (p); /* keep the code; the program itself is freed next */
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c; /* hand the compiled code to the executor */
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->arrays[ORC_VAR_S1] = (void *) s1; /* cast drops const to fit the arrays slot */
+ ex->params[ORC_VAR_S1] = s1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec; /* function pointer stored in the OrcCode */
+ func (ex);
+}
+#endif
+
+
+/* compositor_orc_blend_u16_swap */
+#ifdef DISABLE_ORC
+void
+compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Plain-C build of the swapped 16-bit blend (this copy sits under
+ * #ifdef DISABLE_ORC). For each of the m rows and each of the n 16-bit
+ * elements per row, with D = ORC_SWAP_W (d) and S = ORC_SWAP_W (s), it
+ * rewrites d1 in place as
+ * d = ORC_SWAP_W (ORC_CLAMP_UW (((D << 16) + (S - D) * p1) >> 16)).
+ * Row j of d1 / s1 starts at ORC_PTR_OFFSET (base, stride * j).
+ * p1 is the blend weight; presumably alpha scaled to 16 bits by the
+ * caller - TODO confirm. */
+ int i;
+ int j;
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var35;
+ orc_union16 var36;
+ orc_union32 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union32 var40;
+ orc_union16 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+ orc_union32 var45;
+ orc_union32 var46;
+ orc_union32 var47;
+ orc_union16 var48;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
+ ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
+
+ /* 7: loadpl - blend weight p1, fetched once per row */
+ var37.i = p1;
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw - current destination element */
+ var35 = ptr0[i];
+ /* 1: swapw - ORC_SWAP_W on the destination (presumably a byte swap) */
+ var39.i = ORC_SWAP_W (var35.i);
+ /* 2: convuwl - widen the swapped destination through an orc_uint16 cast */
+ var40.i = (orc_uint16) var39.i;
+ /* 3: loadw - source element at the same index */
+ var36 = ptr4[i];
+ /* 4: swapw - ORC_SWAP_W on the source */
+ var41.i = ORC_SWAP_W (var36.i);
+ /* 5: convuwl - widen the swapped source through an orc_uint16 cast */
+ var42.i = (orc_uint16) var41.i;
+ /* 6: subl - source minus destination, computed through orc_uint32 casts */
+ var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i);
+ /* 8: mulll - difference times blend weight, masked to 32 bits */
+ var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff;
+ /* 9: shll - destination shifted up by 16 bits */
+ var45.i = ((orc_uint32) var40.i) << 16;
+ /* 10: addl - shifted destination plus weighted difference */
+ var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i);
+ /* 11: shrul - shift the sum back down by the same 16 bits */
+ var47.i = ((orc_uint32) var46.i) >> 16;
+ /* 12: convsuslw - narrow to a 16-bit value via ORC_CLAMP_UW */
+ var48.i = ORC_CLAMP_UW (var47.i);
+ /* 13: swapw - ORC_SWAP_W again before storing */
+ var38.i = ORC_SWAP_W (var48.i);
+ /* 14: storew - write the blended element back over the destination */
+ ptr0[i] = var38;
+ }
+ }
+
+}
+
+#else
+static void
+_backup_compositor_orc_blend_u16_swap (OrcExecutor * ORC_RESTRICT ex)
+{ /* Scalar C version of the swapped 16-bit blend that takes its
+ * arguments from an OrcExecutor. compositor_orc_blend_u16_swap registers
+ * it through orc_program_set_backup_function. Slots 0, 4 and 24 read
+ * below are presumably ORC_VAR_D1, ORC_VAR_S1 and ORC_VAR_P1, i.e. the
+ * slots the wrapper fills in - TODO confirm against the Orc headers. */
+ int i;
+ int j;
+ int n = ex->n; /* elements per row */
+ int m = ex->params[ORC_VAR_A1]; /* number of rows */
+ orc_union16 *ORC_RESTRICT ptr0;
+ const orc_union16 *ORC_RESTRICT ptr4;
+ orc_union16 var35;
+ orc_union16 var36;
+ orc_union32 var37;
+ orc_union16 var38;
+ orc_union16 var39;
+ orc_union32 var40;
+ orc_union16 var41;
+ orc_union32 var42;
+ orc_union32 var43;
+ orc_union32 var44;
+ orc_union32 var45;
+ orc_union32 var46;
+ orc_union32 var47;
+ orc_union16 var48;
+
+ for (j = 0; j < m; j++) {
+ ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
+ ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
+
+ /* 7: loadpl - blend weight from executor slot 24, fetched once per row */
+ var37.i = ex->params[24];
+
+ for (i = 0; i < n; i++) {
+ /* 0: loadw - current destination element */
+ var35 = ptr0[i];
+ /* 1: swapw - ORC_SWAP_W on the destination (presumably a byte swap) */
+ var39.i = ORC_SWAP_W (var35.i);
+ /* 2: convuwl - widen the swapped destination through an orc_uint16 cast */
+ var40.i = (orc_uint16) var39.i;
+ /* 3: loadw - source element at the same index */
+ var36 = ptr4[i];
+ /* 4: swapw - ORC_SWAP_W on the source */
+ var41.i = ORC_SWAP_W (var36.i);
+ /* 5: convuwl - widen the swapped source through an orc_uint16 cast */
+ var42.i = (orc_uint16) var41.i;
+ /* 6: subl - source minus destination, computed through orc_uint32 casts */
+ var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i);
+ /* 8: mulll - difference times blend weight, masked to 32 bits */
+ var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff;
+ /* 9: shll - destination shifted up by 16 bits */
+ var45.i = ((orc_uint32) var40.i) << 16;
+ /* 10: addl - shifted destination plus weighted difference */
+ var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i);
+ /* 11: shrul - shift the sum back down by the same 16 bits */
+ var47.i = ((orc_uint32) var46.i) >> 16;
+ /* 12: convsuslw - narrow to a 16-bit value via ORC_CLAMP_UW */
+ var48.i = ORC_CLAMP_UW (var47.i);
+ /* 13: swapw - ORC_SWAP_W again before storing */
+ var38.i = ORC_SWAP_W (var48.i);
+ /* 14: storew - write the blended element back over the destination */
+ ptr0[i] = var38;
+ }
+ }
+
+}
+
+void
+compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride,
+ const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
+{ /* Entry point used when DISABLE_ORC is not defined. The first call
+ * builds and compiles the Orc program and keeps the resulting OrcCode in
+ * a function-local static; every call then copies its arguments into an
+ * on-stack OrcExecutor and invokes the code's exec pointer.
+ * d1/d1_stride: destination and its row stride, s1/s1_stride: source and
+ * its row stride, p1: blend weight, n: elements per row, m: rows. */
+ OrcExecutor _ex, *ex = &_ex;
+ static volatile int p_inited = 0; /* becomes TRUE once c is set */
+ static OrcCode *c = 0; /* compiled code, reused by later calls */
+ void (*func) (OrcExecutor *);
+
+ if (!p_inited) { /* first check, made without the lock */
+ orc_once_mutex_lock ();
+ if (!p_inited) { /* second check, made while holding the lock */
+ OrcProgram *p;
+
+#if 1 /* active branch: program comes from the pre-encoded bytes below */
+ static const orc_uint8 bc[] = { /* 99, 111, 109, ... spell the program name in ASCII */
+ 1, 7, 9, 29, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111,
+ 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 54, 95, 115, 119, 97,
+ 112, 11, 2, 2, 12, 2, 2, 14, 1, 16, 0, 0, 0, 16, 2, 20,
+ 4, 20, 4, 20, 2, 183, 34, 0, 154, 32, 34, 183, 34, 4, 154, 33,
+ 34, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, 103, 33, 32,
+ 33, 126, 33, 33, 16, 166, 34, 33, 183, 0, 34, 2, 0,
+ };
+ p = orc_program_new_from_static_bytecode (bc);
+ orc_program_set_backup_function (p,
+ _backup_compositor_orc_blend_u16_swap);
+#else /* not compiled (#if 1 above): builds the program call by call */
+ p = orc_program_new ();
+ orc_program_set_2d (p);
+ orc_program_set_name (p, "compositor_orc_blend_u16_swap");
+ orc_program_set_backup_function (p,
+ _backup_compositor_orc_blend_u16_swap);
+ orc_program_add_destination (p, 2, "d1");
+ orc_program_add_source (p, 2, "s1");
+ orc_program_add_constant (p, 1, 0x00000010, "c1"); /* shift amount: 16 */
+ orc_program_add_parameter (p, 2, "p1");
+ orc_program_add_temporary (p, 4, "t1");
+ orc_program_add_temporary (p, 4, "t2");
+ orc_program_add_temporary (p, 2, "t3");
+
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_T3, ORC_VAR_T2,
+ ORC_VAR_D1, ORC_VAR_D1);
+ orc_program_append_2 (p, "swapw", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1,
+ ORC_VAR_D1);
+#endif
+
+ orc_program_compile (p); /* NOTE(review): return value is not inspected here */
+ c = orc_program_take_code (p); /* keep the code; the program itself is freed next */
+ orc_program_free (p);
+ }
+ p_inited = TRUE;
+ orc_once_mutex_unlock ();
+ }
+ ex->arrays[ORC_VAR_A2] = c; /* hand the compiled code to the executor */
+ ex->program = 0;
+
+ ex->n = n;
+ ORC_EXECUTOR_M (ex) = m;
+ ex->arrays[ORC_VAR_D1] = d1;
+ ex->params[ORC_VAR_D1] = d1_stride;
+ ex->arrays[ORC_VAR_S1] = (void *) s1; /* cast drops const to fit the arrays slot */
+ ex->params[ORC_VAR_S1] = s1_stride;
+ ex->params[ORC_VAR_P1] = p1;
+
+ func = c->exec; /* function pointer stored in the OrcCode */
+ func (ex);
+}
+#endif
+
+
/* compositor_orc_blend_argb */
#ifdef DISABLE_ORC
void