From 05992323b6e41de10455927f68bf84505224aa53 Mon Sep 17 00:00:00 2001
From: David Schleef
Date: Fri, 13 Aug 2010 21:54:54 -0700
Subject: [PATCH] cog: Improvements in colorspace and scaler

Add fast paths for YUV->YUV conversions and Orc code for all.
Use Orc for horizontal resampling.
---
Review notes (commentary only; everything between the "---" marker and the
first "diff --git" line is ignored when the patch is applied).

What the patch does:
 * ext/cog/cogvirtframe.c: the 1-tap and 2-tap horizontal resamplers call
   cogorc_resample_horiz_1tap / cogorc_resample_horiz_2tap instead of the
   open-coded loops.  The step stored in param1 changes from
   256 * src_width / dst_width to 65536 * src_width / dst_width, and the
   4-tap C code is adjusted to match (index = acc >> 16, table phase =
   (acc >> 8) & 255).
 * ext/cog/gstcogcolorspace.c: adds one convert_<IN>_<OUT> wrapper per
   I420/YUY2/UYVY/AYUV/Y42B/Y444 pair and the transforms[] lookup table.
   gst_cogcolorspace_transform() scans the table; on a match it runs the
   wrapper, unrefs both frames and returns GST_FLOW_OK, otherwise it
   continues into the existing generic switch (out_format) path.
 * ext/cog/gstcogorc.orc: adds the Orc kernels used by the above.

Changes made in this revision of the patch (in-place edits, hunk line
counts are unchanged):
 * convert_I420_Y42B: the first cogorc_planar_chroma_420_422 call wrote its
   second destination through line 1 of component 2 while upsampling
   component 1.  It now uses component 1, as convert_I420_Y444 and the
   component-2 call already do.
 * gst_cogcolorspace_transform: "no match" is logged with GST_DEBUG rather
   than GST_ERROR, because falling back to the generic path is a normal
   outcome for pairs that have no table entry.

Open points for whoever applies this:
 * The author address and the two bare "#include" context lines in the
   gstcogcolorspace.c hunk look truncated in this copy (angle-bracket text
   missing); restore them from the upstream commit before applying.
 * The blob ids on the "index" lines describe the patch as first posted,
   not the result with the two edits above.
 * The in_format == out_format memcpy does not return, so execution still
   continues to the table lookup and then the generic path.
 * The two-rows-per-iteration wrappers read/write row i + 1 and assume an
   even frame height - TODO confirm against the callers.
 * convert_AYUV_I420 still carries its "FIXME why not 2* ?" on the source
   stride, and the AYUV wrappers are not consistent about width / 2 versus
   (width + 1) / 2.

 ext/cog/cogvirtframe.c     |  59 +-----
 ext/cog/gstcogcolorspace.c | 442 ++++++++++++++++++++++++++++++++++++++++
 ext/cog/gstcogorc.orc      | 495 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 947 insertions(+), 49 deletions(-)

diff --git a/ext/cog/cogvirtframe.c b/ext/cog/cogvirtframe.c
index f1faa8e..74336ca 100644
--- a/ext/cog/cogvirtframe.c
+++ b/ext/cog/cogvirtframe.c
@@ -630,19 +630,14 @@ cog_virt_frame_render_resample_horiz_1tap (CogFrame * frame, void *_dest,
 {
   uint8_t *dest = _dest;
   uint8_t *src;
-  int j;
   int n_src;
   int scale = frame->param1;
-  int acc;
 
   n_src = frame->virt_frame1->components[component].width;
   src = cog_virt_frame_get_line (frame->virt_frame1, component, i);
 
-  acc = 0;
-  for (j = 0; j < frame->components[component].width; j++) {
-    dest[j] = src[(acc >> 8)];
-    acc += scale;
-  }
+  cogorc_resample_horiz_1tap (dest, src, 0, scale,
+      frame->components[component].width);
 }
 
 static void
@@ -651,45 +646,14 @@ cog_virt_frame_render_resample_horiz_2tap (CogFrame * frame, void *_dest,
 {
   uint8_t *dest = _dest;
   uint8_t *src;
-  int j;
   int n_src;
   int scale = frame->param1;
-  int acc;
 
   n_src = frame->virt_frame1->components[component].width;
   src = cog_virt_frame_get_line (frame->virt_frame1, component, i);
 
-  acc = 0;
-  for (j = 0; j < frame->components[component].width - 2; j++) {
-    int src_i;
-    int y;
-    int z;
-
-    src_i = acc >> 8;
-    y = acc & 255;
-
-    z = 128;
-    z += (256 - y) * src[src_i + 0];
-    z += y * src[src_i + 1];
-    z >>= 8;
-    dest[j] = CLAMP (z, 0, 255);
-    acc += scale;
-  }
-  for (; j < frame->components[component].width; j++) {
-    int src_i;
-    int y;
-    int z;
-
-    src_i = acc >> 8;
-    y = acc & 255;
-
-    z = 128;
-    z += (256 - y) * src[CLAMP (src_i + 0, 0, n_src - 1)];
-    z += y * src[CLAMP (src_i + 1, 0, n_src - 1)];
-    z >>= 8;
-    dest[j] = CLAMP (z, 0, 255);
-    acc += scale;
-  }
+  cogorc_resample_horiz_2tap (dest, src, 0, scale,
+      frame->components[component].width);
 }
 
 static void
@@ -712,8 +676,8 @@ cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest,
     int y;
     int z;
 
-    src_i = acc >> 8;
-    y = acc & 255;
+    src_i = acc >> 16;
+    y = (acc >> 8) & 255;
 
     z = 32;
     z += cog_resample_table_4tap[y][0] * src[CLAMP (src_i - 1, 0, n_src - 1)];
@@ -729,8 +693,8 @@ cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest,
     int y;
     int z;
 
-    src_i = acc >> 8;
-    y = acc & 255;
+    src_i = acc >> 16;
+    y = (acc >> 8) & 255;
 
     z = 32;
     z += cog_resample_table_4tap[y][0] * src[src_i - 1];
@@ -746,8 +710,8 @@ cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest,
     int y;
     int z;
 
-    src_i = acc >> 8;
-    y = acc & 255;
+    src_i = acc >> 16;
+    y = (acc >> 8) & 255;
 
     z = 32;
     z += cog_resample_table_4tap[y][0] * src[CLAMP (src_i - 1, 0, n_src - 1)];
@@ -775,7 +739,7 @@ cog_virt_frame_new_horiz_resample (CogFrame * vf, int width, int n_taps)
     virt_frame->render_line = cog_virt_frame_render_resample_horiz_4tap;
   }
 
-  virt_frame->param1 = 256 * vf->width / width;
+  virt_frame->param1 = 65536 * vf->width / width;
 
   return virt_frame;
 }
@@ -1429,7 +1393,6 @@ cog_virt_frame_new_color_matrix_YCbCr_to_RGB (CogFrame * vf,
     CogColorMatrix color_matrix, int bits)
 {
   CogFrame *virt_frame;
-  //int *matrix = frame->virt_priv2;
 
   virt_frame = cog_frame_new_virtual (NULL, COG_FRAME_FORMAT_U8_444,
       vf->width, vf->height);
diff --git a/ext/cog/gstcogcolorspace.c b/ext/cog/gstcogcolorspace.c
index bbd0109..96081f7 100644
--- a/ext/cog/gstcogcolorspace.c
+++ b/ext/cog/gstcogcolorspace.c
@@ -37,6 +37,7 @@
 #include
 #include
 #include "gstcogutils.h"
+#include "gstcogorc.h"
 
 #define GST_TYPE_COGCOLORSPACE \
   (gst_cogcolorspace_get_type())
@@ -383,6 +384,425 @@ gst_cogcolorspace_caps_get_chroma_site (GstCaps * caps)
   return COG_CHROMA_SITE_MPEG2;
 }
 
+static void
+convert_I420_YUY2 (CogFrame * dest, CogFrame * src)
+{
+  int i;
+
+  for (i = 0; i < dest->height; i += 2) {
+    cogorc_convert_I420_YUY2 (COG_FRAME_DATA_GET_LINE (dest->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (dest->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 1, i >> 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 2, i >> 1),
+        (dest->width + 1) / 2);
+  }
+}
+
+static void
+convert_I420_UYVY (CogFrame * dest, CogFrame * src)
+{
+  int i;
+
+  for (i = 0; i < dest->height; i += 2) {
+    cogorc_convert_I420_UYVY (COG_FRAME_DATA_GET_LINE (dest->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (dest->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 1, i >> 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 2, i >> 1),
+        (dest->width + 1) / 2);
+  }
+}
+
+static void
+convert_I420_AYUV (CogFrame * dest, CogFrame * src)
+{
+  int i;
+
+  for (i = 0; i < dest->height; i += 2) {
+    cogorc_convert_I420_AYUV (COG_FRAME_DATA_GET_LINE (dest->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (dest->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 1, i >> 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 2, i >> 1), dest->width);
+  }
+}
+
+static void
+convert_I420_Y42B (CogFrame * dest, CogFrame * src)
+{
+  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
+      src->components[0].data, src->components[0].stride,
+      dest->width, dest->height);
+
+  cogorc_planar_chroma_420_422 (dest->components[1].data,
+      2 * dest->components[1].stride,
+      COG_FRAME_DATA_GET_LINE (dest->components + 1, 1),
+      2 * dest->components[1].stride, src->components[1].data,
+      src->components[1].stride, (dest->width + 1) / 2, dest->height / 2);
+
+  cogorc_planar_chroma_420_422 (dest->components[2].data,
+      2 * dest->components[2].stride,
+      COG_FRAME_DATA_GET_LINE (dest->components + 2, 1),
+      2 * dest->components[2].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height / 2);
+}
+
+static void
+convert_I420_Y444 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
+      src->components[0].data, src->components[0].stride,
+      dest->width, dest->height);
+
+  cogorc_planar_chroma_420_444 (dest->components[1].data,
+      2 * dest->components[1].stride,
+      COG_FRAME_DATA_GET_LINE (dest->components + 1, 1),
+      2 * dest->components[1].stride, src->components[1].data,
+      src->components[1].stride, (dest->width + 1) / 2, (dest->height + 1) / 2);
+
+  cogorc_planar_chroma_420_444 (dest->components[2].data,
+      2 * dest->components[2].stride,
+      COG_FRAME_DATA_GET_LINE (dest->components + 2, 1),
+      2 * dest->components[2].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, (dest->height + 1) / 2);
+}
+
+static void
+convert_YUY2_I420 (CogFrame * dest, CogFrame * src)
+{
+  int i;
+
+  for (i = 0; i < dest->height; i += 2) {
+    cogorc_convert_YUY2_I420 (COG_FRAME_DATA_GET_LINE (dest->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (dest->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (dest->components + 1, i >> 1),
+        COG_FRAME_DATA_GET_LINE (dest->components + 2, i >> 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i + 1),
+        (dest->width + 1) / 2);
+  }
+}
+
+static void
+convert_YUY2_AYUV (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_YUY2_AYUV (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_YUY2_Y42B (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_YUY2_Y42B (dest->components[0].data,
+      dest->components[0].stride, dest->components[1].data,
+      dest->components[1].stride, dest->components[2].data,
+      dest->components[2].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_YUY2_Y444 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_YUY2_Y444 (dest->components[0].data,
+      dest->components[0].stride, dest->components[1].data,
+      dest->components[1].stride, dest->components[2].data,
+      dest->components[2].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+
+static void
+convert_UYVY_I420 (CogFrame * dest, CogFrame * src)
+{
+  int i;
+
+  for (i = 0; i < dest->height; i += 2) {
+    cogorc_convert_UYVY_I420 (COG_FRAME_DATA_GET_LINE (dest->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (dest->components + 0, i + 1),
+        COG_FRAME_DATA_GET_LINE (dest->components + 1, i >> 1),
+        COG_FRAME_DATA_GET_LINE (dest->components + 2, i >> 1),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i),
+        COG_FRAME_DATA_GET_LINE (src->components + 0, i + 1),
+        (dest->width + 1) / 2);
+  }
+}
+
+static void
+convert_UYVY_AYUV (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_UYVY_AYUV (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_UYVY_YUY2 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_UYVY_YUY2 (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_UYVY_Y42B (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_UYVY_Y42B (dest->components[0].data,
+      dest->components[0].stride, dest->components[1].data,
+      dest->components[1].stride, dest->components[2].data,
+      dest->components[2].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_UYVY_Y444 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_UYVY_Y444 (dest->components[0].data,
+      dest->components[0].stride, dest->components[1].data,
+      dest->components[1].stride, dest->components[2].data,
+      dest->components[2].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_AYUV_I420 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_AYUV_I420 (COG_FRAME_DATA_GET_LINE (dest->components + 0, 0),
+      2 * dest->components[0].stride,
+      COG_FRAME_DATA_GET_LINE (dest->components + 0, 1),
+      2 * dest->components[0].stride,
+      dest->components[1].data, dest->components[1].stride,
+      dest->components[2].data, dest->components[2].stride,
+      COG_FRAME_DATA_GET_LINE (src->components + 0, 0),
+      /* FIXME why not 2* ? */
+      src->components[0].stride,
+      COG_FRAME_DATA_GET_LINE (src->components + 0, 1),
+      src->components[0].stride, dest->width / 2, dest->height / 2);
+}
+
+static void
+convert_AYUV_YUY2 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_AYUV_YUY2 (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, dest->width / 2, dest->height);
+}
+
+static void
+convert_AYUV_UYVY (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_AYUV_UYVY (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, dest->width / 2, dest->height);
+}
+
+static void
+convert_AYUV_Y42B (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_AYUV_Y42B (dest->components[0].data,
+      dest->components[0].stride, dest->components[1].data,
+      dest->components[1].stride, dest->components[2].data,
+      dest->components[2].stride, src->components[0].data,
+      src->components[0].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_AYUV_Y444 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_AYUV_Y444 (dest->components[0].data,
+      dest->components[0].stride, dest->components[1].data,
+      dest->components[1].stride, dest->components[2].data,
+      dest->components[2].stride, src->components[0].data,
+      src->components[0].stride, dest->width, dest->height);
+}
+
+static void
+convert_Y42B_I420 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
+      src->components[0].data, src->components[0].stride,
+      dest->width, dest->height);
+
+  cogorc_planar_chroma_422_420 (dest->components[1].data,
+      dest->components[1].stride, src->components[1].data,
+      2 * src->components[1].stride,
+      COG_FRAME_DATA_GET_LINE (src->components + 1, 1),
+      2 * src->components[1].stride, (dest->width + 1) / 2,
+      (dest->height + 1) / 2);
+
+  cogorc_planar_chroma_422_420 (dest->components[2].data,
+      dest->components[2].stride, src->components[2].data,
+      2 * src->components[2].stride,
+      COG_FRAME_DATA_GET_LINE (src->components + 2, 1),
+      2 * src->components[2].stride, (dest->width + 1) / 2,
+      (dest->height + 1) / 2);
+}
+
+static void
+convert_Y42B_Y444 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
+      src->components[0].data, src->components[0].stride,
+      dest->width, dest->height);
+
+  cogorc_planar_chroma_422_444 (dest->components[1].data,
+      dest->components[1].stride, src->components[1].data,
+      src->components[1].stride, (dest->width + 1) / 2, dest->height);
+
+  cogorc_planar_chroma_422_444 (dest->components[2].data,
+      dest->components[2].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_Y42B_YUY2 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_Y42B_YUY2 (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, src->components[1].data,
+      src->components[1].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_Y42B_UYVY (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_Y42B_UYVY (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, src->components[1].data,
+      src->components[1].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_Y42B_AYUV (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_Y42B_AYUV (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, src->components[1].data,
+      src->components[1].stride, src->components[2].data,
+      src->components[2].stride, (dest->width) / 2, dest->height);
+}
+
+static void
+convert_Y444_I420 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
+      src->components[0].data, src->components[0].stride,
+      dest->width, dest->height);
+
+  cogorc_planar_chroma_444_420 (dest->components[1].data,
+      dest->components[1].stride, src->components[1].data,
+      2 * src->components[1].stride,
+      COG_FRAME_DATA_GET_LINE (src->components + 1, 1),
+      2 * src->components[1].stride, (dest->width + 1) / 2,
+      (dest->height + 1) / 2);
+
+  cogorc_planar_chroma_444_420 (dest->components[2].data,
+      dest->components[2].stride, src->components[2].data,
+      2 * src->components[2].stride,
+      COG_FRAME_DATA_GET_LINE (src->components + 2, 1),
+      2 * src->components[2].stride, (dest->width + 1) / 2,
+      (dest->height + 1) / 2);
+}
+
+static void
+convert_Y444_Y42B (CogFrame * dest, CogFrame * src)
+{
+  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
+      src->components[0].data, src->components[0].stride,
+      dest->width, dest->height);
+
+  cogorc_planar_chroma_444_422 (dest->components[1].data,
+      dest->components[1].stride, src->components[1].data,
+      src->components[1].stride, (dest->width + 1) / 2, dest->height);
+
+  cogorc_planar_chroma_444_422 (dest->components[2].data,
+      dest->components[2].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_Y444_YUY2 (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_Y444_YUY2 (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, src->components[1].data,
+      src->components[1].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_Y444_UYVY (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_Y444_UYVY (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, src->components[1].data,
+      src->components[1].stride, src->components[2].data,
+      src->components[2].stride, (dest->width + 1) / 2, dest->height);
+}
+
+static void
+convert_Y444_AYUV (CogFrame * dest, CogFrame * src)
+{
+  cogorc_convert_Y444_AYUV (dest->components[0].data,
+      dest->components[0].stride, src->components[0].data,
+      src->components[0].stride, src->components[1].data,
+      src->components[1].stride, src->components[2].data,
+      src->components[2].stride, dest->width, dest->height);
+}
+
+
+
+
+typedef struct
+{
+  uint32_t in_format;
+  uint32_t out_format;
+  void (*convert) (CogFrame * dest, CogFrame * src);
+} CogColorspaceTransform;
+static CogColorspaceTransform transforms[] = {
+  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_YUY2, convert_I420_YUY2},
+  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_UYVY, convert_I420_UYVY},
+  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_AYUV, convert_I420_AYUV},
+  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_Y42B, convert_I420_Y42B},
+  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_Y444, convert_I420_Y444},
+
+  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_I420, convert_YUY2_I420},
+  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_UYVY, convert_UYVY_YUY2},    /* alias */
+  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_AYUV, convert_YUY2_AYUV},
+  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_Y42B, convert_YUY2_Y42B},
+  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_Y444, convert_YUY2_Y444},
+
+  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_I420, convert_UYVY_I420},
+  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_YUY2, convert_UYVY_YUY2},
+  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_AYUV, convert_UYVY_AYUV},
+  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_Y42B, convert_UYVY_Y42B},
+  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_Y444, convert_UYVY_Y444},
+
+  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_I420, convert_AYUV_I420},
+  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_YUY2, convert_AYUV_YUY2},
+  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_UYVY, convert_AYUV_UYVY},
+  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_Y42B, convert_AYUV_Y42B},
+  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_Y444, convert_AYUV_Y444},
+
+  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_I420, convert_Y42B_I420},
+  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_YUY2, convert_Y42B_YUY2},
+  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_UYVY, convert_Y42B_UYVY},
+  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_AYUV, convert_Y42B_AYUV},
+  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_Y444, convert_Y42B_Y444},
+
+  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_I420, convert_Y444_I420},
+  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_YUY2, convert_Y444_YUY2},
+  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_UYVY, convert_Y444_UYVY},
+  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_AYUV, convert_Y444_AYUV},
+  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_Y42B, convert_Y444_Y42B},
+};
+
 static GstFlowReturn
 gst_cogcolorspace_transform (GstBaseTransform * base_transform,
     GstBuffer * inbuf, GstBuffer * outbuf)
@@ -421,6 +841,28 @@ gst_cogcolorspace_transform (GstBaseTransform * base_transform,
   out_frame = gst_cog_buffer_wrap (gst_buffer_ref (outbuf),
       out_format, width, height);
 
+  if (in_format == out_format) {
+    memcpy (GST_BUFFER_DATA (outbuf), GST_BUFFER_DATA (inbuf),
+        GST_BUFFER_SIZE (outbuf));
+  }
+
+  {
+    int i;
+
+    for (i = 0; i < sizeof (transforms) / sizeof (transforms[0]); i++) {
+      if (transforms[i].in_format == in_format &&
+          transforms[i].out_format == out_format) {
+        transforms[i].convert (out_frame, frame);
+        cog_frame_unref (frame);
+        cog_frame_unref (out_frame);
+
+        return GST_FLOW_OK;
+      }
+    }
+
+    GST_DEBUG ("no match");
+  }
+
   switch (out_format) {
     case GST_VIDEO_FORMAT_YUY2:
     case GST_VIDEO_FORMAT_UYVY:
diff --git a/ext/cog/gstcogorc.orc b/ext/cog/gstcogorc.orc
index 445a798..e125e43 100644
--- a/ext/cog/gstcogorc.orc
+++ b/ext/cog/gstcogorc.orc
@@ -1,4 +1,11 @@
 
+.function cogorc_memcpy_2d
+.flags 2d
+.dest 1 d1
+.source 1 s1
+
+copyb d1, s1
+
 
 .function cogorc_downsample_horiz_cosite_1tap
 .dest 1 d1
@@ -492,7 +499,7 @@ convubw t2, s2
 mullw t2, t2, p2
 addw t1, t1, t2
 shruw t1, t1, 8
-convuuswb d1, t1
+convsuswb d1, t1
 
 
 .function cogorc_combine4_u8
@@ -560,3 +567,489 @@ select1lw t1, s1
 select1wb d1, t1
 
 
+.function cogorc_resample_horiz_1tap
+.dest 1 d1
+.source 1 s1
+.param 4 p1
+.param 4 p2
+
+ldresnearb d1, s1, p1, p2
+
+
+.function cogorc_resample_horiz_2tap
+.dest 1 d1
+.source 1 s1
+.param 4 p1
+.param 4 p2
+
+ldreslinb d1, s1, p1, p2
+
+
+.function cogorc_convert_I420_UYVY
+.dest 4 d1
+.dest 4 d2
+.source 2 y1
+.source 2 y2
+.source 1 u
+.source 1 v
+.temp 2 uv
+
+mergebw uv, u, v
+x2 mergebw d1, uv, y1
+x2 mergebw d2, uv, y2
+
+
+.function cogorc_convert_I420_YUY2
+.dest 4 d1
+.dest 4 d2
+.source 2 y1
+.source 2 y2
+.source 1 u
+.source 1 v
+.temp 2 uv
+
+mergebw uv, u, v
+x2 mergebw d1, y1, uv
+x2 mergebw d2, y2, uv
+
+
+
+.function cogorc_convert_I420_AYUV
+.dest 4 d1
+.dest 4 d2
+.source 1 y1
+.source 1 y2
+.source 1 u
+.source 1 v
+.const 1 c255 255
+.temp 2 uv
+.temp 2 ay
+.temp 1 tu
+.temp 1 tv
+
+loadupdb tu, u
+loadupdb tv, v
+mergebw uv, tu, tv
+mergebw ay, c255, y1
+mergewl d1, ay, uv
+mergebw ay, c255, y2
+mergewl d2, ay, uv
+
+
+.function cogorc_convert_YUY2_I420
+.dest 2 y1
+.dest 2 y2
+.dest 1 u
+.dest 1 v
+.source 4 yuv1
+.source 4 yuv2
+.temp 2 t1
+.temp 2 t2
+.temp 2 ty
+
+x2 splitwb t1, ty, yuv1
+storew y1, ty
+x2 splitwb t2, ty, yuv2
+storew y2, ty
+x2 avgub t1, t1, t2
+splitwb v, u, t1
+
+
+.function cogorc_convert_UYVY_YUY2
+.flags 2d
+.dest 4 yuy2
+.source 4 uyvy
+
+x2 swapw yuy2, uyvy
+
+
+.function cogorc_planar_chroma_420_422
+.flags 2d
+.dest 1 d1
+.dest 1 d2
+.source 1 s
+
+copyb d1, s
+copyb d2, s
+
+
+.function cogorc_planar_chroma_420_444
+.flags 2d
+.dest 2 d1
+.dest 2 d2
+.source 1 s
+.temp 2 t
+
+splatbw t, s
+storew d1, t
+storew d2, t
+
+
+.function cogorc_planar_chroma_422_444
+.flags 2d
+.dest 2 d1
+.source 1 s
+.temp 2 t
+
+splatbw t, s
+storew d1, t
+
+
+.function cogorc_planar_chroma_444_422
+.flags 2d
+.dest 1 d
+.source 2 s
+.temp 1 t1
+.temp 1 t2
+
+splitwb t1, t2, s
+avgub d, t1, t2
+
+
+.function cogorc_planar_chroma_444_420
+.flags 2d
+.dest 1 d
+.source 2 s1
+.source 2 s2
+.temp 2 t
+.temp 1 t1
+.temp 1 t2
+
+x2 avgub t, s1, s2
+splitwb t1, t2, t
+avgub d, t1, t2
+
+
+.function cogorc_planar_chroma_422_420
+.flags 2d
+.dest 1 d
+.source 1 s1
+.source 1 s2
+
+avgub d, s1, s2
+
+
+.function cogorc_convert_YUY2_AYUV
+.flags 2d
+.dest 8 ayuv
+.source 4 yuy2
+.const 2 c255 0xff
+.temp 2 yy
+.temp 2 uv
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitwb uv, yy, yuy2
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+.function cogorc_convert_UYVY_AYUV
+.flags 2d
+.dest 8 ayuv
+.source 4 uyvy
+.const 2 c255 0xff
+.temp 2 yy
+.temp 2 uv
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitwb yy, uv, uyvy
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+.function cogorc_convert_YUY2_Y42B
+.flags 2d
+.dest 2 y
+.dest 1 u
+.dest 1 v
+.source 4 yuy2
+.temp 2 uv
+
+x2 splitwb uv, y, yuy2
+splitwb v, u, uv
+
+
+.function cogorc_convert_UYVY_Y42B
+.flags 2d
+.dest 2 y
+.dest 1 u
+.dest 1 v
+.source 4 uyvy
+.temp 2 uv
+
+x2 splitwb y, uv, uyvy
+splitwb v, u, uv
+
+
+.function cogorc_convert_YUY2_Y444
+.flags 2d
+.dest 2 y
+.dest 2 uu
+.dest 2 vv
+.source 4 yuy2
+.temp 2 uv
+.temp 1 u
+.temp 1 v
+
+x2 splitwb uv, y, yuy2
+splitwb v, u, uv
+splatbw uu, u
+splatbw vv, v
+
+
+.function cogorc_convert_UYVY_Y444
+.flags 2d
+.dest 2 y
+.dest 2 uu
+.dest 2 vv
+.source 4 uyvy
+.temp 2 uv
+.temp 1 u
+.temp 1 v
+
+x2 splitwb y, uv, uyvy
+splitwb v, u, uv
+splatbw uu, u
+splatbw vv, v
+
+
+.function cogorc_convert_UYVY_I420
+.dest 2 y1
+.dest 2 y2
+.dest 1 u
+.dest 1 v
+.source 4 yuv1
+.source 4 yuv2
+.temp 2 t1
+.temp 2 t2
+.temp 2 ty
+
+x2 splitwb ty, t1, yuv1
+storew y1, ty
+x2 splitwb ty, t2, yuv2
+storew y2, ty
+x2 avgub t1, t1, t2
+splitwb v, u, t1
+
+
+
+.function cogorc_convert_AYUV_I420
+.flags 2d
+.dest 2 y1
+.dest 2 y2
+.dest 1 u
+.dest 1 v
+.source 8 ayuv1
+.source 8 ayuv2
+.temp 4 ay
+.temp 4 uv1
+.temp 4 uv2
+.temp 4 uv
+.temp 2 uu
+.temp 2 vv
+.temp 1 t1
+.temp 1 t2
+
+x2 splitlw uv1, ay, ayuv1
+x2 select1wb y1, ay
+x2 splitlw uv2, ay, ayuv2
+x2 select1wb y2, ay
+x4 avgub uv, uv1, uv2
+x2 splitwb vv, uu, uv
+splitwb t1, t2, uu
+avgub u, t1, t2
+splitwb t1, t2, vv
+avgub v, t1, t2
+
+
+
+.function cogorc_convert_AYUV_YUY2
+.flags 2d
+.dest 4 yuy2
+.source 8 ayuv
+.temp 2 yy
+.temp 2 uv1
+.temp 2 uv2
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitlw uvuv, ayay, ayuv
+splitlw uv1, uv2, uvuv
+x2 avgub uv1, uv1, uv2
+x2 select1wb yy, ayay
+x2 mergebw yuy2, yy, uv1
+
+
+.function cogorc_convert_AYUV_UYVY
+.flags 2d
+.dest 4 yuy2
+.source 8 ayuv
+.temp 2 yy
+.temp 2 uv1
+.temp 2 uv2
+.temp 4 ayay
+.temp 4 uvuv
+
+x2 splitlw uvuv, ayay, ayuv
+splitlw uv1, uv2, uvuv
+x2 avgub uv1, uv1, uv2
+x2 select1wb yy, ayay
+x2 mergebw yuy2, uv1, yy
+
+
+
+.function cogorc_convert_AYUV_Y42B
+.flags 2d
+.dest 2 y
+.dest 1 u
+.dest 1 v
+.source 8 ayuv
+.temp 4 ayay
+.temp 4 uvuv
+.temp 2 uv1
+.temp 2 uv2
+
+x2 splitlw uvuv, ayay, ayuv
+splitlw uv1, uv2, uvuv
+x2 avgub uv1, uv1, uv2
+splitwb v, u, uv1
+x2 select1wb y, ayay
+
+
+.function cogorc_convert_AYUV_Y444
+.flags 2d
+.dest 1 y
+.dest 1 u
+.dest 1 v
+.source 4 ayuv
+.temp 2 ay
+.temp 2 uv
+
+splitlw uv, ay, ayuv
+splitwb v, u, uv
+select1wb y, ay
+
+
+.function cogorc_convert_Y42B_YUY2
+.flags 2d
+.dest 4 yuy2
+.source 2 y
+.source 1 u
+.source 1 v
+.temp 2 uv
+
+mergebw uv, u, v
+x2 mergebw yuy2, y, uv
+
+
+.function cogorc_convert_Y42B_UYVY
+.flags 2d
+.dest 4 uyvy
+.source 2 y
+.source 1 u
+.source 1 v
+.temp 2 uv
+
+mergebw uv, u, v
+x2 mergebw uyvy, uv, y
+
+
+.function cogorc_convert_Y42B_AYUV
+.flags 2d
+.dest 8 ayuv
+.source 2 yy
+.source 1 u
+.source 1 v
+.const 1 c255 255
+.temp 2 uv
+.temp 2 ay
+.temp 4 uvuv
+.temp 4 ayay
+
+mergebw uv, u, v
+x2 mergebw ayay, c255, yy
+mergewl uvuv, uv, uv
+x2 mergewl ayuv, ayay, uvuv
+
+
+.function cogorc_convert_Y444_YUY2
+.flags 2d
+.dest 4 yuy2
+.source 2 y
+.source 2 u
+.source 2 v
+.temp 2 uv
+.temp 4 uvuv
+.temp 2 uv1
+.temp 2 uv2
+
+x2 mergebw uvuv, u, v
+splitlw uv1, uv2, uvuv
+x2 avgub uv, uv1, uv2
+x2 mergebw yuy2, y, uv
+
+
+.function cogorc_convert_Y444_UYVY
+.flags 2d
+.dest 4 uyvy
+.source 2 y
+.source 2 u
+.source 2 v
+.temp 2 uv
+.temp 4 uvuv
+.temp 2 uv1
+.temp 2 uv2
+
+x2 mergebw uvuv, u, v
+splitlw uv1, uv2, uvuv
+x2 avgub uv, uv1, uv2
+x2 mergebw uyvy, uv, y
+
+
+.function cogorc_convert_Y444_AYUV
+.flags 2d
+.dest 4 ayuv
+.source 1 yy
+.source 1 u
+.source 1 v
+.const 1 c255 255
+.temp 2 uv
+.temp 2 ay
+
+mergebw uv, u, v
+mergebw ay, c255, yy
+mergewl ayuv, ay, uv
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-- 
2.7.4