From dbfd5a5af85c9ca0a4852eaf0850e027cce99849 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Sun, 6 Jun 2010 23:47:16 -0700 Subject: [PATCH] videoscale: convert from liboil to orc --- gst/videoscale/Makefile.am | 11 +- gst/videoscale/gstvideoscale.c | 3 - gst/videoscale/gstvideoscaleorc-dist.c | 209 +++++++++++++++++++++++++++++++++ gst/videoscale/gstvideoscaleorc-dist.h | 20 ++++ gst/videoscale/gstvideoscaleorc.orc | 18 +++ gst/videoscale/vs_4tap.c | 1 - gst/videoscale/vs_4tap.h | 1 - gst/videoscale/vs_image.h | 2 +- gst/videoscale/vs_scanline.c | 67 +++++------ gst/videoscale/vs_scanline.h | 2 +- 10 files changed, 292 insertions(+), 42 deletions(-) create mode 100644 gst/videoscale/gstvideoscaleorc-dist.c create mode 100644 gst/videoscale/gstvideoscaleorc-dist.h create mode 100644 gst/videoscale/gstvideoscaleorc.orc diff --git a/gst/videoscale/Makefile.am b/gst/videoscale/Makefile.am index 359e238..2a3992b 100644 --- a/gst/videoscale/Makefile.am +++ b/gst/videoscale/Makefile.am @@ -1,16 +1,21 @@ plugin_LTLIBRARIES = libgstvideoscale.la +ORC_SOURCE=gstvideoscaleorc +include $(top_srcdir)/common/orc.mak + libgstvideoscale_la_SOURCES = \ gstvideoscale.c \ vs_image.c \ vs_scanline.c \ vs_4tap.c -libgstvideoscale_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) $(LIBOIL_CFLAGS) +nodist_libgstvideoscale_la_SOURCES = $(ORC_NODIST_SOURCES) + +libgstvideoscale_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) $(ORC_CFLAGS) libgstvideoscale_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) libgstvideoscale_la_LIBADD = \ $(top_builddir)/gst-libs/gst/video/libgstvideo-$(GST_MAJORMINOR).la \ - $(GST_BASE_LIBS) $(GST_LIBS) $(LIBOIL_LIBS) + $(GST_BASE_LIBS) $(GST_LIBS) $(ORC_LIBS) libgstvideoscale_la_LIBTOOLFLAGS = --tag=disable-static noinst_HEADERS = \ @@ -18,3 +23,5 @@ noinst_HEADERS = \ vs_image.h \ vs_scanline.h \ vs_4tap.h + + diff --git a/gst/videoscale/gstvideoscale.c b/gst/videoscale/gstvideoscale.c index 5ae058d..908224c 100644 --- a/gst/videoscale/gstvideoscale.c +++ b/gst/videoscale/gstvideoscale.c @@ -57,7 +57,6 @@ #include #include -#include #include "gstvideoscale.h" #include "vs_image.h" @@ -1380,8 +1379,6 @@ gst_video_scale_src_event (GstBaseTransform * trans, GstEvent * event) static gboolean plugin_init (GstPlugin * plugin) { - oil_init (); - if (!gst_element_register (plugin, "videoscale", GST_RANK_NONE, GST_TYPE_VIDEO_SCALE)) return FALSE; diff --git a/gst/videoscale/gstvideoscaleorc-dist.c b/gst/videoscale/gstvideoscaleorc-dist.c new file mode 100644 index 0000000..6c8da38 --- /dev/null +++ b/gst/videoscale/gstvideoscaleorc-dist.c @@ -0,0 +1,209 @@ + +/* autogenerated from gstvideoscaleorc.orc */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#ifndef DISABLE_ORC +#include +#else +#include +#endif +#include + +void orc_merge_linear_u8 (uint8_t * d1, const uint8_t * s1, const uint8_t * s2, + int p1, int p2, int n); + + +/* begin Orc C target preamble */ +#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x))) +#define ORC_ABS(a) ((a)<0 ? -(a) : (a)) +#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b)) +#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b)) +#define ORC_SB_MAX 127 +#define ORC_SB_MIN (-1-ORC_SB_MAX) +#define ORC_UB_MAX 255 +#define ORC_UB_MIN 0 +#define ORC_SW_MAX 32767 +#define ORC_SW_MIN (-1-ORC_SW_MAX) +#define ORC_UW_MAX 65535 +#define ORC_UW_MIN 0 +#define ORC_SL_MAX 2147483647 +#define ORC_SL_MIN (-1-ORC_SL_MAX) +#define ORC_UL_MAX 4294967295U +#define ORC_UL_MIN 0 +#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX) +#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX) +#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX) +#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX) +#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX) +#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX) +#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8)) +#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24)) +#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset))) +#define ORC_AS_FLOAT(x) (((union { int i; float f; } *)(&x))->f) +typedef union +{ + int32_t i; + float f; +} orc_union32; +typedef union +{ + int64_t i; + double f; +} orc_union64; +/* end Orc C target preamble */ + + + +/* orc_merge_linear_u8 */ +#ifdef DISABLE_ORC +void +orc_merge_linear_u8 (uint8_t * d1, const uint8_t * s1, const uint8_t * s2, + int p1, int p2, int n) +{ + int i; + int8_t var0; + int8_t *ptr0; + int8_t var4; + const int8_t *ptr4; + int8_t var5; + const int8_t *ptr5; + const int16_t var16 = 128; + const int16_t var17 = 8; + const int8_t var24 = p1; + const int8_t var25 = p2; + int16_t var32; + int16_t var33; + int16_t var34; + int16_t var35; + int16_t var36; + + ptr0 = (int8_t *) d1; + ptr4 = (int8_t *) s1; + ptr5 = (int8_t *) s2; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + var5 = *ptr5; + ptr5++; + /* 0: mulubw */ + var32 = (uint8_t) var4 *(uint8_t) var24; + /* 1: mulubw */ + var33 = (uint8_t) var5 *(uint8_t) var25; + /* 2: addw */ + var34 = var32 + var33; + /* 3: addw */ + var35 = var34 + var16; + /* 4: shruw */ + var36 = ((uint16_t) var35) >> var17; + /* 5: convwb */ + var0 = var36; + *ptr0 = var0; + ptr0++; + } + +} + +#else +static void +_backup_orc_merge_linear_u8 (OrcExecutor * ex) +{ + int i; + int n = ex->n; + int8_t var0; + int8_t *ptr0; + int8_t var4; + const int8_t *ptr4; + int8_t var5; + const int8_t *ptr5; + const int16_t var16 = 128; + const int16_t var17 = 8; + const int8_t var24 = ex->params[24]; + const int8_t var25 = ex->params[25]; + int16_t var32; + int16_t var33; + int16_t var34; + int16_t var35; + int16_t var36; + + ptr0 = (int8_t *) ex->arrays[0]; + ptr4 = (int8_t *) ex->arrays[4]; + ptr5 = (int8_t *) ex->arrays[5]; + + for (i = 0; i < n; i++) { + var4 = *ptr4; + ptr4++; + var5 = *ptr5; + ptr5++; + /* 0: mulubw */ + var32 = (uint8_t) var4 *(uint8_t) var24; + /* 1: mulubw */ + var33 = (uint8_t) var5 *(uint8_t) var25; + /* 2: addw */ + var34 = var32 + var33; + /* 3: addw */ + var35 = var34 + var16; + /* 4: shruw */ + var36 = ((uint16_t) var35) >> var17; + /* 5: convwb */ + var0 = var36; + *ptr0 = var0; + ptr0++; + } + +} + +void +orc_merge_linear_u8 (uint8_t * d1, const uint8_t * s1, const uint8_t * s2, + int p1, int p2, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static int p_inited = 0; + static OrcProgram *p = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcCompileResult result; + + p = orc_program_new (); + orc_program_set_name (p, "orc_merge_linear_u8"); + orc_program_set_backup_function (p, _backup_orc_merge_linear_u8); + orc_program_add_destination (p, 1, "d1"); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_constant (p, 2, 128, "c1"); + orc_program_add_constant (p, 2, 8, "c2"); + orc_program_add_parameter (p, 1, "p1"); + orc_program_add_parameter (p, 1, "p2"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + + orc_program_append (p, "mulubw", ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_P1); + orc_program_append (p, "mulubw", ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_P2); + orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2); + orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1); + orc_program_append (p, "shruw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2); + orc_program_append (p, "convwb", ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1); + + result = orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->program = p; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->arrays[ORC_VAR_S2] = (void *) s2; + ex->params[ORC_VAR_P1] = p1; + ex->params[ORC_VAR_P2] = p2; + + func = p->code_exec; + func (ex); +} +#endif diff --git a/gst/videoscale/gstvideoscaleorc-dist.h b/gst/videoscale/gstvideoscaleorc-dist.h new file mode 100644 index 0000000..74b4a26 --- /dev/null +++ b/gst/videoscale/gstvideoscaleorc-dist.h @@ -0,0 +1,20 @@ + +/* autogenerated from gstvideoscaleorc.orc */ + +#ifndef _GSTVIDEOSCALEORC_H_ +#define _GSTVIDEOSCALEORC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void orc_merge_linear_u8 (uint8_t * d1, const uint8_t * s1, const uint8_t * s2, int p1, int p2, int n); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/gst/videoscale/gstvideoscaleorc.orc b/gst/videoscale/gstvideoscaleorc.orc new file mode 100644 index 0000000..cc4f1c8 --- /dev/null +++ b/gst/videoscale/gstvideoscaleorc.orc @@ -0,0 +1,18 @@ + +.function orc_merge_linear_u8 +.dest 1 d1 +.source 1 s1 +.source 1 s2 +.param 1 p1 +.param 1 p2 +.temp 2 t1 +.temp 2 t2 + +mulubw t1, s1, p1 +mulubw t2, s2, p2 +addw t1, t1, t2 +addw t1, t1, 128 +shruw t1, t1, 8 +convwb d1, t1 + + diff --git a/gst/videoscale/vs_4tap.c b/gst/videoscale/vs_4tap.c index 8dc1044..87e16aa 100644 --- a/gst/videoscale/vs_4tap.c +++ b/gst/videoscale/vs_4tap.c @@ -31,7 +31,6 @@ #include "vs_4tap.h" -#include #include #define SHIFT 10 diff --git a/gst/videoscale/vs_4tap.h b/gst/videoscale/vs_4tap.h index 3b13662..ba1ffee 100644 --- a/gst/videoscale/vs_4tap.h +++ b/gst/videoscale/vs_4tap.h @@ -30,7 +30,6 @@ #include "vs_image.h" -#include void vs_4tap_init (void); void vs_scanline_resample_4tap_Y (uint8_t *dest, uint8_t *src, diff --git a/gst/videoscale/vs_image.h b/gst/videoscale/vs_image.h index 9771c77..89a2e3d 100644 --- a/gst/videoscale/vs_image.h +++ b/gst/videoscale/vs_image.h @@ -28,7 +28,7 @@ #ifndef __VS_IMAGE_H__ #define __VS_IMAGE_H__ -#include +#include <_stdint.h> typedef struct _VSImage VSImage; diff --git a/gst/videoscale/vs_scanline.c b/gst/videoscale/vs_scanline.c index e82e4f3..2827c40 100644 --- a/gst/videoscale/vs_scanline.c +++ b/gst/videoscale/vs_scanline.c @@ -27,7 +27,7 @@ #include "vs_scanline.h" -#include +#include "gstvideoscaleorc.h" /* greyscale, i.e., single componenet */ @@ -92,7 +92,7 @@ vs_scanline_merge_linear_Y (uint8_t * dest, uint8_t * src1, uint8_t * src2, { uint32_t value = x >> 8; - oil_merge_linear_u8 (dest, src1, src2, &value, n); + orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, n); } void @@ -213,6 +213,32 @@ vs_scanline_resample_nearest_RGBA (uint8_t * dest, uint8_t * src, int src_width, } #include + +static void +oil_resample_linear_argb (uint32_t * d, uint32_t * s, int n, uint32_t * in) +{ + uint8_t *src = (uint8_t *) s; + uint8_t *dest = (uint8_t *) d; + int acc = in[0]; + int increment = in[1]; + int i; + int j; + int x; + + for (i = 0; i < n; i++) { + j = acc >> 16; + x = (acc & 0xffff) >> 8; + dest[4 * i + 0] = (src[4 * j + 0] * (256 - x) + src[4 * j + 4] * x) >> 8; + dest[4 * i + 1] = (src[4 * j + 1] * (256 - x) + src[4 * j + 5] * x) >> 8; + dest[4 * i + 2] = (src[4 * j + 2] * (256 - x) + src[4 * j + 6] * x) >> 8; + dest[4 * i + 3] = (src[4 * j + 3] * (256 - x) + src[4 * j + 7] * x) >> 8; + + acc += increment; + } + + in[0] = acc; +} + void vs_scanline_resample_linear_RGBA (uint8_t * dest, uint8_t * src, int src_width, int n, int *accumulator, int increment) @@ -254,8 +280,7 @@ vs_scanline_merge_linear_RGBA (uint8_t * dest, uint8_t * src1, uint8_t * src2, { uint32_t value = x >> 8; - oil_merge_linear_argb ((uint32_t *) dest, (uint32_t *) src1, - (uint32_t *) src2, &value, n); + orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, n * 4); } @@ -336,7 +361,7 @@ vs_scanline_merge_linear_RGB (uint8_t * dest, uint8_t * src1, uint8_t * src2, { uint32_t value = x >> 8; - oil_merge_linear_u8 (dest, src1, src2, &value, n * 3); + orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, n * 3); } @@ -458,22 +483,10 @@ void vs_scanline_merge_linear_YUYV (uint8_t * dest, uint8_t * src1, uint8_t * src2, int n, int x) { - int i; int quads = (n + 1) / 2; + uint32_t value = x >> 8; - for (i = 0; i < quads; i++) { - dest[i * 4 + 0] = - (src1[i * 4 + 0] * (65536 - x) + src2[i * 4 + 0] * x) >> 16; - dest[i * 4 + 1] = - (src1[i * 4 + 1] * (65536 - x) + src2[i * 4 + 1] * x) >> 16; - - if (2 * i + 1 < n) { - dest[i * 4 + 2] = - (src1[i * 4 + 2] * (65536 - x) + src2[i * 4 + 2] * x) >> 16; - dest[i * 4 + 3] = - (src1[i * 4 + 3] * (65536 - x) + src2[i * 4 + 3] * x) >> 16; - } - } + orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, quads * 4); } @@ -595,22 +608,10 @@ void vs_scanline_merge_linear_UYVY (uint8_t * dest, uint8_t * src1, uint8_t * src2, int n, int x) { - int i; int quads = (n + 1) / 2; + uint32_t value = x >> 8; - for (i = 0; i < quads; i++) { - dest[i * 4 + 0] = - (src1[i * 4 + 0] * (65536 - x) + src2[i * 4 + 0] * x) >> 16; - dest[i * 4 + 1] = - (src1[i * 4 + 1] * (65536 - x) + src2[i * 4 + 1] * x) >> 16; - - if (2 * i + 1 < n) { - dest[i * 4 + 2] = - (src1[i * 4 + 2] * (65536 - x) + src2[i * 4 + 2] * x) >> 16; - dest[i * 4 + 3] = - (src1[i * 4 + 3] * (65536 - x) + src2[i * 4 + 3] * x) >> 16; - } - } + orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, quads * 4); } diff --git a/gst/videoscale/vs_scanline.h b/gst/videoscale/vs_scanline.h index 2e0b179..e358969 100644 --- a/gst/videoscale/vs_scanline.h +++ b/gst/videoscale/vs_scanline.h @@ -28,7 +28,7 @@ #ifndef __VS_SCANLINE_H__ #define __VS_SCANLINE_H__ -#include +#include <_stdint.h> void vs_scanline_downsample_Y (uint8_t *dest, uint8_t *src, int n); void vs_scanline_resample_nearest_Y (uint8_t *dest, uint8_t *src, int n, int src_width, int *accumulator, int increment); -- 2.7.4