From 9be707d54dd3cdbfcb3f094b2ccc230157c6c540 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Tim-Philipp=20M=C3=BCller?= Date: Sat, 16 Apr 2011 18:49:27 +0100 Subject: [PATCH] gst: update disted orc backup code --- gst/deinterlace/tvtime-dist.c | 110 ++--- gst/deinterlace/tvtime-dist.h | 18 +- gst/videobox/gstvideoboxorc-dist.c | 27 +- gst/videobox/gstvideoboxorc-dist.h | 12 +- gst/videomixer/blendorc-dist.c | 813 ++++++++++++++++++------------------- gst/videomixer/blendorc-dist.h | 23 +- 6 files changed, 526 insertions(+), 477 deletions(-) diff --git a/gst/deinterlace/tvtime-dist.c b/gst/deinterlace/tvtime-dist.c index 957f3be..e1f7ed3 100644 --- a/gst/deinterlace/tvtime-dist.c +++ b/gst/deinterlace/tvtime-dist.c @@ -4,9 +4,6 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif -#ifndef DISABLE_ORC -#include -#endif #include #ifndef _ORC_INTEGER_TYPEDEFS_ @@ -32,6 +29,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include typedef signed char orc_int8; @@ -71,16 +69,32 @@ typedef union orc_int16 x4[4]; } orc_union64; #endif +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif -void deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, - const guint8 * s3, const guint8 * s4, const guint8 * s5, int n); -void deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, - int n); -void deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, - const guint8 * s2, const guint8 * s3, int n); -void deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int p1, - int n); +#ifndef DISABLE_ORC +#include +#endif +void deinterlace_line_vfir (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, + const guint8 * ORC_RESTRICT s5, int n); +void deinterlace_line_linear (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n); +void deinterlace_line_linear_blend (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, int n); +void deinterlace_line_greedy (orc_uint8 * ORC_RESTRICT d1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + int p1, int n); /* begin Orc C target preamble */ @@ -114,6 +128,7 @@ void deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, #define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0)) #define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff))) #define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0)) +#ifndef ORC_RESTRICT #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define ORC_RESTRICT restrict #elif defined(__GNUC__) && __GNUC__ >= 4 @@ -121,6 +136,7 @@ void deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, #else #define ORC_RESTRICT #endif +#endif /* end Orc C target preamble */ @@ -128,8 +144,9 @@ void deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, /* deinterlace_line_vfir */ #ifdef DISABLE_ORC void -deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, - const guint8 * s3, const guint8 * s4, const guint8 * s5, int n) +deinterlace_line_vfir (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, + const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, + const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, int n) { int i; orc_int8 *ORC_RESTRICT ptr0; @@ -167,7 +184,7 @@ deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, ptr8 = (orc_int8 *) s5; /* 16: loadpw */ - var40.i = 0x00000004; /* 4 or 1.97626e-323f */ + var40.i = (int) 0x00000004; /* 4 or 1.97626e-323f */ for (i = 0; i < n; i++) { /* 0: loadb */ @@ -255,7 +272,7 @@ _backup_deinterlace_line_vfir (OrcExecutor * ORC_RESTRICT ex) ptr8 = (orc_int8 *) ex->arrays[8]; /* 16: loadpw */ - var40.i = 0x00000004; /* 4 or 1.97626e-323f */ + var40.i = (int) 0x00000004; /* 4 or 1.97626e-323f */ for (i = 0; i < n; i++) { /* 0: loadb */ @@ -303,8 +320,9 @@ _backup_deinterlace_line_vfir (OrcExecutor * ORC_RESTRICT ex) } void -deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, - const guint8 * s3, const guint8 * s4, const guint8 * s5, int n) +deinterlace_line_vfir (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, + const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, + const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -314,7 +332,6 @@ deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "deinterlace_line_vfir"); @@ -362,7 +379,7 @@ deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -386,8 +403,8 @@ deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, /* deinterlace_line_linear */ #ifdef DISABLE_ORC void -deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, - int n) +deinterlace_line_linear (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n) { int i; orc_int8 *ORC_RESTRICT ptr0; @@ -447,8 +464,8 @@ _backup_deinterlace_line_linear (OrcExecutor * ORC_RESTRICT ex) } void -deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, - int n) +deinterlace_line_linear (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -458,7 +475,6 @@ deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "deinterlace_line_linear"); @@ -470,7 +486,7 @@ deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -491,8 +507,9 @@ deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, /* deinterlace_line_linear_blend */ #ifdef DISABLE_ORC void -deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, - const guint8 * s2, const guint8 * s3, int n) +deinterlace_line_linear_blend (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, int n) { int i; orc_int8 *ORC_RESTRICT ptr0; @@ -519,7 +536,7 @@ deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, ptr6 = (orc_int8 *) s3; /* 9: loadpw */ - var38.i = 0x00000002; /* 2 or 9.88131e-324f */ + var38.i = (int) 0x00000002; /* 2 or 9.88131e-324f */ for (i = 0; i < n; i++) { /* 0: loadb */ @@ -582,7 +599,7 @@ _backup_deinterlace_line_linear_blend (OrcExecutor * ORC_RESTRICT ex) ptr6 = (orc_int8 *) ex->arrays[6]; /* 9: loadpw */ - var38.i = 0x00000002; /* 2 or 9.88131e-324f */ + var38.i = (int) 0x00000002; /* 2 or 9.88131e-324f */ for (i = 0; i < n; i++) { /* 0: loadb */ @@ -616,8 +633,9 @@ _backup_deinterlace_line_linear_blend (OrcExecutor * ORC_RESTRICT ex) } void -deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, - const guint8 * s2, const guint8 * s3, int n) +deinterlace_line_linear_blend (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -627,7 +645,6 @@ deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "deinterlace_line_linear_blend"); @@ -661,7 +678,7 @@ deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -683,9 +700,10 @@ deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, /* deinterlace_line_greedy */ #ifdef DISABLE_ORC void -deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int p1, - int n) +deinterlace_line_greedy (orc_uint8 * ORC_RESTRICT d1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + int p1, int n) { int i; orc_int8 *ORC_RESTRICT ptr0; @@ -728,9 +746,9 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, ptr7 = (orc_int8 *) s4; /* 11: loadpb */ - var44 = 0x00000080; /* 128 or 6.32404e-322f */ + var44 = (int) 0x00000080; /* 128 or 6.32404e-322f */ /* 13: loadpb */ - var45 = 0x00000080; /* 128 or 6.32404e-322f */ + var45 = (int) 0x00000080; /* 128 or 6.32404e-322f */ /* 21: loadpb */ var46 = p1; /* 23: loadpb */ @@ -835,9 +853,9 @@ _backup_deinterlace_line_greedy (OrcExecutor * ORC_RESTRICT ex) ptr7 = (orc_int8 *) ex->arrays[7]; /* 11: loadpb */ - var44 = 0x00000080; /* 128 or 6.32404e-322f */ + var44 = (int) 0x00000080; /* 128 or 6.32404e-322f */ /* 13: loadpb */ - var45 = 0x00000080; /* 128 or 6.32404e-322f */ + var45 = (int) 0x00000080; /* 128 or 6.32404e-322f */ /* 21: loadpb */ var46 = ex->params[24]; /* 23: loadpb */ @@ -897,9 +915,10 @@ _backup_deinterlace_line_greedy (OrcExecutor * ORC_RESTRICT ex) } void -deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int p1, - int n) +deinterlace_line_greedy (orc_uint8 * ORC_RESTRICT d1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + int p1, int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -909,7 +928,6 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "deinterlace_line_greedy"); @@ -981,7 +999,7 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, orc_program_append_2 (p, "maxub", 0, ORC_VAR_D1, ORC_VAR_T10, ORC_VAR_T11, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); diff --git a/gst/deinterlace/tvtime-dist.h b/gst/deinterlace/tvtime-dist.h index a86bd34..9578a83 100644 --- a/gst/deinterlace/tvtime-dist.h +++ b/gst/deinterlace/tvtime-dist.h @@ -35,6 +35,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include typedef signed char orc_int8; @@ -57,10 +58,19 @@ typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16; typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32; typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64; #endif -void deinterlace_line_vfir (guint8 * d1, const guint8 * s1, const guint8 * s2, const guint8 * s3, const guint8 * s4, const guint8 * s5, int n); -void deinterlace_line_linear (guint8 * d1, const guint8 * s1, const guint8 * s2, int n); -void deinterlace_line_linear_blend (guint8 * d1, const guint8 * s1, const guint8 * s2, const guint8 * s3, int n); -void deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, int p1, int n); +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif +void deinterlace_line_vfir (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, int n); +void deinterlace_line_linear (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n); +void deinterlace_line_linear_blend (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int n); +void deinterlace_line_greedy (orc_uint8 * ORC_RESTRICT d1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, int p1, int n); #ifdef __cplusplus } diff --git a/gst/videobox/gstvideoboxorc-dist.c b/gst/videobox/gstvideoboxorc-dist.c index f0e03f8..6e9cd10 100644 --- a/gst/videobox/gstvideoboxorc-dist.c +++ b/gst/videobox/gstvideoboxorc-dist.c @@ -4,9 +4,6 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif -#ifndef DISABLE_ORC -#include -#endif #include #ifndef _ORC_INTEGER_TYPEDEFS_ @@ -32,6 +29,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include typedef signed char orc_int8; @@ -71,8 +69,20 @@ typedef union orc_int16 x4[4]; } orc_union64; #endif +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif -void orc_splat_u32 (guint32 * d1, int p1, int n); +#ifndef DISABLE_ORC +#include +#endif +void orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n); /* begin Orc C target preamble */ @@ -106,6 +116,7 @@ void orc_splat_u32 (guint32 * d1, int p1, int n); #define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0)) #define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff))) #define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0)) +#ifndef ORC_RESTRICT #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define ORC_RESTRICT restrict #elif defined(__GNUC__) && __GNUC__ >= 4 @@ -113,6 +124,7 @@ void orc_splat_u32 (guint32 * d1, int p1, int n); #else #define ORC_RESTRICT #endif +#endif /* end Orc C target preamble */ @@ -120,7 +132,7 @@ void orc_splat_u32 (guint32 * d1, int p1, int n); /* orc_splat_u32 */ #ifdef DISABLE_ORC void -orc_splat_u32 (guint32 * d1, int p1, int n) +orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n) { int i; orc_union32 *ORC_RESTRICT ptr0; @@ -166,7 +178,7 @@ _backup_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex) } void -orc_splat_u32 (guint32 * d1, int p1, int n) +orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -176,7 +188,6 @@ orc_splat_u32 (guint32 * d1, int p1, int n) if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "orc_splat_u32"); @@ -187,7 +198,7 @@ orc_splat_u32 (guint32 * d1, int p1, int n) orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); diff --git a/gst/videobox/gstvideoboxorc-dist.h b/gst/videobox/gstvideoboxorc-dist.h index e321c29..1a711a5 100644 --- a/gst/videobox/gstvideoboxorc-dist.h +++ b/gst/videobox/gstvideoboxorc-dist.h @@ -35,6 +35,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include typedef signed char orc_int8; @@ -57,7 +58,16 @@ typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16; typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32; typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64; #endif -void orc_splat_u32 (guint32 * d1, int p1, int n); +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif +void orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n); #ifdef __cplusplus } diff --git a/gst/videomixer/blendorc-dist.c b/gst/videomixer/blendorc-dist.c index 8ead5bb..21b6ae1 100644 --- a/gst/videomixer/blendorc-dist.c +++ b/gst/videomixer/blendorc-dist.c @@ -4,9 +4,6 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif -#ifndef DISABLE_ORC -#include -#endif #include #ifndef _ORC_INTEGER_TYPEDEFS_ @@ -72,19 +69,32 @@ typedef union orc_int16 x4[4]; } orc_union64; #endif +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif -void orc_splat_u32 (guint32 * d1, int p1, int n); -void orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n); -void orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m); -void orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, - int s1_stride, int p1, int n, int m); -void orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, - int s1_stride, int p1, int n, int m); -void orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, - int s1_stride, int p1, int n, int m); -void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, - int s1_stride, int p1, int n, int m); +#ifndef DISABLE_ORC +#include +#endif +void orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n); +void orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, + int n); +void orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); /* begin Orc C target preamble */ @@ -118,6 +128,7 @@ void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, #define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0)) #define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff))) #define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0)) +#ifndef ORC_RESTRICT #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define ORC_RESTRICT restrict #elif defined(__GNUC__) && __GNUC__ >= 4 @@ -125,6 +136,7 @@ void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, #else #define ORC_RESTRICT #endif +#endif /* end Orc C target preamble */ @@ -132,7 +144,7 @@ void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, /* orc_splat_u32 */ #ifdef DISABLE_ORC void -orc_splat_u32 (guint32 * d1, int p1, int n) +orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n) { int i; orc_union32 *ORC_RESTRICT ptr0; @@ -178,7 +190,7 @@ _backup_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex) } void -orc_splat_u32 (guint32 * d1, int p1, int n) +orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -188,7 +200,6 @@ orc_splat_u32 (guint32 * d1, int p1, int n) if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "orc_splat_u32"); @@ -199,7 +210,7 @@ orc_splat_u32 (guint32 * d1, int p1, int n) orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -219,7 +230,8 @@ orc_splat_u32 (guint32 * d1, int p1, int n) /* orc_memcpy_u32 */ #ifdef DISABLE_ORC void -orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n) +orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, + int n) { int i; orc_union32 *ORC_RESTRICT ptr0; @@ -269,7 +281,8 @@ _backup_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex) } void -orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n) +orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, + int n) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -279,7 +292,6 @@ orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n) if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "orc_memcpy_u32"); @@ -290,7 +302,7 @@ orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n) orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -310,8 +322,8 @@ orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n) /* orc_blend_u8 */ #ifdef DISABLE_ORC void -orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { int i; int j; @@ -422,8 +434,8 @@ _backup_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex) } void -orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -433,7 +445,6 @@ orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_2d (p); @@ -463,7 +474,7 @@ orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -487,8 +498,8 @@ orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, /* orc_blend_argb */ #ifdef DISABLE_ORC void -orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { int i; int j; @@ -727,8 +738,8 @@ _backup_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex) } void -orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -738,7 +749,6 @@ orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_2d (p); @@ -792,7 +802,7 @@ orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -816,8 +826,8 @@ orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, /* orc_blend_bgra */ #ifdef DISABLE_ORC void -orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { int i; int j; @@ -1062,8 +1072,8 @@ _backup_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex) } void -orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -1073,7 +1083,6 @@ orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_2d (p); @@ -1131,7 +1140,7 @@ orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -1155,8 +1164,8 @@ orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, /* orc_overlay_argb */ #ifdef DISABLE_ORC void -orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { int i; int j; @@ -1180,8 +1189,8 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_union32 var56; orc_union16 var57; orc_int8 var58; - orc_union16 var59; - orc_union32 var60; + orc_union32 var59; + orc_union64 var60; orc_union64 var61; orc_union64 var62; orc_union64 var63; @@ -1189,12 +1198,11 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_union64 var65; orc_union64 var66; orc_union64 var67; - orc_union64 var68; + orc_union32 var68; orc_union32 var69; orc_union32 var70; orc_union32 var71; orc_union32 var72; - orc_union32 var73; for (j = 0; j < m; j++) { ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); @@ -1207,9 +1215,9 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var41.x4[3] = p1; /* 10: loadpl */ var53.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ - /* 27: loadpl */ + /* 26: loadpl */ var42.i = (int) 0xffffff00; /* -256 or 2.122e-314f */ - /* 30: loadpl */ + /* 29: loadpl */ var43.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */ for (i = 0; i < n; i++) { @@ -1264,90 +1272,88 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var57.i = var56.i; /* 15: convwb */ var58 = var57.i; - /* 16: convubw */ - var59.i = (orc_uint8) var58; - /* 17: splatbl */ - var60.i = + /* 16: splatbl */ + var59.i = ((var58 & 0xff) << 24) | ((var58 & 0xff) << 16) | ((var58 & 0xff) << 8) | (var58 & 0xff); - /* 18: convubw */ - var61.x4[0] = (orc_uint8) var60.x4[0]; - var61.x4[1] = (orc_uint8) var60.x4[1]; - var61.x4[2] = (orc_uint8) var60.x4[2]; - var61.x4[3] = (orc_uint8) var60.x4[3]; - /* 19: mullw */ - var62.x4[0] = (var61.x4[0] * var55.x4[0]) & 0xffff; - var62.x4[1] = (var61.x4[1] * var55.x4[1]) & 0xffff; - var62.x4[2] = (var61.x4[2] * var55.x4[2]) & 0xffff; - var62.x4[3] = (var61.x4[3] * var55.x4[3]) & 0xffff; - /* 20: div255w */ - var63.x4[0] = - ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) + - (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8; - var63.x4[1] = - ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) + - (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8; - var63.x4[2] = - ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) + - (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8; - var63.x4[3] = - ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) + - (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8; - /* 21: convubw */ - var64.x4[0] = (orc_uint8) var56.x4[0]; - var64.x4[1] = (orc_uint8) var56.x4[1]; - var64.x4[2] = (orc_uint8) var56.x4[2]; - var64.x4[3] = (orc_uint8) var56.x4[3]; - /* 22: mullw */ - var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff; - var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff; - var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff; - var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff; + /* 17: convubw */ + var60.x4[0] = (orc_uint8) var59.x4[0]; + var60.x4[1] = (orc_uint8) var59.x4[1]; + var60.x4[2] = (orc_uint8) var59.x4[2]; + var60.x4[3] = (orc_uint8) var59.x4[3]; + /* 18: mullw */ + var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff; + var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff; + var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff; + var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff; + /* 19: div255w */ + var62.x4[0] = + ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) + + (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8; + var62.x4[1] = + ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) + + (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8; + var62.x4[2] = + ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) + + (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8; + var62.x4[3] = + ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) + + (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8; + /* 20: convubw */ + var63.x4[0] = (orc_uint8) var56.x4[0]; + var63.x4[1] = (orc_uint8) var56.x4[1]; + var63.x4[2] = (orc_uint8) var56.x4[2]; + var63.x4[3] = (orc_uint8) var56.x4[3]; + /* 21: mullw */ + var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff; + var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff; + var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff; + var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff; + /* 22: addw */ + var65.x4[0] = var64.x4[0] + var52.x4[0]; + var65.x4[1] = var64.x4[1] + var52.x4[1]; + var65.x4[2] = var64.x4[2] + var52.x4[2]; + var65.x4[3] = var64.x4[3] + var52.x4[3]; /* 23: addw */ - var66.x4[0] = var65.x4[0] + var52.x4[0]; - var66.x4[1] = var65.x4[1] + var52.x4[1]; - var66.x4[2] = var65.x4[2] + var52.x4[2]; - var66.x4[3] = var65.x4[3] + var52.x4[3]; - /* 24: addw */ - var67.x4[0] = var63.x4[0] + var50.x4[0]; - var67.x4[1] = var63.x4[1] + var50.x4[1]; - var67.x4[2] = var63.x4[2] + var50.x4[2]; - var67.x4[3] = var63.x4[3] + var50.x4[3]; - /* 25: divluw */ - var68.x4[0] = - ((var67.x4[0] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) / - ((orc_uint16) var67.x4[0] & 0xff)); - var68.x4[1] = - ((var67.x4[1] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) / - ((orc_uint16) var67.x4[1] & 0xff)); - var68.x4[2] = - ((var67.x4[2] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) / - ((orc_uint16) var67.x4[2] & 0xff)); - var68.x4[3] = - ((var67.x4[3] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) / - ((orc_uint16) var67.x4[3] & 0xff)); - /* 26: convwb */ - var69.x4[0] = var68.x4[0]; - var69.x4[1] = var68.x4[1]; - var69.x4[2] = var68.x4[2]; - var69.x4[3] = var68.x4[3]; - /* 28: andl */ - var70.i = var69.i & var42.i; - /* 29: convwb */ - var71.x4[0] = var67.x4[0]; - var71.x4[1] = var67.x4[1]; - var71.x4[2] = var67.x4[2]; - var71.x4[3] = var67.x4[3]; - /* 31: andl */ - var72.i = var71.i & var43.i; - /* 32: orl */ - var73.i = var70.i | var72.i; - /* 33: storel */ - ptr0[i] = var73; + var66.x4[0] = var62.x4[0] + var50.x4[0]; + var66.x4[1] = var62.x4[1] + var50.x4[1]; + var66.x4[2] = var62.x4[2] + var50.x4[2]; + var66.x4[3] = var62.x4[3] + var50.x4[3]; + /* 24: divluw */ + var67.x4[0] = + ((var66.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) / + ((orc_uint16) var66.x4[0] & 0xff)); + var67.x4[1] = + ((var66.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) / + ((orc_uint16) var66.x4[1] & 0xff)); + var67.x4[2] = + ((var66.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) / + ((orc_uint16) var66.x4[2] & 0xff)); + var67.x4[3] = + ((var66.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) / + ((orc_uint16) var66.x4[3] & 0xff)); + /* 25: convwb */ + var68.x4[0] = var67.x4[0]; + var68.x4[1] = var67.x4[1]; + var68.x4[2] = var67.x4[2]; + var68.x4[3] = var67.x4[3]; + /* 27: andl */ + var69.i = var68.i & var42.i; + /* 28: convwb */ + var70.x4[0] = var66.x4[0]; + var70.x4[1] = var66.x4[1]; + var70.x4[2] = var66.x4[2]; + var70.x4[3] = var66.x4[3]; + /* 30: andl */ + var71.i = var70.i & var43.i; + /* 31: orl */ + var72.i = var69.i | var71.i; + /* 32: storel */ + ptr0[i] = var72; } } @@ -1381,8 +1387,8 @@ _backup_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex) orc_union32 var56; orc_union16 var57; orc_int8 var58; - orc_union16 var59; - orc_union32 var60; + orc_union32 var59; + orc_union64 var60; orc_union64 var61; orc_union64 var62; orc_union64 var63; @@ -1390,12 +1396,11 @@ _backup_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex) orc_union64 var65; orc_union64 var66; orc_union64 var67; - orc_union64 var68; + orc_union32 var68; orc_union32 var69; orc_union32 var70; orc_union32 var71; orc_union32 var72; - orc_union32 var73; for (j = 0; j < m; j++) { ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); @@ -1408,9 +1413,9 @@ _backup_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex) var41.x4[3] = ex->params[24]; /* 10: loadpl */ var53.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ - /* 27: loadpl */ + /* 26: loadpl */ var42.i = (int) 0xffffff00; /* -256 or 2.122e-314f */ - /* 30: loadpl */ + /* 29: loadpl */ var43.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */ for (i = 0; i < n; i++) { @@ -1465,98 +1470,96 @@ _backup_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex) var57.i = var56.i; /* 15: convwb */ var58 = var57.i; - /* 16: convubw */ - var59.i = (orc_uint8) var58; - /* 17: splatbl */ - var60.i = + /* 16: splatbl */ + var59.i = ((var58 & 0xff) << 24) | ((var58 & 0xff) << 16) | ((var58 & 0xff) << 8) | (var58 & 0xff); - /* 18: convubw */ - var61.x4[0] = (orc_uint8) var60.x4[0]; - var61.x4[1] = (orc_uint8) var60.x4[1]; - var61.x4[2] = (orc_uint8) var60.x4[2]; - var61.x4[3] = (orc_uint8) var60.x4[3]; - /* 19: mullw */ - var62.x4[0] = (var61.x4[0] * var55.x4[0]) & 0xffff; - var62.x4[1] = (var61.x4[1] * var55.x4[1]) & 0xffff; - var62.x4[2] = (var61.x4[2] * var55.x4[2]) & 0xffff; - var62.x4[3] = (var61.x4[3] * var55.x4[3]) & 0xffff; - /* 20: div255w */ - var63.x4[0] = - ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) + - (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8; - var63.x4[1] = - ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) + - (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8; - var63.x4[2] = - ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) + - (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8; - var63.x4[3] = - ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) + - (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8; - /* 21: convubw */ - var64.x4[0] = (orc_uint8) var56.x4[0]; - var64.x4[1] = (orc_uint8) var56.x4[1]; - var64.x4[2] = (orc_uint8) var56.x4[2]; - var64.x4[3] = (orc_uint8) var56.x4[3]; - /* 22: mullw */ - var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff; - var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff; - var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff; - var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff; + /* 17: convubw */ + var60.x4[0] = (orc_uint8) var59.x4[0]; + var60.x4[1] = (orc_uint8) var59.x4[1]; + var60.x4[2] = (orc_uint8) var59.x4[2]; + var60.x4[3] = (orc_uint8) var59.x4[3]; + /* 18: mullw */ + var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff; + var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff; + var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff; + var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff; + /* 19: div255w */ + var62.x4[0] = + ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) + + (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8; + var62.x4[1] = + ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) + + (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8; + var62.x4[2] = + ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) + + (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8; + var62.x4[3] = + ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) + + (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8; + /* 20: convubw */ + var63.x4[0] = (orc_uint8) var56.x4[0]; + var63.x4[1] = (orc_uint8) var56.x4[1]; + var63.x4[2] = (orc_uint8) var56.x4[2]; + var63.x4[3] = (orc_uint8) var56.x4[3]; + /* 21: mullw */ + var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff; + var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff; + var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff; + var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff; + /* 22: addw */ + var65.x4[0] = var64.x4[0] + var52.x4[0]; + var65.x4[1] = var64.x4[1] + var52.x4[1]; + var65.x4[2] = var64.x4[2] + var52.x4[2]; + var65.x4[3] = var64.x4[3] + var52.x4[3]; /* 23: addw */ - var66.x4[0] = var65.x4[0] + var52.x4[0]; - var66.x4[1] = var65.x4[1] + var52.x4[1]; - var66.x4[2] = var65.x4[2] + var52.x4[2]; - var66.x4[3] = var65.x4[3] + var52.x4[3]; - /* 24: addw */ - var67.x4[0] = var63.x4[0] + var50.x4[0]; - var67.x4[1] = var63.x4[1] + var50.x4[1]; - var67.x4[2] = var63.x4[2] + var50.x4[2]; - var67.x4[3] = var63.x4[3] + var50.x4[3]; - /* 25: divluw */ - var68.x4[0] = - ((var67.x4[0] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) / - ((orc_uint16) var67.x4[0] & 0xff)); - var68.x4[1] = - ((var67.x4[1] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) / - ((orc_uint16) var67.x4[1] & 0xff)); - var68.x4[2] = - ((var67.x4[2] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) / - ((orc_uint16) var67.x4[2] & 0xff)); - var68.x4[3] = - ((var67.x4[3] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) / - ((orc_uint16) var67.x4[3] & 0xff)); - /* 26: convwb */ - var69.x4[0] = var68.x4[0]; - var69.x4[1] = var68.x4[1]; - var69.x4[2] = var68.x4[2]; - var69.x4[3] = var68.x4[3]; - /* 28: andl */ - var70.i = var69.i & var42.i; - /* 29: convwb */ - var71.x4[0] = var67.x4[0]; - var71.x4[1] = var67.x4[1]; - var71.x4[2] = var67.x4[2]; - var71.x4[3] = var67.x4[3]; - /* 31: andl */ - var72.i = var71.i & var43.i; - /* 32: orl */ - var73.i = var70.i | var72.i; - /* 33: storel */ - ptr0[i] = var73; + var66.x4[0] = var62.x4[0] + var50.x4[0]; + var66.x4[1] = var62.x4[1] + var50.x4[1]; + var66.x4[2] = var62.x4[2] + var50.x4[2]; + var66.x4[3] = var62.x4[3] + var50.x4[3]; + /* 24: divluw */ + var67.x4[0] = + ((var66.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) / + ((orc_uint16) var66.x4[0] & 0xff)); + var67.x4[1] = + ((var66.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) / + ((orc_uint16) var66.x4[1] & 0xff)); + var67.x4[2] = + ((var66.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) / + ((orc_uint16) var66.x4[2] & 0xff)); + var67.x4[3] = + ((var66.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) / + ((orc_uint16) var66.x4[3] & 0xff)); + /* 25: convwb */ + var68.x4[0] = var67.x4[0]; + var68.x4[1] = var67.x4[1]; + var68.x4[2] = var67.x4[2]; + var68.x4[3] = var67.x4[3]; + /* 27: andl */ + var69.i = var68.i & var42.i; + /* 28: convwb */ + var70.x4[0] = var66.x4[0]; + var70.x4[1] = var66.x4[1]; + var70.x4[2] = var66.x4[2]; + var70.x4[3] = var66.x4[3]; + /* 30: andl */ + var71.i = var70.i & var43.i; + /* 31: orl */ + var72.i = var69.i | var71.i; + /* 32: storel */ + ptr0[i] = var72; } } } void -orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -1566,7 +1569,6 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_2d (p); @@ -1619,8 +1621,6 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, ORC_VAR_D1); orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, - ORC_VAR_D1); orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1); orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1, @@ -1652,7 +1652,7 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); @@ -1676,8 +1676,8 @@ orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, /* orc_overlay_bgra */ #ifdef DISABLE_ORC void -orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { int i; int j; @@ -1703,8 +1703,8 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_union32 var59; orc_union16 var60; orc_int8 var61; - orc_union16 var62; - orc_union32 var63; + orc_union32 var62; + orc_union64 var63; orc_union64 var64; orc_union64 var65; orc_union64 var66; @@ -1712,12 +1712,11 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_union64 var68; orc_union64 var69; orc_union64 var70; - orc_union64 var71; + orc_union32 var71; orc_union32 var72; orc_union32 var73; orc_union32 var74; orc_union32 var75; - orc_union32 var76; for (j = 0; j < m; j++) { ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); @@ -1730,9 +1729,9 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var42.x4[3] = p1; /* 11: loadpl */ var55.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ - /* 29: loadpl */ + /* 28: loadpl */ var43.i = (int) 0x00ffffff; /* 16777215 or 8.28905e-317f */ - /* 32: loadpl */ + /* 31: loadpl */ var44.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */ for (i = 0; i < n; i++) { @@ -1741,7 +1740,7 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, /* 1: shrul */ var46.i = ((orc_uint32) var45.i) >> 24; /* 2: convlw */ - var47.i = var45.i; + var47.i = var46.i; /* 3: convwb */ var48 = var47.i; /* 4: splatbl */ @@ -1788,93 +1787,91 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, /* 15: shrul */ var59.i = ((orc_uint32) var58.i) >> 24; /* 16: convlw */ - var60.i = var58.i; + var60.i = var59.i; /* 17: convwb */ var61 = var60.i; - /* 18: convubw */ - var62.i = (orc_uint8) var61; - /* 19: splatbl */ - var63.i = + /* 18: splatbl */ + var62.i = ((var61 & 0xff) << 24) | ((var61 & 0xff) << 16) | ((var61 & 0xff) << 8) | (var61 & 0xff); - /* 20: convubw */ - var64.x4[0] = (orc_uint8) var63.x4[0]; - var64.x4[1] = (orc_uint8) var63.x4[1]; - var64.x4[2] = (orc_uint8) var63.x4[2]; - var64.x4[3] = (orc_uint8) var63.x4[3]; - /* 21: mullw */ - var65.x4[0] = (var64.x4[0] * var57.x4[0]) & 0xffff; - var65.x4[1] = (var64.x4[1] * var57.x4[1]) & 0xffff; - var65.x4[2] = (var64.x4[2] * var57.x4[2]) & 0xffff; - var65.x4[3] = (var64.x4[3] * var57.x4[3]) & 0xffff; - /* 22: div255w */ - var66.x4[0] = - ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) + - (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8; - var66.x4[1] = - ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) + - (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8; - var66.x4[2] = - ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) + - (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8; - var66.x4[3] = - ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) + - (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8; - /* 23: convubw */ - var67.x4[0] = (orc_uint8) var58.x4[0]; - var67.x4[1] = (orc_uint8) var58.x4[1]; - var67.x4[2] = (orc_uint8) var58.x4[2]; - var67.x4[3] = (orc_uint8) var58.x4[3]; - /* 24: mullw */ - var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff; - var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff; - var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff; - var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff; + /* 19: convubw */ + var63.x4[0] = (orc_uint8) var62.x4[0]; + var63.x4[1] = (orc_uint8) var62.x4[1]; + var63.x4[2] = (orc_uint8) var62.x4[2]; + var63.x4[3] = (orc_uint8) var62.x4[3]; + /* 20: mullw */ + var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff; + var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff; + var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff; + var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff; + /* 21: div255w */ + var65.x4[0] = + ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) + + (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8; + var65.x4[1] = + ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) + + (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8; + var65.x4[2] = + ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) + + (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8; + var65.x4[3] = + ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) + + (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8; + /* 22: convubw */ + var66.x4[0] = (orc_uint8) var58.x4[0]; + var66.x4[1] = (orc_uint8) var58.x4[1]; + var66.x4[2] = (orc_uint8) var58.x4[2]; + var66.x4[3] = (orc_uint8) var58.x4[3]; + /* 23: mullw */ + var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff; + var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff; + var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff; + var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff; + /* 24: addw */ + var68.x4[0] = var67.x4[0] + var54.x4[0]; + var68.x4[1] = var67.x4[1] + var54.x4[1]; + var68.x4[2] = var67.x4[2] + var54.x4[2]; + var68.x4[3] = var67.x4[3] + var54.x4[3]; /* 25: addw */ - var69.x4[0] = var68.x4[0] + var54.x4[0]; - var69.x4[1] = var68.x4[1] + var54.x4[1]; - var69.x4[2] = var68.x4[2] + var54.x4[2]; - var69.x4[3] = var68.x4[3] + var54.x4[3]; - /* 26: addw */ - var70.x4[0] = var66.x4[0] + var52.x4[0]; - var70.x4[1] = var66.x4[1] + var52.x4[1]; - var70.x4[2] = var66.x4[2] + var52.x4[2]; - var70.x4[3] = var66.x4[3] + var52.x4[3]; - /* 27: divluw */ - var71.x4[0] = - ((var70.x4[0] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) / - ((orc_uint16) var70.x4[0] & 0xff)); - var71.x4[1] = - ((var70.x4[1] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) / - ((orc_uint16) var70.x4[1] & 0xff)); - var71.x4[2] = - ((var70.x4[2] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) / - ((orc_uint16) var70.x4[2] & 0xff)); - var71.x4[3] = - ((var70.x4[3] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) / - ((orc_uint16) var70.x4[3] & 0xff)); - /* 28: convwb */ - var72.x4[0] = var71.x4[0]; - var72.x4[1] = var71.x4[1]; - var72.x4[2] = var71.x4[2]; - var72.x4[3] = var71.x4[3]; - /* 30: andl */ - var73.i = var72.i & var43.i; - /* 31: convwb */ - var74.x4[0] = var70.x4[0]; - var74.x4[1] = var70.x4[1]; - var74.x4[2] = var70.x4[2]; - var74.x4[3] = var70.x4[3]; - /* 33: andl */ - var75.i = var74.i & var44.i; - /* 34: orl */ - var76.i = var73.i | var75.i; - /* 35: storel */ - ptr0[i] = var76; + var69.x4[0] = var65.x4[0] + var52.x4[0]; + var69.x4[1] = var65.x4[1] + var52.x4[1]; + var69.x4[2] = var65.x4[2] + var52.x4[2]; + var69.x4[3] = var65.x4[3] + var52.x4[3]; + /* 26: divluw */ + var70.x4[0] = + ((var69.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) / + ((orc_uint16) var69.x4[0] & 0xff)); + var70.x4[1] = + ((var69.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) / + ((orc_uint16) var69.x4[1] & 0xff)); + var70.x4[2] = + ((var69.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) / + ((orc_uint16) var69.x4[2] & 0xff)); + var70.x4[3] = + ((var69.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) / + ((orc_uint16) var69.x4[3] & 0xff)); + /* 27: convwb */ + var71.x4[0] = var70.x4[0]; + var71.x4[1] = var70.x4[1]; + var71.x4[2] = var70.x4[2]; + var71.x4[3] = var70.x4[3]; + /* 29: andl */ + var72.i = var71.i & var43.i; + /* 30: convwb */ + var73.x4[0] = var69.x4[0]; + var73.x4[1] = var69.x4[1]; + var73.x4[2] = var69.x4[2]; + var73.x4[3] = var69.x4[3]; + /* 32: andl */ + var74.i = var73.i & var44.i; + /* 33: orl */ + var75.i = var72.i | var74.i; + /* 34: storel */ + ptr0[i] = var75; } } @@ -1910,8 +1907,8 @@ _backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex) orc_union32 var59; orc_union16 var60; orc_int8 var61; - orc_union16 var62; - orc_union32 var63; + orc_union32 var62; + orc_union64 var63; orc_union64 var64; orc_union64 var65; orc_union64 var66; @@ -1919,12 +1916,11 @@ _backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex) orc_union64 var68; orc_union64 var69; orc_union64 var70; - orc_union64 var71; + orc_union32 var71; orc_union32 var72; orc_union32 var73; orc_union32 var74; orc_union32 var75; - orc_union32 var76; for (j = 0; j < m; j++) { ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); @@ -1937,9 +1933,9 @@ _backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex) var42.x4[3] = ex->params[24]; /* 11: loadpl */ var55.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ - /* 29: loadpl */ + /* 28: loadpl */ var43.i = (int) 0x00ffffff; /* 16777215 or 8.28905e-317f */ - /* 32: loadpl */ + /* 31: loadpl */ var44.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */ for (i = 0; i < n; i++) { @@ -1948,7 +1944,7 @@ _backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex) /* 1: shrul */ var46.i = ((orc_uint32) var45.i) >> 24; /* 2: convlw */ - var47.i = var45.i; + var47.i = var46.i; /* 3: convwb */ var48 = var47.i; /* 4: splatbl */ @@ -1995,101 +1991,99 @@ _backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex) /* 15: shrul */ var59.i = ((orc_uint32) var58.i) >> 24; /* 16: convlw */ - var60.i = var58.i; + var60.i = var59.i; /* 17: convwb */ var61 = var60.i; - /* 18: convubw */ - var62.i = (orc_uint8) var61; - /* 19: splatbl */ - var63.i = + /* 18: splatbl */ + var62.i = ((var61 & 0xff) << 24) | ((var61 & 0xff) << 16) | ((var61 & 0xff) << 8) | (var61 & 0xff); - /* 20: convubw */ - var64.x4[0] = (orc_uint8) var63.x4[0]; - var64.x4[1] = (orc_uint8) var63.x4[1]; - var64.x4[2] = (orc_uint8) var63.x4[2]; - var64.x4[3] = (orc_uint8) var63.x4[3]; - /* 21: mullw */ - var65.x4[0] = (var64.x4[0] * var57.x4[0]) & 0xffff; - var65.x4[1] = (var64.x4[1] * var57.x4[1]) & 0xffff; - var65.x4[2] = (var64.x4[2] * var57.x4[2]) & 0xffff; - var65.x4[3] = (var64.x4[3] * var57.x4[3]) & 0xffff; - /* 22: div255w */ - var66.x4[0] = - ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) + - (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8; - var66.x4[1] = - ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) + - (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8; - var66.x4[2] = - ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) + - (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8; - var66.x4[3] = - ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) + - (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8; - /* 23: convubw */ - var67.x4[0] = (orc_uint8) var58.x4[0]; - var67.x4[1] = (orc_uint8) var58.x4[1]; - var67.x4[2] = (orc_uint8) var58.x4[2]; - var67.x4[3] = (orc_uint8) var58.x4[3]; - /* 24: mullw */ - var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff; - var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff; - var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff; - var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff; + /* 19: convubw */ + var63.x4[0] = (orc_uint8) var62.x4[0]; + var63.x4[1] = (orc_uint8) var62.x4[1]; + var63.x4[2] = (orc_uint8) var62.x4[2]; + var63.x4[3] = (orc_uint8) var62.x4[3]; + /* 20: mullw */ + var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff; + var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff; + var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff; + var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff; + /* 21: div255w */ + var65.x4[0] = + ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) + + (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8; + var65.x4[1] = + ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) + + (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8; + var65.x4[2] = + ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) + + (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8; + var65.x4[3] = + ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) + + (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8; + /* 22: convubw */ + var66.x4[0] = (orc_uint8) var58.x4[0]; + var66.x4[1] = (orc_uint8) var58.x4[1]; + var66.x4[2] = (orc_uint8) var58.x4[2]; + var66.x4[3] = (orc_uint8) var58.x4[3]; + /* 23: mullw */ + var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff; + var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff; + var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff; + var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff; + /* 24: addw */ + var68.x4[0] = var67.x4[0] + var54.x4[0]; + var68.x4[1] = var67.x4[1] + var54.x4[1]; + var68.x4[2] = var67.x4[2] + var54.x4[2]; + var68.x4[3] = var67.x4[3] + var54.x4[3]; /* 25: addw */ - var69.x4[0] = var68.x4[0] + var54.x4[0]; - var69.x4[1] = var68.x4[1] + var54.x4[1]; - var69.x4[2] = var68.x4[2] + var54.x4[2]; - var69.x4[3] = var68.x4[3] + var54.x4[3]; - /* 26: addw */ - var70.x4[0] = var66.x4[0] + var52.x4[0]; - var70.x4[1] = var66.x4[1] + var52.x4[1]; - var70.x4[2] = var66.x4[2] + var52.x4[2]; - var70.x4[3] = var66.x4[3] + var52.x4[3]; - /* 27: divluw */ - var71.x4[0] = - ((var70.x4[0] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) / - ((orc_uint16) var70.x4[0] & 0xff)); - var71.x4[1] = - ((var70.x4[1] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) / - ((orc_uint16) var70.x4[1] & 0xff)); - var71.x4[2] = - ((var70.x4[2] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) / - ((orc_uint16) var70.x4[2] & 0xff)); - var71.x4[3] = - ((var70.x4[3] & 0xff) == - 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) / - ((orc_uint16) var70.x4[3] & 0xff)); - /* 28: convwb */ - var72.x4[0] = var71.x4[0]; - var72.x4[1] = var71.x4[1]; - var72.x4[2] = var71.x4[2]; - var72.x4[3] = var71.x4[3]; - /* 30: andl */ - var73.i = var72.i & var43.i; - /* 31: convwb */ - var74.x4[0] = var70.x4[0]; - var74.x4[1] = var70.x4[1]; - var74.x4[2] = var70.x4[2]; - var74.x4[3] = var70.x4[3]; - /* 33: andl */ - var75.i = var74.i & var44.i; - /* 34: orl */ - var76.i = var73.i | var75.i; - /* 35: storel */ - ptr0[i] = var76; + var69.x4[0] = var65.x4[0] + var52.x4[0]; + var69.x4[1] = var65.x4[1] + var52.x4[1]; + var69.x4[2] = var65.x4[2] + var52.x4[2]; + var69.x4[3] = var65.x4[3] + var52.x4[3]; + /* 26: divluw */ + var70.x4[0] = + ((var69.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) / + ((orc_uint16) var69.x4[0] & 0xff)); + var70.x4[1] = + ((var69.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) / + ((orc_uint16) var69.x4[1] & 0xff)); + var70.x4[2] = + ((var69.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) / + ((orc_uint16) var69.x4[2] & 0xff)); + var70.x4[3] = + ((var69.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) / + ((orc_uint16) var69.x4[3] & 0xff)); + /* 27: convwb */ + var71.x4[0] = var70.x4[0]; + var71.x4[1] = var70.x4[1]; + var71.x4[2] = var70.x4[2]; + var71.x4[3] = var70.x4[3]; + /* 29: andl */ + var72.i = var71.i & var43.i; + /* 30: convwb */ + var73.x4[0] = var69.x4[0]; + var73.x4[1] = var69.x4[1]; + var73.x4[2] = var69.x4[2]; + var73.x4[3] = var69.x4[3]; + /* 32: andl */ + var74.i = var73.i & var44.i; + /* 33: orl */ + var75.i = var72.i | var74.i; + /* 34: storel */ + ptr0[i] = var75; } } } void -orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, - int p1, int n, int m) +orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) { OrcExecutor _ex, *ex = &_ex; static int p_inited = 0; @@ -2099,7 +2093,6 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, if (!p_inited) { orc_once_mutex_lock (); if (!p_inited) { - OrcCompileResult result; p = orc_program_new (); orc_program_set_2d (p); @@ -2128,7 +2121,7 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, ORC_VAR_D1); orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4, ORC_VAR_D1); - orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, + orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1); orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1); @@ -2154,12 +2147,10 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, ORC_VAR_D1); orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4, ORC_VAR_D1); - orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, + orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1); orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1, - ORC_VAR_D1); orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1); orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1, @@ -2191,7 +2182,7 @@ orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1); - result = orc_program_compile (p); + orc_program_compile (p); } p_inited = TRUE; orc_once_mutex_unlock (); diff --git a/gst/videomixer/blendorc-dist.h b/gst/videomixer/blendorc-dist.h index abc9aef..9346540 100644 --- a/gst/videomixer/blendorc-dist.h +++ b/gst/videomixer/blendorc-dist.h @@ -58,13 +58,22 @@ typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16; typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32; typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64; #endif -void orc_splat_u32 (guint32 * d1, int p1, int n); -void orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n); -void orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); -void orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); -void orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); -void orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); -void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif +void orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n); +void orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int n); +void orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); #ifdef __cplusplus } -- 2.7.4