From 3a0602578eb765e046ecb02f2118d0d4241b88d5 Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Fri, 4 Oct 2013 14:17:06 -0700 Subject: [PATCH] Giving consistent names to IDCT/IWHT functions. The idea is to have the following names for each transform size: vp9_idct4x4_add vp9_idct4x4_1_add vp9_idct4x4_10_add vp9_idct4x4_16_add vp9_idct8x8_add vp9_idct8x8_1_add vp9_idct8x8_10_add vp9_idct8x8_64_add etc for 16x16, 32x32 The actual list of renames in this patch: vp9_idct_add_lossless -> vp9_iwht4x4_add vp9_short_iwalsh4x4_add -> vp9_iwht4x4_16_add vp9_short_iwalsh4x4_1_add -> vp9_iwht4x4_1_add vp9_idct_add -> vp9_idct4x4_add vp9_short_idct4x4_add -> vp9_idct4x4_16_add vp9_short_idct4x4_1_add -> vp9_idct4x4_1_add Change-Id: I6f43f7437c68dd30cdd05d72e213765578ed30b1 --- test/fdct4x4_test.cc | 2 +- .../arm/neon/vp9_short_idct4x4_1_add_neon.asm | 8 ++++---- vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm | 8 ++++---- vp9/common/vp9_idct.c | 23 +++++++++++----------- vp9/common/vp9_idct.h | 5 ++--- vp9/common/vp9_rtcd_defs.sh | 16 +++++++-------- vp9/common/x86/vp9_idct_intrin_sse2.c | 4 ++-- vp9/decoder/vp9_decodframe.c | 3 +-- vp9/encoder/vp9_encodeframe.c | 8 ++++---- vp9/encoder/vp9_onyx_if.c | 8 ++++---- 10 files changed, 41 insertions(+), 44 deletions(-) diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index ea40ca6..d34c791 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -31,7 +31,7 @@ void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, } void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int /*tx_type*/) { - vp9_short_idct4x4_add_c(out, dst, stride >> 1); + vp9_idct4x4_16_add_c(out, dst, stride >> 1); } void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, int stride, int tx_type) { diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm index 869ee5f..0d4a721 100644 --- a/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_idct4x4_1_add_neon.asm @@ -8,21 +8,21 @@ ; - EXPORT |vp9_short_idct4x4_1_add_neon| + EXPORT |vp9_idct4x4_1_add_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, +;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, ; int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_short_idct4x4_1_add_neon| PROC +|vp9_idct4x4_1_add_neon| PROC ldrsh r0, [r0] ; generate cospi_16_64 = 11585 @@ -63,6 +63,6 @@ vst1.32 {d7[1]}, [r12] bx lr - ENDP ; |vp9_short_idct4x4_1_add_neon| + ENDP ; |vp9_idct4x4_1_add_neon| END diff --git a/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm index 640fb93..00283fc 100644 --- a/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_idct4x4_add_neon.asm @@ -8,7 +8,7 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_short_idct4x4_add_neon| + EXPORT |vp9_idct4x4_16_add_neon| ARM REQUIRE8 PRESERVE8 @@ -16,13 +16,13 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 AREA Block, CODE, READONLY ; name this block of code -;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride) +;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int dest_stride) -|vp9_short_idct4x4_add_neon| PROC +|vp9_idct4x4_16_add_neon| PROC ; The 2D transform is done with two passes which are actually pretty ; similar. We first transform the rows. This is done by transposing @@ -185,6 +185,6 @@ vst1.32 {d26[1]}, [r1], r2 vst1.32 {d26[0]}, [r1] ; no post-increment bx lr - ENDP ; |vp9_short_idct4x4_add_neon| + ENDP ; |vp9_idct4x4_16_add_neon| END diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index 99d84c9..4636370 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -18,7 +18,7 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ int i; @@ -70,7 +70,7 @@ void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { } } -void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { +void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) { int i; int a1, e1; int16_t tmp[4]; @@ -116,7 +116,7 @@ void vp9_idct4_1d_c(int16_t *input, int16_t *output) { output[3] = step[0] - step[3]; } -void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[4 * 4]; int16_t *outptr = out; int i, j; @@ -140,7 +140,7 @@ void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) { } } -void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { +void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int i; int a1; int16_t out = dct_const_round_shift(input[0] * cospi_16_64); @@ -1286,20 +1286,19 @@ void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest, } // idct -void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob) { +void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) - vp9_short_idct4x4_add(input, dest, stride); + vp9_idct4x4_16_add(input, dest, stride); else - vp9_short_idct4x4_1_add(input, dest, stride); + vp9_idct4x4_1_add(input, dest, stride); } -void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, int stride, - int eob) { +void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) - vp9_short_iwalsh4x4_add(input, dest, stride); + vp9_iwht4x4_16_add(input, dest, stride); else - vp9_short_iwalsh4x4_1_add_c(input, dest, stride); + vp9_iwht4x4_1_add(input, dest, stride); } void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) { @@ -1348,7 +1347,7 @@ void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob) { void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, int eob) { if (tx_type == DCT_DCT) - vp9_idct_add(input, dest, stride, eob); + vp9_idct4x4_add(input, dest, stride, eob); else vp9_short_iht4x4_add(input, dest, stride, tx_type); } diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 0ef905c..a15b6d3 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h @@ -88,9 +88,8 @@ typedef struct { } transform_2d; -void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob); -void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, - int stride, int eob); +void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob); +void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob); void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob); void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob); void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob); diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 61be7c6..58b7ee7 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -267,11 +267,11 @@ specialize vp9_convolve8_avg_vert ssse3 neon dspr2 # # dct # -prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct4x4_1_add sse2 neon +prototype void vp9_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_idct4x4_1_add sse2 neon -prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct4x4_add sse2 neon +prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_idct4x4_16_add sse2 neon prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct8x8_1_add sse2 neon @@ -310,11 +310,11 @@ prototype void vp9_idct4_1d "int16_t *input, int16_t *output" specialize vp9_idct4_1d sse2 # dct and add -prototype void vp9_short_iwalsh4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_iwalsh4x4_1_add +prototype void vp9_iwht4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_iwht4x4_1_add -prototype void vp9_short_iwalsh4x4_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_iwalsh4x4_add +prototype void vp9_iwht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_iwht4x4_16_add # # Encoder functions below this point. diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index d00993c..f03af33 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -15,7 +15,7 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_idct.h" -void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); const __m128i eight = _mm_set1_epi16(8); const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64, @@ -148,7 +148,7 @@ void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE4X4(dest, input3); } -void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { +void vp9_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); int a; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index da6711e..c4073e1 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -490,8 +490,7 @@ static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - xd->itxm_add = xd->lossless ? vp9_idct_add_lossless - : vp9_idct_add; + xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; } static INTERPOLATIONFILTERTYPE read_interp_filter_type( diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 4fde38d..c9da9cc 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1868,8 +1868,8 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { // printf("Switching to lossless\n"); cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4; cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; - cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add; - cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add; + cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_iwht4x4_1_add; + cpi->mb.e_mbd.inv_txm4x4_add = vp9_iwht4x4_16_add; cpi->mb.optimize = 0; cpi->common.lf.filter_level = 0; cpi->zbin_mode_boost_enabled = 0; @@ -1878,8 +1878,8 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { // printf("Not lossless\n"); cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4; cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; - cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add; - cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add; + cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_idct4x4_1_add; + cpi->mb.e_mbd.inv_txm4x4_add = vp9_idct4x4_16_add; } } diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 753afbc..501100b 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1260,11 +1260,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.lossless = oxcf->lossless; if (cpi->oxcf.lossless) { - cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add; - cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add; + cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_iwht4x4_1_add; + cpi->mb.e_mbd.inv_txm4x4_add = vp9_iwht4x4_16_add; } else { - cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add; - cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add; + cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_idct4x4_1_add; + cpi->mb.e_mbd.inv_txm4x4_add = vp9_idct4x4_16_add; } cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL; -- 2.7.4