From a53d5a4c442a84cacbd8225fac72db3789b3e10c Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Thu, 15 Dec 2011 14:23:36 -0500 Subject: [PATCH] Moved dequant idct into common These functions are now used by the encoder. This is WIP with the goal of creating a common idct/add for the encoder and decoder. A boost of 1.8% was seen for the HD rt test clip used. [Tero] Added needed changes to ARM side. Change-Id: Ibbb8000be09034203d7adffc457d3c3f8b06a5bf --- vp8/common/arm/arm_systemdependent.c | 12 ++++ .../arm/armv6/dequant_idct_v6.asm | 0 .../arm/armv6/dequantize_v6.asm | 0 vp8/{decoder => common}/arm/armv6/idct_blk_v6.c | 2 +- vp8/{decoder => common}/arm/dequantize_arm.c | 3 +- vp8/{decoder => common}/arm/dequantize_arm.h | 12 ++-- .../arm/neon/dequant_idct_neon.asm | 0 .../arm/neon/dequantizeb_neon.asm | 0 vp8/{decoder => common}/arm/neon/idct_blk_neon.c | 2 +- .../arm/neon/idct_dequant_0_2x_neon.asm | 0 .../arm/neon/idct_dequant_full_2x_neon.asm | 0 vp8/{decoder => common}/dequantize.c | 0 vp8/{decoder => common}/dequantize.h | 0 vp8/common/generic/systemdependent.c | 8 +++ vp8/{decoder => common}/idct_blk.c | 0 vp8/common/invtrans.c | 56 ---------------- vp8/common/invtrans.h | 50 ++++++++++++-- vp8/common/onyxc_int.h | 2 + vp8/common/reconinter.c | 77 ++-------------------- vp8/common/reconinter.h | 8 ++- vp8/{decoder => common}/x86/dequantize_mmx.asm | 0 vp8/{decoder => common}/x86/dequantize_x86.h | 0 vp8/{decoder => common}/x86/idct_blk_mmx.c | 12 +++- vp8/{decoder => common}/x86/idct_blk_sse2.c | 2 +- vp8/common/x86/x86_systemdependent.c | 8 +++ vp8/decoder/arm/arm_dsystemdependent.c | 11 ---- vp8/decoder/decodframe.c | 12 ++-- vp8/decoder/generic/dsystemdependent.c | 8 +-- vp8/decoder/onyxd_int.h | 8 +-- vp8/decoder/threading.c | 8 +-- vp8/decoder/x86/x86_dsystemdependent.c | 40 ----------- vp8/encoder/encodeframe.c | 29 ++++---- vp8/encoder/encodeintra.c | 32 +++++---- vp8/encoder/encodemb.c | 17 +++-- vp8/encoder/encodemb.h | 1 + vp8/encoder/rdopt.c | 2 +- vp8/vp8_common.mk | 21 +++++- vp8/vp8dx.mk | 7 -- vp8/vp8dx_arm.mk | 14 ---- 39 files changed, 192 insertions(+), 272 deletions(-) rename vp8/{decoder => common}/arm/armv6/dequant_idct_v6.asm (100%) rename vp8/{decoder => common}/arm/armv6/dequantize_v6.asm (100%) rename vp8/{decoder => common}/arm/armv6/idct_blk_v6.c (98%) rename vp8/{decoder => common}/arm/dequantize_arm.c (94%) rename vp8/{decoder => common}/arm/dequantize_arm.h (89%) rename vp8/{decoder => common}/arm/neon/dequant_idct_neon.asm (100%) rename vp8/{decoder => common}/arm/neon/dequantizeb_neon.asm (100%) rename vp8/{decoder => common}/arm/neon/idct_blk_neon.c (98%) rename vp8/{decoder => common}/arm/neon/idct_dequant_0_2x_neon.asm (100%) rename vp8/{decoder => common}/arm/neon/idct_dequant_full_2x_neon.asm (100%) rename vp8/{decoder => common}/dequantize.c (100%) rename vp8/{decoder => common}/dequantize.h (100%) rename vp8/{decoder => common}/idct_blk.c (100%) delete mode 100644 vp8/common/invtrans.c rename vp8/{decoder => common}/x86/dequantize_mmx.asm (100%) rename vp8/{decoder => common}/x86/dequantize_x86.h (100%) rename vp8/{decoder => common}/x86/idct_blk_mmx.c (91%) rename vp8/{decoder => common}/x86/idct_blk_sse2.c (98%) diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c index cd55a63..1e24674 100644 --- a/vp8/common/arm/arm_systemdependent.c +++ b/vp8/common/arm/arm_systemdependent.c @@ -63,6 +63,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx) rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6; rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6; rtcd->recon.intra4x4_predict = vp8_intra4x4_predict_armv6; + + rtcd->dequant.block = vp8_dequantize_b_v6; + rtcd->dequant.idct_add = vp8_dequant_idct_add_v6; + rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6; + rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; + } #endif @@ -97,6 +103,12 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx) vp8_build_intra_predictors_mby_neon; rtcd->recon.build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_neon; + + rtcd->dequant.block = vp8_dequantize_b_neon; + rtcd->dequant.idct_add = vp8_dequant_idct_add_neon; + rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon; + rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; + } #endif diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/common/arm/armv6/dequant_idct_v6.asm similarity index 100% rename from vp8/decoder/arm/armv6/dequant_idct_v6.asm rename to vp8/common/arm/armv6/dequant_idct_v6.asm diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/common/arm/armv6/dequantize_v6.asm similarity index 100% rename from vp8/decoder/arm/armv6/dequantize_v6.asm rename to vp8/common/arm/armv6/dequantize_v6.asm diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/common/arm/armv6/idct_blk_v6.c similarity index 98% rename from vp8/decoder/arm/armv6/idct_blk_v6.c rename to vp8/common/arm/armv6/idct_blk_v6.c index c1ef285..9108929 100644 --- a/vp8/decoder/arm/armv6/idct_blk_v6.c +++ b/vp8/common/arm/armv6/idct_blk_v6.c @@ -10,7 +10,7 @@ #include "vpx_config.h" #include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c similarity index 94% rename from vp8/decoder/arm/dequantize_arm.c rename to vp8/common/arm/dequantize_arm.c index 2918e05..20a8ac4 100644 --- a/vp8/decoder/arm/dequantize_arm.c +++ b/vp8/common/arm/dequantize_arm.c @@ -10,9 +10,8 @@ #include "vpx_config.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" #include "vp8/common/idct.h" -#include "vpx_mem/vpx_mem.h" #if HAVE_ARMV7 extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ); diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/common/arm/dequantize_arm.h similarity index 89% rename from vp8/decoder/arm/dequantize_arm.h rename to vp8/common/arm/dequantize_arm.h index 1123e84..0b4d8fe 100644 --- a/vp8/decoder/arm/dequantize_arm.h +++ b/vp8/common/arm/dequantize_arm.h @@ -22,13 +22,13 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6); #undef vp8_dequant_block #define vp8_dequant_block vp8_dequantize_b_v6 -#undef vp8_dequant_idct_add +#undef vp8_dequant_idct_add #define vp8_dequant_idct_add vp8_dequant_idct_add_v6 -#undef vp8_dequant_idct_add_y_block +#undef vp8_dequant_idct_add_y_block #define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6 -#undef vp8_dequant_idct_add_uv_block +#undef vp8_dequant_idct_add_uv_block #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6 #endif #endif @@ -44,13 +44,13 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon); #undef vp8_dequant_block #define vp8_dequant_block vp8_dequantize_b_neon -#undef vp8_dequant_idct_add +#undef vp8_dequant_idct_add #define vp8_dequant_idct_add vp8_dequant_idct_add_neon -#undef vp8_dequant_idct_add_y_block +#undef vp8_dequant_idct_add_y_block #define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon -#undef vp8_dequant_idct_add_uv_block +#undef vp8_dequant_idct_add_uv_block #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon #endif diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/common/arm/neon/dequant_idct_neon.asm similarity index 100% rename from vp8/decoder/arm/neon/dequant_idct_neon.asm rename to vp8/common/arm/neon/dequant_idct_neon.asm diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/common/arm/neon/dequantizeb_neon.asm similarity index 100% rename from vp8/decoder/arm/neon/dequantizeb_neon.asm rename to vp8/common/arm/neon/dequantizeb_neon.asm diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/common/arm/neon/idct_blk_neon.c similarity index 98% rename from vp8/decoder/arm/neon/idct_blk_neon.c rename to vp8/common/arm/neon/idct_blk_neon.c index 185895f..cc55843 100644 --- a/vp8/decoder/arm/neon/idct_blk_neon.c +++ b/vp8/common/arm/neon/idct_blk_neon.c @@ -10,7 +10,7 @@ #include "vpx_config.h" #include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" /* place these declarations here because we don't want to maintain them * outside of this scope diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm similarity index 100% rename from vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm rename to vp8/common/arm/neon/idct_dequant_0_2x_neon.asm diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm similarity index 100% rename from vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm rename to vp8/common/arm/neon/idct_dequant_full_2x_neon.asm diff --git a/vp8/decoder/dequantize.c b/vp8/common/dequantize.c similarity index 100% rename from vp8/decoder/dequantize.c rename to vp8/common/dequantize.c diff --git a/vp8/decoder/dequantize.h b/vp8/common/dequantize.h similarity index 100% rename from vp8/decoder/dequantize.h rename to vp8/common/dequantize.h diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 9641d8c..dbf8d65 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -70,6 +70,14 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) #if CONFIG_RUNTIME_CPU_DETECT VP8_COMMON_RTCD *rtcd = &ctx->rtcd; + + rtcd->dequant.block = vp8_dequantize_b_c; + rtcd->dequant.idct_add = vp8_dequant_idct_add_c; + rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c; + rtcd->dequant.idct_add_uv_block = + vp8_dequant_idct_add_uv_block_c; + + rtcd->idct.idct16 = vp8_short_idct4x4llm_c; rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c; rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; diff --git a/vp8/decoder/idct_blk.c b/vp8/common/idct_blk.c similarity index 100% rename from vp8/decoder/idct_blk.c rename to vp8/common/idct_blk.c diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c deleted file mode 100644 index 95e6980..0000000 --- a/vp8/common/invtrans.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "invtrans.h" - - -void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, - int pitch) -{ - if (*b->eob > 1) - { - IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->predictor, pitch, - *(b->base_dst) + b->dst, b->dst_stride); - } - else - { - IDCT_INVOKE(rtcd, idct1_scalar_add)(b->dqcoeff[0], b->predictor, pitch, - *(b->base_dst) + b->dst, b->dst_stride); - } - -} - -void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - int i; - - if(x->mode_info_context->mbmi.mode != SPLITMV) - { - /* do 2nd order transform on the dc block */ - IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->dqcoeff); - } - - for (i = 0; i < 16; i++) - { - vp8_inverse_transform_b(rtcd, &x->block[i], 16); - } - -} -void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - int i; - - for (i = 16; i < 24; i++) - { - vp8_inverse_transform_b(rtcd, &x->block[i], 8); - } - -} diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index d14573b..7eec58e 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -15,9 +15,51 @@ #include "vpx_config.h" #include "idct.h" #include "blockd.h" -extern void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch); -extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); -extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); -extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#include "onyxc_int.h" +static void eob_adjust(char *eobs, short *diff) +{ + /* eob adjust.... the idct can only skip if both the dc and eob are zero */ + int js; + for(js = 0; js < 16; js++) + { + if((eobs[js] == 0) && (diff[0] != 0)) + eobs[js]++; + diff+=16; + } +} + +static void vp8_inverse_transform_mby(MACROBLOCKD *xd, + const VP8_COMMON_RTCD *rtcd) +{ + short *DQC = xd->block[0].dequant; + /* save the dc dequant constant in case it is overridden */ + short dc_dequant_temp = DQC[0]; + + if (xd->mode_info_context->mbmi.mode != SPLITMV) + { + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) + { + IDCT_INVOKE(&rtcd->idct, iwalsh16) + (&xd->block[24].dqcoeff[0], xd->qcoeff); + } + else + { + IDCT_INVOKE(&rtcd->idct, iwalsh1) + (&xd->block[24].dqcoeff[0], xd->qcoeff); + } + eob_adjust(xd->eobs, xd->qcoeff); + + /* override the dc dequant constant */ + DQC[0] = 1; + } + DEQUANT_INVOKE (&rtcd->dequant, idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + + /* restore the dc dequant constant */ + DQC[0] = dc_dequant_temp; +} #endif diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 936fa9f..f733ff7 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -22,6 +22,7 @@ #if CONFIG_POSTPROC #include "postproc.h" #endif +#include "dequantize.h" /*#ifdef PACKET_TESTING*/ #include "header.h" @@ -73,6 +74,7 @@ typedef enum typedef struct VP8_COMMON_RTCD { #if CONFIG_RUNTIME_CPU_DETECT + vp8_dequant_rtcd_vtable_t dequant; vp8_idct_rtcd_vtable_t idct; vp8_recon_rtcd_vtable_t recon; vp8_subpix_rtcd_vtable_t subpix; diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 24c09a3..6c7af41 100644 --- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -334,11 +334,12 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) /*encoder only*/ -void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x) +void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_ystride) { unsigned char *ptr_base; unsigned char *ptr; - unsigned char *pred_ptr = x->predictor; int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; int pre_stride = x->block[0].pre_stride; @@ -348,11 +349,13 @@ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x) if ((mv_row | mv_col) & 7) { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16); + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, + dst_y, dst_ystride); } else { - RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16); + RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, + dst_ystride); } } @@ -596,69 +599,3 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) build_inter4x4_predictors_mb(xd); } } -/* encoder only*/ -static void build_inter4x4_predictors_mb_e(MACROBLOCKD *x) -{ - int i; - - if (x->mode_info_context->mbmi.partitioning < 3) - { - x->block[ 0].bmi = x->mode_info_context->bmi[ 0]; - x->block[ 2].bmi = x->mode_info_context->bmi[ 2]; - x->block[ 8].bmi = x->mode_info_context->bmi[ 8]; - x->block[10].bmi = x->mode_info_context->bmi[10]; - - build_inter_predictors4b(x, &x->block[ 0], x->block[ 0].predictor, 16); - build_inter_predictors4b(x, &x->block[ 2], x->block[ 2].predictor, 16); - build_inter_predictors4b(x, &x->block[ 8], x->block[ 8].predictor, 16); - build_inter_predictors4b(x, &x->block[10], x->block[10].predictor, 16); - } - else - { - for (i = 0; i < 16; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - x->block[i+0].bmi = x->mode_info_context->bmi[i+0]; - x->block[i+1].bmi = x->mode_info_context->bmi[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, d0->predictor, 16); - else - { - build_inter_predictors_b(d0, d0->predictor, 16, x->subpixel_predict); - build_inter_predictors_b(d1, d1->predictor, 16, x->subpixel_predict); - } - - } - - } - - for (i = 16; i < 24; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, d0->predictor, 8); - else - { - build_inter_predictors_b(d0, d0->predictor, 8, x->subpixel_predict); - build_inter_predictors_b(d1, d1->predictor, 8, x->subpixel_predict); - } - } -} -void vp8_build_inter_predictors_mb_e(MACROBLOCKD *xd) -{ - if (xd->mode_info_context->mbmi.mode != SPLITMV) - { - vp8_build_inter16x16_predictors_mb(xd, xd->predictor, &xd->predictor[256], - &xd->predictor[320], 16, 8); - } - else - { - build_4x4uvmvs(xd); - build_inter4x4_predictors_mb_e(xd); - } -} diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index 86f9d5a..f57ff73 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -21,11 +21,13 @@ extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, int dst_uvstride); -extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x); -extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf); +extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, + unsigned char *dst_y, + int dst_ystride); +extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, + vp8_subpix_fn_t sppf); extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); -extern void vp8_build_inter_predictors_mb_e(MACROBLOCKD *xd); #endif diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/common/x86/dequantize_mmx.asm similarity index 100% rename from vp8/decoder/x86/dequantize_mmx.asm rename to vp8/common/x86/dequantize_mmx.asm diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/common/x86/dequantize_x86.h similarity index 100% rename from vp8/decoder/x86/dequantize_x86.h rename to vp8/common/x86/dequantize_x86.h diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c similarity index 91% rename from vp8/decoder/x86/idct_blk_mmx.c rename to vp8/common/x86/idct_blk_mmx.c index 29276e5..49cebd6 100644 --- a/vp8/decoder/x86/idct_blk_mmx.c +++ b/vp8/common/x86/idct_blk_mmx.c @@ -10,7 +10,17 @@ #include "vpx_config.h" #include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" + +extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); + +void vp8_dequantize_b_mmx(BLOCKD *d) +{ + short *sq = (short *) d->qcoeff; + short *dq = (short *) d->dqcoeff; + short *q = (short *) d->dequant; + vp8_dequantize_b_impl_mmx(sq, dq, q); +} void vp8_dequant_idct_add_y_block_mmx (short *q, short *dq, diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/common/x86/idct_blk_sse2.c similarity index 98% rename from vp8/decoder/x86/idct_blk_sse2.c rename to vp8/common/x86/idct_blk_sse2.c index 03c2878..44e440c 100644 --- a/vp8/decoder/x86/idct_blk_sse2.c +++ b/vp8/common/x86/idct_blk_sse2.c @@ -10,7 +10,7 @@ #include "vpx_config.h" #include "vp8/common/idct.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" void vp8_idct_dequant_0_2x_sse2 (short *q, short *dq , diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c index b24cbe4..ad3a1f7 100644 --- a/vp8/common/x86/x86_systemdependent.c +++ b/vp8/common/x86/x86_systemdependent.c @@ -37,6 +37,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) if (flags & HAS_MMX) { + rtcd->dequant.block = vp8_dequantize_b_mmx; + rtcd->dequant.idct_add = vp8_dequant_idct_add_mmx; + rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; + rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; + rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx; rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx; rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx; @@ -82,6 +87,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) rtcd->recon.build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; + rtcd->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; + rtcd->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; + rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2; rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_sse2; diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c index f802c51..bf0a348 100644 --- a/vp8/decoder/arm/arm_dsystemdependent.c +++ b/vp8/decoder/arm/arm_dsystemdependent.c @@ -11,9 +11,6 @@ #include "vpx_config.h" #include "vpx_ports/arm.h" -#include "vp8/common/blockd.h" -#include "vp8/common/pragmas.h" -#include "vp8/decoder/dequantize.h" #include "vp8/decoder/onyxd_int.h" void vp8_arch_arm_decode_init(VP8D_COMP *pbi) @@ -30,20 +27,12 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi) #if HAVE_ARMV6 if (flags & HAS_MEDIA) { - pbi->dequant.block = vp8_dequantize_b_v6; - pbi->dequant.idct_add = vp8_dequant_idct_add_v6; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; } #endif #if HAVE_ARMV7 if (flags & HAS_NEON) { - pbi->dequant.block = vp8_dequantize_b_neon; - pbi->dequant.idct_add = vp8_dequant_idct_add_neon; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; } #endif #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 31eafcf..4ab09c5 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -15,7 +15,7 @@ #include "vp8/common/reconintra4x4.h" #include "vp8/common/recon.h" #include "vp8/common/reconinter.h" -#include "dequantize.h" +#include "vp8/common/dequantize.h" #include "detokenize.h" #include "vp8/common/invtrans.h" #include "vp8/common/alloccommon.h" @@ -32,7 +32,7 @@ #endif #include "vpx_mem/vpx_mem.h" #include "vp8/common/idct.h" -#include "dequantize.h" + #include "vp8/common/threading.h" #include "decoderthreading.h" #include "dboolhuff.h" @@ -218,7 +218,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { if (xd->eobs[i] > 1) { - DEQUANT_INVOKE(&pbi->dequant, idct_add) + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add) (b->qcoeff, b->dequant, *(b->base_dst) + b->dst, b->dst_stride); } @@ -247,7 +247,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { - DEQUANT_INVOKE(&pbi->dequant, block)(b); + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b); IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], xd->qcoeff); @@ -272,7 +272,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, DQC[0] = 1; } - DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block) (xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); @@ -281,7 +281,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, DQC[0] = dc_dequant_temp; } - DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block) (xd->qcoeff+16*16, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index d9f9ba3..8a84e56 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vp8/decoder/dequantize.h" +#include "vp8/common/dequantize.h" #include "vp8/decoder/onyxd_int.h" extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi); @@ -20,11 +20,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) { /* Pure C: */ #if CONFIG_RUNTIME_CPU_DETECT - pbi->mb.rtcd = &pbi->common.rtcd; - pbi->dequant.block = vp8_dequantize_b_c; - pbi->dequant.idct_add = vp8_dequant_idct_add_c; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; + pbi->mb.rtcd = &pbi->common.rtcd; #endif #if ARCH_X86 || ARCH_X86_64 diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 519a7f2..f48f517 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -16,7 +16,8 @@ #include "treereader.h" #include "vp8/common/onyxc_int.h" #include "vp8/common/threading.h" -#include "dequantize.h" + + #if CONFIG_ERROR_CONCEALMENT #include "ec_types.h" #endif @@ -93,11 +94,6 @@ typedef struct VP8Decompressor DATARATE dr[16]; -#if CONFIG_RUNTIME_CPU_DETECT - vp8_dequant_rtcd_vtable_t dequant; -#endif - - vp8_prob prob_intra; vp8_prob prob_last; vp8_prob prob_gf; diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 1967781..947b3a1 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -189,7 +189,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m { if (xd->eobs[i] > 1) { - DEQUANT_INVOKE(&pbi->dequant, idct_add) + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, idct_add) (b->qcoeff, b->dequant, *(b->base_dst) + b->dst, b->dst_stride); } @@ -217,7 +217,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { - DEQUANT_INVOKE(&pbi->dequant, block)(b); + DEQUANT_INVOKE(&pbi->common.rtcd.dequant, block)(b); IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], xd->qcoeff); @@ -248,13 +248,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m DQC = local_dequant; } - DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_y_block) (xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); } - DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) + DEQUANT_INVOKE (&pbi->common.rtcd.dequant, idct_add_uv_block) (xd->qcoeff+16*16, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c index 91dba7e..27bf5dd 100644 --- a/vp8/decoder/x86/x86_dsystemdependent.c +++ b/vp8/decoder/x86/x86_dsystemdependent.c @@ -13,47 +13,7 @@ #include "vpx_ports/x86.h" #include "vp8/decoder/onyxd_int.h" - -#if HAVE_MMX -void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); - -void vp8_dequantize_b_mmx(BLOCKD *d) -{ - short *sq = (short *) d->qcoeff; - short *dq = (short *) d->dqcoeff; - short *q = (short *) d->dequant; - vp8_dequantize_b_impl_mmx(sq, dq, q); -} -#endif - void vp8_arch_x86_decode_init(VP8D_COMP *pbi) { -#if CONFIG_RUNTIME_CPU_DETECT - int flags = x86_simd_caps(); - - /* Note: - * - * This platform can be built without runtime CPU detection as well. If - * you modify any of the function mappings present in this file, be sure - * to also update them in static mapings (/filename_.h) - */ - /* Override default functions with fastest ones for this CPU. */ -#if HAVE_MMX - if (flags & HAS_MMX) - { - pbi->dequant.block = vp8_dequantize_b_mmx; - pbi->dequant.idct_add = vp8_dequant_idct_add_mmx; - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; - } -#endif -#if HAVE_SSE2 - if (flags & HAS_SSE2) - { - pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - } -#endif -#endif } diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 0da8a9e..102dee0 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1091,8 +1091,10 @@ static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ) #endif } -int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int mb_row, int mb_col) +int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, + int mb_row, int mb_col) { + MACROBLOCKD *xd = &x->e_mbd; int rate; if (cpi->sf.RD && cpi->compressor_speed != 2) @@ -1112,14 +1114,17 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); + sum_intra_stats(cpi, x); vp8_tokenize_mb(cpi, &x->e_mbd, t); - if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED) - vp8_inverse_transform_mby(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd); - - vp8_inverse_transform_mbuv(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd); + if (xd->mode_info_context->mbmi.mode != B_PRED) + vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd)); + DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block) + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); return rate; } #ifdef SPEEDSTATS @@ -1312,12 +1317,14 @@ int vp8cx_encode_inter_macroblock if (!x->skip) { vp8_tokenize_mb(cpi, xd, t); - if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED) - { - vp8_inverse_transform_mby(IF_RTCD(&cpi->rtcd.common->idct), - &x->e_mbd); - } - vp8_inverse_transform_mbuv(IF_RTCD(&cpi->rtcd.common->idct), &x->e_mbd); + + if (xd->mode_info_context->mbmi.mode != B_PRED) + vp8_inverse_transform_mby(xd, IF_RTCD(&cpi->common.rtcd)); + + DEQUANT_INVOKE (&cpi->common.rtcd.dequant, idct_add_uv_block) + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); } else { diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index a3b800a..4378b63 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -45,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) vp8_encode_intra16x16mby(rtcd, x); - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + vp8_inverse_transform_mby(&x->e_mbd, IF_RTCD(&cpi->common.rtcd)); } else { @@ -77,8 +77,17 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, x->quantize_b(be, b); - vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 16); - + if (*b->eob > 1) + { + IDCT_INVOKE(IF_RTCD(&rtcd->common->idct), idct16)(b->dqcoeff, + b->predictor, 16, *(b->base_dst) + b->dst, b->dst_stride); + } + else + { + IDCT_INVOKE(IF_RTCD(&rtcd->common->idct), idct1_scalar_add) + (b->dqcoeff[0], b->predictor, 16, *(b->base_dst) + b->dst, + b->dst_stride); + } } void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb) @@ -96,11 +105,12 @@ void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb) void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { BLOCK *b = &x->block[0]; + MACROBLOCKD *xd = &x->e_mbd; - RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby_s)(&x->e_mbd); - ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), - b->src_stride, x->e_mbd.predictor, 16); + ENCODEMB_INVOKE(&rtcd->encodemb, submby) (x->src_diff, *(b->base_src), + b->src_stride, xd->dst.y_buffer, xd->dst.y_stride); vp8_transform_intra_mby(x); @@ -108,16 +118,17 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) if (x->optimize) vp8_optimize_mby(x, rtcd); - } void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd); + MACROBLOCKD *xd = &x->e_mbd; + + RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv_s)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, - x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256], - &x->e_mbd.predictor[320], 8); + x->src.v_buffer, x->src.uv_stride, xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride); vp8_transform_mbuv(x); @@ -125,5 +136,4 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) if (x->optimize) vp8_optimize_mbuv(x, rtcd); - } diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index e9042e1..c9f7553 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -105,10 +105,10 @@ static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) BLOCK *b = &x->block[0]; ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), - b->src_stride, x->e_mbd.predictor, 16); + b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, - x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256], - &x->e_mbd.predictor[320], 8); + x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, + x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride); } static void build_dcblock(MACROBLOCK *x) @@ -625,7 +625,7 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { - vp8_build_inter_predictors_mb_e(&x->e_mbd); + vp8_build_inter_predictors_mb(&x->e_mbd); vp8_subtract_mb(rtcd, x); @@ -635,7 +635,6 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) if (x->optimize) optimize_mb(x, rtcd); - } /* this funciton is used by first pass only */ @@ -643,15 +642,15 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { BLOCK *b = &x->block[0]; - vp8_build_inter16x16_predictors_mby(&x->e_mbd); + vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer, + x->e_mbd.dst.y_stride); ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), - b->src_stride, x->e_mbd.predictor, 16); + b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); transform_mby(x); vp8_quantize_mby(x); - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - + vp8_inverse_transform_mby(&x->e_mbd, IF_RTCD(rtcd->common)); } diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h index 597a57b..0fa87cf 100644 --- a/vp8/encoder/encodemb.h +++ b/vp8/encoder/encodemb.h @@ -12,6 +12,7 @@ #ifndef __INC_ENCODEMB_H #define __INC_ENCODEMB_H + #include "vpx_config.h" #include "block.h" diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 79fe63c..9b90034 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -2166,7 +2166,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, continue; vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]); - vp8_build_inter16x16_predictors_mby(&x->e_mbd); + vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16); if (cpi->active_map_enabled && x->active_ptr[0] == 0) { x->skip = 1; diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 683af34..e2756a6 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -20,6 +20,8 @@ VP8_COMMON_SRCS-yes += common/blockd.c VP8_COMMON_SRCS-yes += common/coefupdateprobs.h VP8_COMMON_SRCS-yes += common/debugmodes.c VP8_COMMON_SRCS-yes += common/default_coef_probs.h +VP8_COMMON_SRCS-yes += common/dequantize.c +VP8_COMMON_SRCS-yes += common/dequantize.h VP8_COMMON_SRCS-yes += common/entropy.c VP8_COMMON_SRCS-yes += common/entropymode.c VP8_COMMON_SRCS-yes += common/entropymv.c @@ -28,6 +30,7 @@ VP8_COMMON_SRCS-yes += common/filter.c VP8_COMMON_SRCS-yes += common/filter.h VP8_COMMON_SRCS-yes += common/findnearmv.c VP8_COMMON_SRCS-yes += common/generic/systemdependent.c +VP8_COMMON_SRCS-yes += common/idct_blk.c VP8_COMMON_SRCS-yes += common/idctllm.c VP8_COMMON_SRCS-yes += common/alloccommon.h VP8_COMMON_SRCS-yes += common/blockd.h @@ -57,7 +60,6 @@ VP8_COMMON_SRCS-yes += common/swapyv12buffer.h VP8_COMMON_SRCS-yes += common/systemdependent.h VP8_COMMON_SRCS-yes += common/threading.h VP8_COMMON_SRCS-yes += common/treecoder.h -VP8_COMMON_SRCS-yes += common/invtrans.c VP8_COMMON_SRCS-yes += common/loopfilter.c VP8_COMMON_SRCS-yes += common/loopfilter_filters.c VP8_COMMON_SRCS-yes += common/mbpitch.c @@ -69,9 +71,13 @@ VP8_COMMON_SRCS-yes += common/reconintra.c VP8_COMMON_SRCS-yes += common/reconintra4x4.c VP8_COMMON_SRCS-yes += common/setupintrarecon.c VP8_COMMON_SRCS-yes += common/swapyv12buffer.c + + + VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c VP8_COMMON_SRCS-yes += common/treecoder.c +VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/dequantize_x86.h VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h @@ -84,11 +90,14 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c +VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm +VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm +VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c @@ -115,6 +124,8 @@ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.h VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/recon_arm.h VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/subpixel_arm.h +VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.c +VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.h # common (armv6) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM) @@ -129,6 +140,9 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/loopfilter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/simpleloopfilter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/sixtappredict8x4_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/intra4x4_predict_v6$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dequant_idct_v6$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dequantize_v6$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_blk_v6.c # common (neon) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict4x4_neon$(ASM) @@ -151,3 +165,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x8_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dequant_idct_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/idct_dequant_full_2x_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/idct_dequant_0_2x_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dequantizeb_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/idct_blk_neon.c diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk index d88b595..d6dc153 100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@ -52,7 +52,6 @@ VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c VP8_DX_SRCS-yes += decoder/dboolhuff.c VP8_DX_SRCS-yes += decoder/decodemv.c VP8_DX_SRCS-yes += decoder/decodframe.c -VP8_DX_SRCS-yes += decoder/dequantize.c VP8_DX_SRCS-yes += decoder/detokenize.c VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h @@ -61,20 +60,14 @@ VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c VP8_DX_SRCS-yes += decoder/dboolhuff.h VP8_DX_SRCS-yes += decoder/decodemv.h VP8_DX_SRCS-yes += decoder/decoderthreading.h -VP8_DX_SRCS-yes += decoder/dequantize.h VP8_DX_SRCS-yes += decoder/detokenize.h VP8_DX_SRCS-yes += decoder/onyxd_int.h VP8_DX_SRCS-yes += decoder/treereader.h VP8_DX_SRCS-yes += decoder/onyxd_if.c VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c -VP8_DX_SRCS-yes += decoder/idct_blk.c VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes)) -VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c -VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm -VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c -VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk index b08f946..fa1aaea 100644 --- a/vp8/vp8dx_arm.mk +++ b/vp8/vp8dx_arm.mk @@ -12,17 +12,3 @@ #VP8_DX_SRCS list is modified according to different platforms. VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c -VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.c -VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.h - -#File list for armv6 -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_idct_v6$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantize_v6$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/idct_blk_v6.c - -#File list for neon -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequant_idct_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantizeb_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_blk_neon.c -- 2.7.4