#include <arm_neon.h>
#include "vp8/encoder/block.h"
-#include "vpx_mem/vpx_mem.h"
/* Sixteen position codes, each of the values 1..16 appearing exactly once.
 * The pair routine removed by this patch ANDs these codes with a per-lane
 * non-zero mask and stores the largest surviving code through eob.
 * NOTE(review): presumably each entry is the 1-based zig-zag scan position of
 * the coefficient at that raster index, and vp8_fast_quantize_b_neon uses the
 * table the same way -- confirm against the full file.
 * The switch from hex to decimal literals leaves every value unchanged. */
static const uint16_t inv_zig_zag[16] = {
- 0x0001, 0x0002, 0x0006, 0x0007,
- 0x0003, 0x0005, 0x0008, 0x000d,
- 0x0004, 0x0009, 0x000c, 0x000e,
- 0x000a, 0x000b, 0x000f, 0x0010
+ 1, 2, 6, 7,
+ 3, 5, 8, 13,
+ 4, 9, 12, 14,
+ 10, 11, 15, 16
};
void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
}
-
-void vp8_fast_quantize_b_pair_neon(BLOCK *b0, BLOCK *b1,
- BLOCKD *d0, BLOCKD *d1) { /* Quantizes two blocks in one call: (b0, d0) and
- * (b1, d1) go through the same steps independently. For each block, 16
- * coefficients are loaded from coeff, offset by round and scaled by
- * quant_fast; the signed result is written to qcoeff, qcoeff * dequant is
- * written to dqcoeff, and one byte is stored through eob.
- * Returns nothing.
- */
- const int16x8_t one_q = vdupq_n_s16(0xff), /* bit mask for the vtstq test below.
- * NOTE(review): 0xff covers only the low 8 bits of each 16-bit lane, so a
- * quantized value whose low byte is zero (e.g. 256) would test as zero and
- * be left out of the eob computation -- confirm the coefficient range. */
- b0_z0 = vld1q_s16(b0->coeff),
- b0_z1 = vld1q_s16(b0->coeff + 8),
- b0_round0 = vld1q_s16(b0->round),
- b0_round1 = vld1q_s16(b0->round + 8),
- b0_quant0 = vld1q_s16(b0->quant_fast),
- b0_quant1 = vld1q_s16(b0->quant_fast + 8),
- d0_dequant0 = vld1q_s16(d0->dequant),
- d0_dequant1 = vld1q_s16(d0->dequant + 8),
- b1_z0 = vld1q_s16(b1->coeff),
- b1_z1 = vld1q_s16(b1->coeff + 8),
- b1_round0 = vld1q_s16(b1->round),
- b1_round1 = vld1q_s16(b1->round + 8),
- b1_quant0 = vld1q_s16(b1->quant_fast),
- b1_quant1 = vld1q_s16(b1->quant_fast + 8),
- d1_dequant0 = vld1q_s16(d1->dequant),
- d1_dequant1 = vld1q_s16(d1->dequant + 8);
- const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag),
- zig_zag1 = vld1q_u16(inv_zig_zag + 8);
- int16x8_t b0_x0, b0_x1, b0_sz0, b0_sz1, b0_y0, b0_y1,
- b1_x0, b1_x1, b1_sz0, b1_sz1, b1_y0, b1_y1;
- uint16x8_t b0_eob0, b0_eob1,
- b1_eob0, b1_eob1;
- uint16x4_t b0_eob_d16, b1_eob_d16;
- uint32x2_t b0_eob_d32, b1_eob_d32;
- uint32x4_t b0_eob_q32, b1_eob_q32;
-
- /* sign of z: z >> 15 (signed shift, so each lane is 0 for z >= 0 and all
- * ones for z < 0) */
- b0_sz0 = vshrq_n_s16(b0_z0, 15);
- b0_sz1 = vshrq_n_s16(b0_z1, 15);
- b1_sz0 = vshrq_n_s16(b1_z0, 15);
- b1_sz1 = vshrq_n_s16(b1_z1, 15);
-
- /* x = abs(z) */
- b0_x0 = vabsq_s16(b0_z0);
- b0_x1 = vabsq_s16(b0_z1);
- b1_x0 = vabsq_s16(b1_z0);
- b1_x1 = vabsq_s16(b1_z1);
-
- /* x += round */
- b0_x0 = vaddq_s16(b0_x0, b0_round0);
- b0_x1 = vaddq_s16(b0_x1, b0_round1);
- b1_x0 = vaddq_s16(b1_x0, b1_round0);
- b1_x1 = vaddq_s16(b1_x1, b1_round1);
-
- /* y = 2 * (x * quant) >> 16 (vqdmulhq is a saturating doubling multiply
- * that returns the high half of the product) */
- b0_y0 = vqdmulhq_s16(b0_x0, b0_quant0);
- b0_y1 = vqdmulhq_s16(b0_x1, b0_quant1);
- b1_y0 = vqdmulhq_s16(b1_x0, b1_quant0);
- b1_y1 = vqdmulhq_s16(b1_x1, b1_quant1);
-
- /* Compensate for doubling in vqdmulhq */
- b0_y0 = vshrq_n_s16(b0_y0, 1);
- b0_y1 = vshrq_n_s16(b0_y1, 1);
- b1_y0 = vshrq_n_s16(b1_y0, 1);
- b1_y1 = vshrq_n_s16(b1_y1, 1);
-
- /* Restore sign: (y ^ sz) - sz negates y where sz is all ones and leaves
- * it unchanged where sz is 0. The signed result is kept in x from here on. */
- b0_y0 = veorq_s16(b0_y0, b0_sz0);
- b0_y1 = veorq_s16(b0_y1, b0_sz1);
- b0_x0 = vsubq_s16(b0_y0, b0_sz0);
- b0_x1 = vsubq_s16(b0_y1, b0_sz1);
- b1_y0 = veorq_s16(b1_y0, b1_sz0);
- b1_y1 = veorq_s16(b1_y1, b1_sz1);
- b1_x0 = vsubq_s16(b1_y0, b1_sz0);
- b1_x1 = vsubq_s16(b1_y1, b1_sz1);
-
- /* find non-zero elements: a lane becomes all ones when (x & one_q) has
- * any bit set, otherwise 0 */
- b0_eob0 = vtstq_s16(b0_x0, one_q);
- b0_eob1 = vtstq_s16(b0_x1, one_q);
- b1_eob0 = vtstq_s16(b1_x0, one_q);
- b1_eob1 = vtstq_s16(b1_x1, one_q);
-
- /* mask zig zag: keep each lane's inv_zig_zag code only where the lane
- * tested non-zero */
- b0_eob0 = vandq_u16(b0_eob0, zig_zag0);
- b0_eob1 = vandq_u16(b0_eob1, zig_zag1);
- b1_eob0 = vandq_u16(b1_eob0, zig_zag0);
- b1_eob1 = vandq_u16(b1_eob1, zig_zag1);
-
- /* select the largest value: fold 16 lanes down by successive max steps;
- * after vpmax both 32-bit lanes hold the maximum */
- b0_eob0 = vmaxq_u16(b0_eob0, b0_eob1);
- b0_eob_d16 = vmax_u16(vget_low_u16(b0_eob0),
- vget_high_u16(b0_eob0));
- b0_eob_q32 = vmovl_u16(b0_eob_d16);
- b0_eob_d32 = vmax_u32(vget_low_u32(b0_eob_q32),
- vget_high_u32(b0_eob_q32));
- b0_eob_d32 = vpmax_u32(b0_eob_d32, b0_eob_d32);
-
- b1_eob0 = vmaxq_u16(b1_eob0, b1_eob1);
- b1_eob_d16 = vmax_u16(vget_low_u16(b1_eob0),
- vget_high_u16(b1_eob0));
- b1_eob_q32 = vmovl_u16(b1_eob_d16);
- b1_eob_d32 = vmax_u32(vget_low_u32(b1_eob_q32),
- vget_high_u32(b1_eob_q32));
- b1_eob_d32 = vpmax_u32(b1_eob_d32, b1_eob_d32);
-
- /* qcoeff = x */
- vst1q_s16(d0->qcoeff, b0_x0);
- vst1q_s16(d0->qcoeff + 8, b0_x1);
- vst1q_s16(d1->qcoeff, b1_x0);
- vst1q_s16(d1->qcoeff + 8, b1_x1);
-
- /* dqcoeff = x * dequant (vmulq_s16 keeps the low 16 bits of each product) */
- vst1q_s16(d0->dqcoeff, vmulq_s16(d0_dequant0, b0_x0));
- vst1q_s16(d0->dqcoeff + 8, vmulq_s16(d0_dequant1, b0_x1));
- vst1q_s16(d1->dqcoeff, vmulq_s16(d1_dequant0, b1_x0));
- vst1q_s16(d1->dqcoeff + 8, vmulq_s16(d1_dequant1, b1_x1));
- /* eob: write byte lane 0 of each reduced vector through the eob pointer */
- vst1_lane_s8((int8_t *)d0->eob, vreinterpret_s8_u32(b0_eob_d32), 0);
- vst1_lane_s8((int8_t *)d1->eob, vreinterpret_s8_u32(b1_eob_d32), 0);
- return;
-}
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-#include "vp8/encoder/block.h"
-#include <math.h>
-#include "vpx_mem/vpx_mem.h"
-#include "vp8/encoder/quantize.h"
-#include "vp8/common/entropy.h"
-
-
-#if HAVE_NEON
-
-/* The vp8_quantize_mbX functions here differ from the corresponding ones in
- * quantize.c only in that they use the quantize_b_pair function pointer
- * instead of the regular quantize_b function pointer. */
-void vp8_quantize_mby_neon(MACROBLOCK *x)
-{ /* Quantizes blocks 0..15 of x two at a time through x->quantize_b_pair,
- * then block 24 through x->quantize_b when the macroblock mode is neither
- * B_PRED nor SPLITMV. NOTE(review): presumably blocks 0..15 are the luma
- * blocks and block 24 is the second-order block -- confirm. */
- int i;
- int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
- && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
- /* each call handles the pair (i, i + 1), hence the step of 2 */
- for (i = 0; i < 16; i+=2)
- x->quantize_b_pair(&x->block[i], &x->block[i+1],
- &x->e_mbd.block[i], &x->e_mbd.block[i+1]);
- /* block 24 has no partner, so it uses the single-block pointer */
- if(has_2nd_order)
- x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-}
-
-void vp8_quantize_mb_neon(MACROBLOCK *x)
-{ /* Quantizes blocks 0..23 of x two at a time through x->quantize_b_pair,
- * then block 24 through x->quantize_b when the macroblock mode is neither
- * B_PRED nor SPLITMV. */
- int i;
- int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
- && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
- /* each call handles the pair (i, i + 1), hence the step of 2 */
- for (i = 0; i < 24; i+=2)
- x->quantize_b_pair(&x->block[i], &x->block[i+1],
- &x->e_mbd.block[i], &x->e_mbd.block[i+1]);
- /* block 24 has no partner, so it uses the single-block pointer */
- if (has_2nd_order)
- x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-}
-
-
-void vp8_quantize_mbuv_neon(MACROBLOCK *x)
-{ /* Quantizes blocks 16..23 of x two at a time through x->quantize_b_pair.
- * NOTE(review): presumably these are the chroma (U/V) blocks -- confirm. */
- int i;
- /* each call handles the pair (i, i + 1), hence the step of 2 */
- for (i = 16; i < 24; i+=2)
- x->quantize_b_pair(&x->block[i], &x->block[i+1],
- &x->e_mbd.block[i], &x->e_mbd.block[i+1]);
-}
-
-#endif /* HAVE_NEON */
*d->eob = (char)(eob + 1);
}
-void vp8_quantize_mby_c(MACROBLOCK *x)
+void vp8_quantize_mby(MACROBLOCK *x)
{
int i;
int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
}
-void vp8_quantize_mb_c(MACROBLOCK *x)
+void vp8_quantize_mb(MACROBLOCK *x)
{
int i;
int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
}
-void vp8_quantize_mbuv_c(MACROBLOCK *x)
+void vp8_quantize_mbuv(MACROBLOCK *x)
{
int i;
x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
}
-/* The quantize_b_pair function pointer in the MACROBLOCK structure is set to
- * one of these two C functions if the corresponding optimized routine is not
- * available. The NEON-optimized version currently implements the fast
- * quantization for a pair of blocks. */
-void vp8_regular_quantize_b_pair(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
-{ /* Pair wrapper: runs vp8_regular_quantize_b on (b1, d1), then on (b2, d2). */
- vp8_regular_quantize_b(b1, d1);
- vp8_regular_quantize_b(b2, d2);
-}
-
-void vp8_fast_quantize_b_pair_c(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
-{ /* Pair wrapper: runs vp8_fast_quantize_b_c on (b1, d1), then on (b2, d2). */
- vp8_fast_quantize_b_c(b1, d1);
- vp8_fast_quantize_b_c(b2, d2);
-}
-
-
static const int qrounding_factors[129] =
{
48, 48, 48, 48, 48, 48, 48, 48,