coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
+ // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
+#if HAVE_NEON
+ max_value_ = (1 << (7 + bit_depth_)) - 1;
+#else
max_value_ = (1 << bit_depth_) - 1;
+#endif
zbin_ptr_ =
reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
round_fp_ptr_ = reinterpret_cast<int16_t *>(
const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i);
const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
const int32x4_t v_tmp_lo =
vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
const int32x4_t v_tmp_hi =
const int16x8_t dequant_mask =
vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
- int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+ int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
int32x4_t dqcoeff_0, dqcoeff_1;
int16x8_t dqcoeff;
uint16x8_t eob_max;
const int16x8_t dequant_mask =
vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
- int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+ int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
int32x4_t dqcoeff_0, dqcoeff_1;
int16x8_t dqcoeff;