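Be specific about the data type size: declare the temporal filter
accumulator as uint32_t instead of unsigned int. This resolves the TODO in
vp9_temporal_filter_apply_sse4_1 and makes its sizeof assert unnecessary.
Use the convenience macro vp9_zero_array to zero the accumulator and count
buffers instead of calling memset directly.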
Change-Id: I5fadf7dbd408befb73820d85db0be4832e8cfcbd
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad avx/;
-add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
specialize qw/vp9_temporal_filter_apply sse4_1/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+ add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
}
# End vp9_high encoder functions
#include <limits.h>
#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
const uint8_t *frame2,
unsigned int block_width,
unsigned int block_height, int strength,
- int filter_weight, unsigned int *accumulator,
+ int filter_weight, uint32_t *accumulator,
uint16_t *count) {
unsigned int i, j, k;
int modifier;
void vp9_highbd_temporal_filter_apply_c(
const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8,
unsigned int block_width, unsigned int block_height, int strength,
- int filter_weight, unsigned int *accumulator, uint16_t *count) {
+ int filter_weight, uint32_t *accumulator, uint16_t *count) {
const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
unsigned int i, j, k;
unsigned int filter_weight;
int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
- DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]);
DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
MACROBLOCKD *mbd = &td->mb.e_mbd;
YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
int stride;
MV ref_mv;
- memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
- memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
+ vp9_zero_array(accumulator, 16 * 16 * 3);
+ vp9_zero_array(count, 16 * 16 * 3);
td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
td->mb.mv_limits.col_max =
// Add 'sum_u16' to 'count'. Multiply by 'pred' and add to 'accumulator.'
static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred,
- uint16_t *count, unsigned int *accumulator) {
+ uint16_t *count, uint32_t *accumulator) {
const __m128i pred_u8 = _mm_loadl_epi64((const __m128i *)pred);
const __m128i zero = _mm_setzero_si128();
__m128i count_u16 = _mm_loadu_si128((const __m128i *)count);
static void accumulate_and_store_16(const __m128i sum_0_u16,
const __m128i sum_1_u16,
const uint8_t *pred, uint16_t *count,
- unsigned int *accumulator) {
+ uint32_t *accumulator) {
const __m128i pred_u8 = _mm_loadu_si128((const __m128i *)pred);
const __m128i zero = _mm_setzero_si128();
__m128i count_0_u16 = _mm_loadu_si128((const __m128i *)count),
void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride,
const uint8_t *b, unsigned int width,
unsigned int height, int strength,
- int weight, unsigned int *accumulator,
+ int weight, uint32_t *accumulator,
uint16_t *count) {
unsigned int h;
const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
assert(width == 8 || width == 16);
- // TODO(johannkoenig) Use uint32_t for accumulator.
- assert(sizeof(*accumulator) == sizeof(uint32_t));
-
if (width == 8) {
__m128i sum_row_a, sum_row_b, sum_row_c;
__m128i mul_constants = _mm_setr_epi16(