From 7f31bfeddbe4a0309b2a79129c1d526ba5725864 Mon Sep 17 00:00:00 2001 From: James Zern Date: Tue, 18 Oct 2016 22:46:06 -0700 Subject: [PATCH] Revert "vp9_bitstream: Encode tiles in parallel" This reverts commit 9e8efa5b189a5abf78b1bcbc8076893728129d1e. this change causes ubsan warnings, failures in vpxenc_vp9_webm_rt_multithread_tiled BUG=webm:1309 Change-Id: I020c7be985c771bfff4b3de1afe51cc8edb980da --- vp9/encoder/vp9_bitstream.c | 123 +------------------------------------------- vp9/encoder/vp9_bitstream.h | 18 ------- vp9/encoder/vp9_encoder.c | 5 +- vp9/encoder/vp9_encoder.h | 1 - 4 files changed, 2 insertions(+), 145 deletions(-) diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 048bdb7..22b28de 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -915,120 +915,6 @@ int vp9_get_refresh_mask(VP9_COMP *cpi) { } } -static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) { - MACROBLOCKD *const xd = &data->xd; - vpx_start_encode(&data->bit_writer, data->dest); - write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info, - &data->bit_writer, &data->tok, data->tok_end, - &data->max_mv_magnitude, data->interp_filter_selected); - assert(data->tok == data->tok_end); - vpx_stop_encode(&data->bit_writer); - return 1; -} - -void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) { - if (cpi->vp9_bitstream_worker_data) { - int i; - for (i = 1; i < cpi->num_workers; ++i) { - vpx_free(cpi->vp9_bitstream_worker_data[i].dest); - } - vpx_free(cpi->vp9_bitstream_worker_data); - cpi->vp9_bitstream_worker_data = NULL; - } -} - -static int encode_tiles_buffer_alloc(VP9_COMP *const cpi) { - int i; - cpi->vp9_bitstream_worker_data = - vpx_calloc(cpi->num_workers, sizeof(*cpi->vp9_bitstream_worker_data)); - if (!cpi->vp9_bitstream_worker_data) return 1; - for (i = 1; i < cpi->num_workers; ++i) { - cpi->vp9_bitstream_worker_data[i].dest_size = - cpi->oxcf.width * cpi->oxcf.height; - cpi->vp9_bitstream_worker_data[i].dest = - vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size); - if (!cpi->vp9_bitstream_worker_data[i].dest) return 1; - } - return 0; -} - -static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - VP9_COMMON *const cm = &cpi->common; - const int tile_cols = 1 << cm->log2_tile_cols; - const int num_workers = cpi->num_workers; - size_t total_size = 0; - int tile_col = 0; - - if (!cpi->vp9_bitstream_worker_data || - cpi->vp9_bitstream_worker_data[1].dest_size > - (cpi->oxcf.width * cpi->oxcf.height)) { - vp9_bitstream_encode_tiles_buffer_dealloc(cpi); - if (encode_tiles_buffer_alloc(cpi)) return 0; - } - - while (tile_col < tile_cols) { - int i, j; - for (i = 0; i < num_workers && tile_col < tile_cols; ++i) { - VPxWorker *const worker = &cpi->workers[i]; - VP9BitstreamWorkerData *const data = &cpi->vp9_bitstream_worker_data[i]; - - // Populate the worker data. - data->xd = cpi->td.mb.e_mbd; - data->tile_idx = tile_col; - data->tok = cpi->tile_tok[0][tile_col]; - data->tok_end = cpi->tile_tok[0][tile_col] + cpi->tok_count[0][tile_col]; - data->max_mv_magnitude = cpi->max_mv_magnitude; - memset(data->interp_filter_selected, 0, - sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE); - - // First thread can directly write into the output buffer. - if (i == 0) { - data->dest = data_ptr + total_size + 4; - } - worker->data1 = cpi; - worker->data2 = data; - worker->hook = (VPxWorkerHook)encode_tile_worker; - worker->had_error = 0; - - if (i < num_workers - 1) { - winterface->launch(worker); - } else { - winterface->execute(worker); - } - ++tile_col; - } - for (j = 0; j < i; ++j) { - VPxWorker *const worker = &cpi->workers[j]; - VP9BitstreamWorkerData *const data = - (VP9BitstreamWorkerData *)worker->data2; - uint32_t tile_size; - int k; - - if (!winterface->sync(worker)) return 0; - tile_size = data->bit_writer.pos; - - // Aggregate per-thread bitstream stats. - cpi->max_mv_magnitude = - VPXMAX(cpi->max_mv_magnitude, data->max_mv_magnitude); - for (k = 0; k < SWITCHABLE; ++k) { - cpi->interp_filter_selected[0][k] += data->interp_filter_selected[0][k]; - } - - // Prefix the size of the tile on all but the last. - if (tile_col != tile_cols || j < i - 1) { - mem_put_be32(data_ptr + total_size, tile_size); - total_size += 4; - } - if (j > 0) { - memcpy(data_ptr + total_size, data->dest, tile_size); - } - total_size += tile_size; - } - } - return total_size; -} - static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -1042,14 +928,6 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); - // Encoding tiles in parallel is done only for realtime mode now. In other - // modes the speed up is insignificant and requires further testing to ensure - // that it does not make the overall process worse in any case. - if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 && - tile_cols > 1) { - return encode_tiles_mt(cpi, data_ptr); - } - for (tile_row = 0; tile_row < tile_rows; tile_row++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { int tile_idx = tile_row * tile_cols + tile_col; @@ -1077,6 +955,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { total_size += residual_bc.pos; } } + return total_size; } diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index c669d71..8c97d37 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -17,28 +17,10 @@ extern "C" { #include "vp9/encoder/vp9_encoder.h" -typedef struct VP9BitstreamWorkerData { - uint8_t *dest; - int dest_size; - TOKENEXTRA *tok; - TOKENEXTRA *tok_end; - vpx_writer bit_writer; - int tile_idx; - unsigned int max_mv_magnitude; - // The size of interp_filter_selected in VP9_COMP is actually - // MAX_REFERENCE_FRAMES x SWITCHABLE. But when encoding tiles, all we ever do - // is increment the very first index (index 0) for the first dimension. Hence - // this is sufficient. - int interp_filter_selected[1][SWITCHABLE]; - DECLARE_ALIGNED(16, MACROBLOCKD, xd); -} VP9BitstreamWorkerData; - int vp9_get_refresh_mask(VP9_COMP *cpi); void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); -void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi); - static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) { return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref && diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 43b708b..d98c493 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2030,10 +2030,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vpx_free(cpi->tile_thr_data); vpx_free(cpi->workers); - if (cpi->num_workers > 1) { - vp9_loop_filter_dealloc(&cpi->lf_row_sync); - vp9_bitstream_encode_tiles_buffer_dealloc(cpi); - } + if (cpi->num_workers > 1) vp9_loop_filter_dealloc(&cpi->lf_row_sync); vp9_alt_ref_aq_destroy(cpi->alt_ref_aq); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index e353d47..77eb31c 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -601,7 +601,6 @@ typedef struct VP9_COMP { VPxWorker *workers; struct EncWorkerData *tile_thr_data; VP9LfSync lf_row_sync; - struct VP9BitstreamWorkerData *vp9_bitstream_worker_data; int keep_level_stats; Vp9LevelInfo level_info; -- 2.7.4