From e9b8810b4d5c1db78ef07b6061b05d2d62d55eb6 Mon Sep 17 00:00:00 2001 From: James Zern Date: Sun, 31 Aug 2014 13:16:37 -0700 Subject: [PATCH] move LFWorkerData allocation to VP9LfSync this removes an assumption that worker->data1 would be pointing to a TileWorkerData allocation. additionally, within the multi-threaded loopfilter, pass VP9LfSync as a parameter to the worker hook, removing the need for a shadow pointer in LFWorkerData. Change-Id: Ic7b2faa34e3eb59dbcb8a7c67f333448fa047c88 --- vp9/common/vp9_loopfilter.h | 3 --- vp9/decoder/vp9_dthread.c | 33 +++++++++++++++++---------------- vp9/decoder/vp9_dthread.h | 9 +++++---- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h index 0ede58a..c349f36 100644 --- a/vp9/common/vp9_loopfilter.h +++ b/vp9/common/vp9_loopfilter.h @@ -124,9 +124,6 @@ typedef struct LoopFilterWorkerData { int start; int stop; int y_only; - - struct VP9LfSyncData *lf_sync; - int num_lf_workers; } LFWorkerData; // Operates on the rows described by 'lf_data'. diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index 8d83b51..fa66a8c 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -92,12 +92,12 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, VP9_COMMON *const cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only, - VP9LfSync *const lf_sync, int num_lf_workers) { + VP9LfSync *const lf_sync) { const int num_planes = y_only ? 
1 : MAX_MB_PLANE; int r, c; // SB row and col const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; - for (r = start; r < stop; r += num_lf_workers) { + for (r = start; r < stop; r += lf_sync->num_workers) { const int mi_row = r << MI_BLOCK_SIZE_LOG2; MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride; @@ -121,13 +121,10 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, } // Row-based multi-threaded loopfilter hook -static int loop_filter_row_worker(TileWorkerData *const tile_data, - void *unused) { - LFWorkerData *const lf_data = &tile_data->lfdata; - (void)unused; +static int loop_filter_row_worker(VP9LfSync *const lf_sync, + LFWorkerData *const lf_data) { loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only, - lf_data->lf_sync, lf_data->num_lf_workers); + lf_data->start, lf_data->stop, lf_data->y_only, lf_sync); return 1; } @@ -149,9 +146,10 @@ void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync, if (!frame_filter_level) return; - if (!lf_sync->sync_range || cm->last_height != cm->height) { + if (!lf_sync->sync_range || cm->last_height != cm->height || + num_workers > lf_sync->num_workers) { vp9_loop_filter_dealloc(lf_sync); - vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width); + vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); } vp9_loop_filter_frame_init(cm, frame_filter_level); @@ -169,10 +167,11 @@ void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync, // then the number of workers used by the loopfilter should be revisited. 
for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &workers[i]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - LFWorkerData *const lf_data = &tile_data->lfdata; + LFWorkerData *const lf_data = &lf_sync->lfdata[i]; worker->hook = (VP9WorkerHook)loop_filter_row_worker; + worker->data1 = lf_sync; + worker->data2 = lf_data; // Loopfilter data lf_data->frame_buffer = frame; @@ -182,9 +181,6 @@ void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync, lf_data->stop = sb_rows; lf_data->y_only = y_only; - lf_data->lf_sync = lf_sync; - lf_data->num_lf_workers = num_workers; - // Start loopfiltering if (i == num_workers - 1) { winterface->execute(worker); @@ -215,7 +211,7 @@ static int get_sync_range(int width) { // Allocate memory for lf row synchronization void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, - int width) { + int width, int num_workers) { lf_sync->rows = rows; #if CONFIG_MULTITHREAD { @@ -239,6 +235,10 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, } #endif // CONFIG_MULTITHREAD + CHECK_MEM_ERROR(cm, lf_sync->lfdata, + vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); + lf_sync->num_workers = num_workers; + CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); @@ -265,6 +265,7 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { vpx_free(lf_sync->cond_); } #endif // CONFIG_MULTITHREAD + vpx_free(lf_sync->lfdata); vpx_free(lf_sync->cur_sb_col); // clear the structure as the source of this call may be a resize in which // case this call will be followed by an _alloc() which may fail. 
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index 51c1ba6..d5810b4 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -22,9 +22,6 @@ typedef struct TileWorkerData { struct VP9Common *cm; vp9_reader bit_reader; DECLARE_ALIGNED(16, struct macroblockd, xd); - - // Row-based parallel loopfilter data - LFWorkerData lfdata; } TileWorkerData; // Loopfilter row synchronization @@ -39,11 +36,15 @@ typedef struct VP9LfSyncData { // determined by testing. Currently, it is chosen to be a power-of-2 number. int sync_range; int rows; + + // Row-based parallel loopfilter data + LFWorkerData *lfdata; + int num_workers; } VP9LfSync; // Allocate memory for loopfilter row synchronization. void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, - int width); + int width, int num_workers); // Deallocate loopfilter synchronization related mutex and data. void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); -- 2.7.4