From: Vignesh Venkatasubramanian Date: Mon, 13 Feb 2017 19:36:02 +0000 (-0800) Subject: vp9,realtime: Enable row multithreading for non-rd X-Git-Tag: v1.7.0~659^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=453f18040f62f4f1699c0e7c5e1ee288e571d9d2;p=platform%2Fupstream%2Flibvpx.git vp9,realtime: Enable row multithreading for non-rd Enable row level multithreading for realtime encodes where non-rd path is used (speed >= 5). Change-Id: I5439cb49a02171166d8e1de06c7d5e6f8e819a41 --- diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc index af41618..1947d05 100644 --- a/test/vp9_ethread_test.cc +++ b/test/vp9_ethread_test.cc @@ -274,16 +274,15 @@ class VPxEncoderThreadTest encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0); - - encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_); - // While row_mt = 1/0(with/without row-based multi-threading), several - // speed features that would adaptively adjust encoding parameters have - // to be disabled to guarantee the bit match of the resulted bitstream. - encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_); } else { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0); encoder->Control(VP9E_SET_AQ_MODE, 3); } + encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_); + // While row_mt = 1, several speed features that would adaptively adjust + // encoding parameters have to be disabled to guarantee the bit exactness + // of the resulting bitstream. + encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_); encoder_initialized_ = true; } } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 76484bf..ceead82 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3907,13 +3907,18 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, const int mi_col_start = tile_info->mi_col_start; const int mi_col_end = tile_info->mi_col_end; int mi_col; + const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + const int num_sb_cols = + get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); + int sb_col_in_tile; // Initialize the left context for the new SB row memset(&xd->left_context, 0, sizeof(xd->left_context)); memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row - for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { + for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; + mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) { const struct segmentation *const seg = &cm->seg; RD_COST dummy_rdc; const int idx_str = cm->mi_stride * mi_row + mi_col; @@ -3921,6 +3926,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; BLOCK_SIZE bsize = BLOCK_64X64; int seg_skip = 0; + + (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, + sb_col_in_tile - 1); + x->source_variance = UINT_MAX; vp9_zero(x->pred_mv); vp9_rd_cost_init(&dummy_rdc); @@ -3996,6 +4005,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, break; default: assert(0); break; } + + (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, + sb_col_in_tile, num_sb_cols); } } // end RTC play code diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 21771a0..bb0ffcb 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -5235,4 +5235,11 @@ void vp9_set_row_mt(VP9_COMP *cpi) { (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt && !cpi->use_svc) cpi->row_mt = 1; + + // In realtime mode, enable row based multi-threading for all the speed levels + // where non-rd path is used. + if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt && + !cpi->use_svc) { + cpi->row_mt = 1; + } } diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index 7b7e0fd..b618b42 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -625,6 +625,23 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } + + // Handle use_nonrd_pick_mode case. + if (cpi->sf.use_nonrd_pick_mode) { + MACROBLOCK *const x = &thread_data->td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; + int j; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + p[j].coeff = ctx->coeff_pbuf[j][0]; + p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; + pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; + p[j].eobs = ctx->eobs_pbuf[j][0]; + } + } } launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 08f3f38..3790f98 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1666,11 +1666,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, cpi->rc.frames_since_golden > 4) mode_rd_thresh = mode_rd_thresh << 3; - if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + if (rd_less_than_thresh( + best_rdc.rdcost, mode_rd_thresh, #if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, + // Synchronization of this function is only necessary when + // adaptive_rd_thresh is > 0. + cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL, #endif - &rd_thresh_freq_fact[mode_index])) + &rd_thresh_freq_fact[mode_index])) continue; if (this_mode == NEWMV) { @@ -2030,11 +2033,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; - if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + if (rd_less_than_thresh( + best_rdc.rdcost, mode_rd_thresh, #if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, + // Synchronization of this function is only necessary when + // adaptive_rd_thresh is > 0. + cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL, #endif - &rd_thresh_freq_fact[mode_index])) + &rd_thresh_freq_fact[mode_index])) continue; mi->mode = this_mode; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 6a1d888..f33ba8d 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -558,6 +558,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, sf->limit_newmv_early_exit = 0; sf->use_simple_block_yrd = 0; } + // Turn off adaptive_rd_thresh if row_mt is on for all the non-rd paths. This + // causes too many locks in realtime mode in certain platforms (Android ARM, + // Mac). + if (speed >= 5 && cpi->row_mt && cpi->num_workers > 1) { + sf->adaptive_rd_thresh = 0; + } } void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {