From: Paul Wilkins Date: Fri, 10 Feb 2012 16:02:10 +0000 (+0000) Subject: Removal of threading code. X-Git-Tag: v1.3.0~1217^2~380^2~61 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2615ca5d41388216ec65db9d9cb0f719efef855a;p=platform%2Fupstream%2Flibvpx.git Removal of threading code. For the experimental branch we are trying to slim the codebase down removing features such as threading for now which complicate the process of development and testing. Change-Id: I657c0246aef4d1fa8c8ffc6a1adfeee45bce8e24 --- diff --git a/configure b/configure index 5be1c40..311f86e 100755 --- a/configure +++ b/configure @@ -34,7 +34,6 @@ Advanced options: ${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders) ${toggle_mem_tracker} track memory usage ${toggle_postproc} postprocessing - ${toggle_multithread} multithreaded encoding and decoding. ${toggle_spatial_resampling} spatial sampling (scaling) support ${toggle_realtime_only} enable this option while building for real-time encoding ${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses @@ -159,7 +158,6 @@ enable optimizations enable fast_unaligned #allow unaligned accesses, if supported by hw enable md5 enable spatial_resampling -enable multithread enable os_support [ -d ${source_path}/../include ] && enable alt_tree_layout @@ -257,7 +255,6 @@ CONFIG_LIST=" dc_recon runtime_cpu_detect postproc - multithread internal_stats ${CODECS} ${CODEC_FAMILIES} @@ -303,7 +300,6 @@ CMDLINE_SELECT=" dequant_tokens dc_recon postproc - multithread internal_stats ${CODECS} ${CODEC_FAMILIES} @@ -404,7 +400,6 @@ process_targets() { enabled debug_libs && DIST_DIR="${DIST_DIR}-debug" enabled codec_srcs && DIST_DIR="${DIST_DIR}-src" ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost" - ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt" ! 
enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs" DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}" case "${tgt_os}" in diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 22d249b..fcee14c 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -17,54 +17,9 @@ #include "vp8/common/idct.h" #include "vp8/common/onyxc_int.h" -#if CONFIG_MULTITHREAD -#if HAVE_UNISTD_H -#include <unistd.h> -#elif defined(_WIN32) -#include <windows.h> -typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); -#endif -#endif - extern void vp8_arch_x86_common_init(VP8_COMMON *ctx); extern void vp8_arch_arm_common_init(VP8_COMMON *ctx); -#if CONFIG_MULTITHREAD -static int get_cpu_count() -{ - int core_count = 16; - -#if HAVE_UNISTD_H -#if defined(_SC_NPROCESSORS_ONLN) - core_count = sysconf(_SC_NPROCESSORS_ONLN); -#elif defined(_SC_NPROC_ONLN) - core_count = sysconf(_SC_NPROC_ONLN); -#endif -#elif defined(_WIN32) - { - PGNSI pGNSI; - SYSTEM_INFO sysinfo; - - /* Call GetNativeSystemInfo if supported or - * GetSystemInfo otherwise. */ - - pGNSI = (PGNSI) GetProcAddress( - GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); - if (pGNSI != NULL) - pGNSI(&sysinfo); - else - GetSystemInfo(&sysinfo); - - core_count = sysinfo.dwNumberOfProcessors; - } -#else - /* other platforms */ -#endif - - return core_count > 0 ?
core_count : 1; -} -#endif - void vp8_machine_specific_config(VP8_COMMON *ctx) { #if CONFIG_RUNTIME_CPU_DETECT @@ -156,7 +111,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; -#if CONFIG_MULTITHREAD - ctx->processor_core_count = get_cpu_count(); -#endif /* CONFIG_MULTITHREAD */ } diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 1c2910c..233fd2f 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -270,9 +270,7 @@ typedef struct VP8Common #if CONFIG_RUNTIME_CPU_DETECT VP8_COMMON_RTCD rtcd; #endif -#if CONFIG_MULTITHREAD - int processor_core_count; -#endif + #if CONFIG_POSTPROC struct postproc_state postproc_state; #endif diff --git a/vp8/common/threading.h b/vp8/common/threading.h deleted file mode 100644 index 5927cb1..0000000 --- a/vp8/common/threading.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef _PTHREAD_EMULATION -#define _PTHREAD_EMULATION - -#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD - -/* Thread management macros */ -#ifdef _WIN32 -/* Win32 */ -#define _WIN32_WINNT 0x500 /* WINBASE.H - Enable signal_object_and_wait */ -#include -#include -#define THREAD_FUNCTION DWORD WINAPI -#define THREAD_FUNCTION_RETURN DWORD -#define THREAD_SPECIFIC_INDEX DWORD -#define pthread_t HANDLE -#define pthread_attr_t DWORD -#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL) -#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread)) -#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) -#define thread_sleep(nms) Sleep(nms) -#define pthread_cancel(thread) terminate_thread(thread,0) -#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();}; -#define pthread_getspecific(ts_key) TlsGetValue(ts_key) -#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value) -#define pthread_self() GetCurrentThreadId() -#else -#ifdef __APPLE__ -#include -#include -#include -#include -#include - -#else -#include -#endif - -#include -/* pthreads */ -/* Nearly everything is already defined */ -#define THREAD_FUNCTION void * -#define THREAD_FUNCTION_RETURN void * -#define THREAD_SPECIFIC_INDEX pthread_key_t -#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor); -#endif - -/* Syncrhronization macros: Win32 and Pthreads */ -#ifdef _WIN32 -#define sem_t HANDLE -#define pause(voidpara) __asm PAUSE -#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL) -#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE)) -#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL) -#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE) -#define thread_sleep(nms) Sleep(nms) - 
-#else - -#ifdef __APPLE__ -#define sem_t semaphore_t -#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z) -#define sem_wait(sem) (semaphore_wait(*sem) ) -#define sem_post(sem) semaphore_signal(*sem) -#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem) -#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ -#else -#include -#include -#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ -#endif -/* Not Windows. Assume pthreads */ - -#endif - -#if ARCH_X86 || ARCH_X86_64 -#include "vpx_ports/x86.h" -#else -#define x86_pause_hint() -#endif - -#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ - -#endif diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h deleted file mode 100644 index 60c39d1..0000000 --- a/vp8/decoder/decoderthreading.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - - - - -#ifndef _DECODER_THREADING_H -#define _DECODER_THREADING_H - -#if CONFIG_MULTITHREAD -extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); -extern void vp8_decoder_remove_threads(VP8D_COMP *pbi); -extern void vp8_decoder_create_threads(VP8D_COMP *pbi); -extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); -extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); -#endif - -#endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 3198b2c..64b58b5 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -34,8 +34,6 @@ #include "vpx_mem/vpx_mem.h" #include "vp8/common/idct.h" #include "dequantize.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" #include "dboolhuff.h" #include "vp8/common/seg_common.h" @@ -797,12 +795,6 @@ static void setup_token_decoder_partition_input(VP8D_COMP *pbi) bool_decoder++; } - -#if CONFIG_MULTITHREAD - /* Clamp number of decoder threads */ - if (pbi->decoding_thread_count > pbi->num_partitions - 1) - pbi->decoding_thread_count = pbi->num_partitions - 1; -#endif } @@ -892,12 +884,6 @@ static void setup_token_decoder(VP8D_COMP *pbi, partition += partition_size; bool_decoder++; } - -#if CONFIG_MULTITHREAD - /* Clamp number of decoder threads */ - if (pbi->decoding_thread_count > num_part - 1) - pbi->decoding_thread_count = num_part - 1; -#endif } @@ -1123,10 +1109,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) } #endif -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); -#endif } } } @@ -1467,10 +1449,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) vpx_calloc((pc->mb_rows * pc->mb_cols), 1)); /* set up frame new frame for intra coded blocks */ -#if CONFIG_MULTITHREAD - if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) -#endif - vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); + 
vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); vp8_setup_block_dptrs(xd); @@ -1503,18 +1482,6 @@ int vp8_decode_frame(VP8D_COMP *pbi) // Resset the macroblock mode info context to the start of the list xd->mode_info_context = pc->mi; -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) - { - int i; - pbi->frame_corrupt_residual = 0; - vp8mt_decode_mb_rows(pbi, xd); - vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ - for (i = 0; i < pbi->decoding_thread_count; ++i) - corrupt_tokens |= pbi->mb_row_di[i].mbd.corrupted; - } - else -#endif { int ibc = 0; int num_part = 1 << pc->multi_token_partition; diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index a812d0e..4943caf 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -21,8 +21,6 @@ #include "vp8/common/loopfilter.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/g_common.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" #include #include @@ -155,11 +153,6 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) pbi->common.current_video_frame = 0; pbi->ready_for_new_data = 1; -#if CONFIG_MULTITHREAD - pbi->max_threads = oxcf->max_threads; - vp8_decoder_create_threads(pbi); -#endif - /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid * unnecessary calling of vp8cx_init_de_quantizer() for every frame. 
*/ @@ -203,11 +196,6 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr) if (pbi->common.last_frame_seg_map != 0) vpx_free(pbi->common.last_frame_seg_map); -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); - vp8_decoder_remove_threads(pbi); -#endif #if CONFIG_ERROR_CONCEALMENT vp8_de_alloc_overlap_lists(pbi); #endif @@ -540,26 +528,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign return retcode; } -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) - { - if (swap_frame_buffers (cm)) - { -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_pop_neon(dx_store_reg); - } -#endif - pbi->common.error.error_code = VPX_CODEC_ERROR; - pbi->common.error.setjmp = 0; - pbi->num_partitions = 0; - return -1; - } - } else -#endif { if (swap_frame_buffers (cm)) { diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index df2cc6f..4114726 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -15,7 +15,6 @@ #include "vp8/common/onyxd.h" #include "treereader.h" #include "vp8/common/onyxc_int.h" -#include "vp8/common/threading.h" #include "dequantize.h" #if CONFIG_ERROR_CONCEALMENT #include "ec_types.h" @@ -90,33 +89,6 @@ typedef struct VP8Decompressor unsigned int partition_sizes[MAX_PARTITIONS]; unsigned int num_partitions; -#if CONFIG_MULTITHREAD - /* variable for threading */ - - volatile int b_multithreaded_rd; - int max_threads; - int current_mb_col_main; - int decoding_thread_count; - int allocated_decoding_thread_count; - int mt_baseline_filter_level[MAX_MB_SEGMENTS]; - int sync_range; - int *mt_current_mb_col; /* Each row remembers its already decoded column. 
*/ - unsigned char **mt_yabove_row; /* mb_rows x width */ - unsigned char **mt_uabove_row; - unsigned char **mt_vabove_row; - unsigned char **mt_yleft_col; /* mb_rows x 16 */ - unsigned char **mt_uleft_col; /* mb_rows x 8 */ - unsigned char **mt_vleft_col; /* mb_rows x 8 */ - - MB_ROW_DEC *mb_row_di; - DECODETHREAD_DATA *de_thread_data; - - pthread_t *h_decoding_thread; - sem_t *h_event_start_decoding; - sem_t h_event_end_decoding; - /* end of threading data */ -#endif - vp8_reader *mbc; int64_t last_time_stamp; int ready_for_new_data; diff --git a/vp8/decoder/reconintra_mt.h b/vp8/decoder/reconintra_mt.h index d401295..b045379 100644 --- a/vp8/decoder/reconintra_mt.h +++ b/vp8/decoder/reconintra_mt.h @@ -12,15 +12,4 @@ #ifndef __INC_RECONINTRA_MT_H #define __INC_RECONINTRA_MT_H -/* reconintra functions used in multi-threaded decoder */ -#if CONFIG_MULTITHREAD -extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); -extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); -extern void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); -extern void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); - -extern void vp8mt_predict_intra4x4(VP8D_COMP *pbi, MACROBLOCKD *x, int b_mode, unsigned char *predictor, int mb_row, int mb_col, int num); -extern void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); -#endif - #endif diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c deleted file mode 100644 index 396ffe9..0000000 --- a/vp8/decoder/threading.c +++ /dev/null @@ -1,1012 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1 -# include <unistd.h> -#endif -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/threading.h" - -#include "vp8/common/loopfilter.h" -#include "vp8/common/extend.h" -#include "vpx_ports/vpx_timer.h" -#include "detokenize.h" -#include "vp8/common/reconinter.h" -#include "reconintra_mt.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif - -extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); - -#if CONFIG_RUNTIME_CPU_DETECT -#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x) -#else -#define RTCD_VTABLE(x) NULL -#endif - -static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) -{ - VP8_COMMON *const pc = & pbi->common; - int i, j; - - for (i = 0; i < count; i++) - { - MACROBLOCKD *mbd = &mbrd[i].mbd; -#if CONFIG_RUNTIME_CPU_DETECT - mbd->rtcd = xd->rtcd; -#endif - mbd->subpixel_predict = xd->subpixel_predict; - mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; - mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; - mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - mbd->subpixel_predict_avg8x8 = xd->subpixel_predict_avg8x8; - mbd->subpixel_predict_avg16x16 = xd->subpixel_predict_avg16x16; - - mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1); - mbd->mode_info_stride = pc->mode_info_stride; - - mbd->frame_type = pc->frame_type; - mbd->frames_since_golden = pc->frames_since_golden; - mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame; - - mbd->pre = pc->yv12_fb[pc->lst_fb_idx]; - mbd->dst = pc->yv12_fb[pc->new_fb_idx]; - - vp8_setup_block_dptrs(mbd); - vp8_build_block_doffsets(mbd); - mbd->segmentation_enabled = xd->segmentation_enabled; - mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; -
vpx_memcpy(mbd->segment_feature_data, - xd->segment_feature_data, - sizeof(xd->segment_feature_data)); - vpx_memcpy(mbd->segment_feature_mask, - xd->segment_feature_mask, - sizeof(xd->segment_feature_mask)); - - /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ - vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); - /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ - vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); - /*unsigned char mode_ref_lf_delta_enabled; - unsigned char mode_ref_lf_delta_update;*/ - mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; - mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; - - mbd->current_bc = &pbi->bc2; - - for (j = 0; j < 25; j++) - { - mbd->block[j].dequant = xd->block[j].dequant; - } - - mbd->fullpixel_mask = 0xffffffff; - if(pc->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; - - } - - for (i=0; i< pc->mb_rows; i++) - pbi->mt_current_mb_col[i]=-1; -} - - -static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col) -{ - int eobtotal = 0; - int throw_residual = 0; - int i; - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else if (!vp8dx_bool_error(xd->current_bc)) - { - eobtotal = vp8_decode_mb_tokens(pbi, xd); - } - - eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED || - xd->mode_info_context->mbmi.mode == SPLITMV); - if (!eobtotal && !vp8dx_bool_error(xd->current_bc)) - { - /* Special case: Force the loopfilter to skip when eobtotal and - * mb_skip_coeff are zero. 
- * */ - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - - /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col); - vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col); - } - else - { - vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); - } - return; - } - - if (xd->segmentation_enabled) - mb_init_dequantizer(pbi, xd); - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col); - - if (xd->mode_info_context->mbmi.mode != B_PRED) - { - vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col); - } else { - vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col); - } - } - else - { - vp8_build_inter_predictors_mb(xd); - } - - /* When we have independent partitions we can apply residual even - * though other partitions within the frame are corrupt. - */ - throw_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual); - throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); - -#if CONFIG_ERROR_CONCEALMENT - if (pbi->ec_active && - (mb_row * pbi->common.mb_cols + mb_col >= pbi->mvs_corrupt_from_mb || - throw_residual)) - { - /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. 
- */ - pbi->frame_corrupt_residual = 1; - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - vp8_conceal_corrupt_mb(xd); - return; - } -#endif - - /* dequantization and idct */ - if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) - { - BLOCKD *b = &xd->block[24]; - DEQUANT_INVOKE(&pbi->dequant, block)(b); - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; - } - else - { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; - } - - DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd->block[24].diff); - } - else if (xd->mode_info_context->mbmi.mode == B_PRED) - { - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; - int b_mode = xd->mode_info_context->bmi[i].as_mode; - - vp8mt_predict_intra4x4(pbi, xd, b_mode, b->predictor, mb_row, mb_col, i); - - if (xd->eobs[i] > 1) - { - DEQUANT_INVOKE(&pbi->dequant, idct_add) - (b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - } - else - { - IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add) - (b->qcoeff[0] * b->dequant[0], b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); - ((int *)b->qcoeff)[0] = 0; - } - } - } - else - { - DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); - } - - DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) - (xd->qcoeff+16*16, xd->block[16].dequant, - xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); -} 
- - -static THREAD_FUNCTION thread_decoding_proc(void *p_data) -{ - int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; - VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); - MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT_PLANES mb_row_left_context; - - while (1) - { - if (pbi->b_multithreaded_rd == 0) - break; - - /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/ - if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) - { - if (pbi->b_multithreaded_rd == 0) - break; - else - { - VP8_COMMON *pc = &pbi->common; - MACROBLOCKD *xd = &mbrd->mbd; - - int mb_row; - int num_part = 1 << pbi->common.multi_token_partition; - volatile int *last_row_current_mb_col; - int nsync = pbi->sync_range; - - for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) - { - int i; - int recon_yoffset, recon_uvoffset; - int mb_col; - int ref_fb_idx = pc->lst_fb_idx; - int dst_fb_idx = pc->new_fb_idx; - int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - - int filter_level; - loop_filter_info_n *lfi_n = &pc->lf_info; - - pbi->mb_row_di[ithread].mb_row = mb_row; - pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part]; - - last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - /* reset above block coeffs */ - - xd->above_context = pc->above_context; - xd->left_context = &mb_row_left_context; - vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context)); - xd->up_available = (mb_row != 0); - - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - if ((mb_col & (nsync-1)) == 0) - { - while (mb_col > (*last_row_current_mb_col - nsync) && 
*last_row_current_mb_col != pc->mb_cols - 1) - { - x86_pause_hint(); - thread_sleep(0); - } - } - - /* Distance of MB to the various image edges. - * These are specified to 8th pel as they are always - * compared to values that are in 1/8th pel units. - */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - -#if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = - (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - (xd->mode_info_context->mbmi.ref_frame == - INTRA_FRAME) && - corrupt_residual) - { - /* We have an intra block with corrupt - * coefficients, better to conceal with an inter - * block. - * Interpolate MVs from neighboring MBs - * - * Note that for the first mb with corrupt - * residual in a frame, we might not discover - * that before decoding the residual. That - * happens after this check, and therefore no - * inter concealment will be done. 
- */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols, - pc->mode_info_stride); - } - } -#endif - - - xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; - - xd->left_available = (mb_col != 0); - - /* Select the appropriate reference frame for this MB */ - if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = pc->lst_fb_idx; - else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = pc->gld_fb_idx; - else - ref_fb_idx = pc->alt_fb_idx; - - xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; - - if (xd->mode_info_context->mbmi.ref_frame != - INTRA_FRAME) - { - /* propagate errors from reference frames */ - xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; - } - - decode_macroblock(pbi, xd, mb_row, mb_col); - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - if (pbi->common.filter_level) - { - int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV && - xd->mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; - const int seg = xd->mode_info_context->mbmi.segment_id; - const int ref_frame = xd->mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if( mb_row != pc->mb_rows-1 ) - { - /* Save decoded MB last row data for next-row decoding */ - vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); - vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); - 
vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); - } - - /* save left_col for next MB decoding */ - if(mb_col != pc->mb_cols-1) - { - MODE_INFO *next = xd->mode_info_context +1; - - if (next->mbmi.ref_frame == INTRA_FRAME) - { - for (i = 0; i < 16; i++) - pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; - for (i = 0; i < 8; i++) - { - pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; - pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7]; - } - } - } - - /* loopfilter on this macroblock. */ - if (filter_level) - { - if(pc->filter_type == NORMAL_LOOPFILTER) - { - loop_filter_info lfi; - FRAME_TYPE frame_type = pc->frame_type; - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - } - else - { - if (mb_col > 0) - LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v) - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v) - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - 
if (mb_row > 0) - LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h) - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h) - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - } - } - - } - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - - /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/ - pbi->mt_current_mb_col[mb_row] = mb_col; - } - - /* adjust to the next row of mbs */ - if (pbi->common.filter_level) - { - if(mb_row != pc->mb_rows-1) - { - int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; - int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); - - for (i = 0; i < 4; i++) - { - pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; - pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; - pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; - } - } - } else - vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - - /* since we have multithread */ - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - } - } - } - /* add this to each frame */ - if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1)) - { - /*SetEvent(pbi->h_event_end_decoding);*/ - sem_post(&pbi->h_event_end_decoding); - } - } - - return 0 ; -} - - -void vp8_decoder_create_threads(VP8D_COMP *pbi) -{ - int core_count = 0; - int ithread; - - pbi->b_multithreaded_rd = 0; - pbi->allocated_decoding_thread_count = 0; - - /* limit decoding threads to the max number of token partitions */ - core_count = (pbi->max_threads > 8) ? 
8 : pbi->max_threads; - - /* limit decoding threads to the available cores */ - if (core_count > pbi->common.processor_core_count) - core_count = pbi->common.processor_core_count; - - if (core_count > 1) - { - pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count - 1; - - CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); - CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); - CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count)); - vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count); - CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count)); - - for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) - { - sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); - - pbi->de_thread_data[ithread].ithread = ithread; - pbi->de_thread_data[ithread].ptr1 = (void *)pbi; - pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; - - pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread])); - } - - sem_init(&pbi->h_event_end_decoding, 0, 0); - - pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; - } -} - - -void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) -{ - int i; - - if (pbi->b_multithreaded_rd) - { - vpx_free(pbi->mt_current_mb_col); - pbi->mt_current_mb_col = NULL ; - - /* Free above_row buffers. 
*/ - if (pbi->mt_yabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_yabove_row[i]); - pbi->mt_yabove_row[i] = NULL ; - } - vpx_free(pbi->mt_yabove_row); - pbi->mt_yabove_row = NULL ; - } - - if (pbi->mt_uabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_uabove_row[i]); - pbi->mt_uabove_row[i] = NULL ; - } - vpx_free(pbi->mt_uabove_row); - pbi->mt_uabove_row = NULL ; - } - - if (pbi->mt_vabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_vabove_row[i]); - pbi->mt_vabove_row[i] = NULL ; - } - vpx_free(pbi->mt_vabove_row); - pbi->mt_vabove_row = NULL ; - } - - /* Free left_col buffers. */ - if (pbi->mt_yleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_yleft_col[i]); - pbi->mt_yleft_col[i] = NULL ; - } - vpx_free(pbi->mt_yleft_col); - pbi->mt_yleft_col = NULL ; - } - - if (pbi->mt_uleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_uleft_col[i]); - pbi->mt_uleft_col[i] = NULL ; - } - vpx_free(pbi->mt_uleft_col); - pbi->mt_uleft_col = NULL ; - } - - if (pbi->mt_vleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_vleft_col[i]); - pbi->mt_vleft_col[i] = NULL ; - } - vpx_free(pbi->mt_vleft_col); - pbi->mt_vleft_col = NULL ; - } - } -} - - -void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) -{ - VP8_COMMON *const pc = & pbi->common; - int i; - int uv_width; - - if (pbi->b_multithreaded_rd) - { - vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); - - /* our internal buffers are always multiples of 16 */ - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if (width < 640) pbi->sync_range = 1; - else if (width <= 1280) pbi->sync_range = 8; - else if (width <= 2560) pbi->sync_range =16; - else pbi->sync_range = 32; - - uv_width = width >>1; - - /* Allocate an int for each mb row. */ - CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows)); - - /* Allocate memory for above_row buffers. 
*/ - CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1)); - - CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1)); - - CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1)); - - /* Allocate memory for left_col buffers. */ - CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); - - CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - - CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); - for (i=0; i< pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - } -} - - -void vp8_decoder_remove_threads(VP8D_COMP *pbi) -{ - /* shutdown MB Decoding thread; */ - if (pbi->b_multithreaded_rd) - { - int i; - - pbi->b_multithreaded_rd = 0; - - /* allow all threads to exit */ - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_post(&pbi->h_event_start_decoding[i]); - pthread_join(pbi->h_decoding_thread[i], NULL); - } - - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_destroy(&pbi->h_event_start_decoding[i]); - } - - sem_destroy(&pbi->h_event_end_decoding); - - vpx_free(pbi->h_decoding_thread); - pbi->h_decoding_thread = NULL; - - 
vpx_free(pbi->h_event_start_decoding); - pbi->h_event_start_decoding = NULL; - - vpx_free(pbi->mb_row_di); - pbi->mb_row_di = NULL ; - - vpx_free(pbi->de_thread_data); - pbi->de_thread_data = NULL; - } -} - -void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - int mb_row; - VP8_COMMON *pc = &pbi->common; - - int num_part = 1 << pbi->common.multi_token_partition; - int i; - volatile int *last_row_current_mb_col = NULL; - int nsync = pbi->sync_range; - - int filter_level = pc->filter_level; - loop_filter_info_n *lfi_n = &pc->lf_info; - - if (filter_level) - { - /* Set above_row buffer to 127 for decoding first MB row */ - vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5); - vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); - vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); - - for (i=1; i<pc->mb_rows; i++) - { - vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); - vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - } - - /* Set left_col to 129 initially */ - for (i=0; i<pc->mb_rows; i++) - { - vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16); - vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8); - vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8); - } - - /* Initialize the loop filter for this frame.
*/ - vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); - } - - setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); - - for (i = 0; i < pbi->decoding_thread_count; i++) - sem_post(&pbi->h_event_start_decoding[i]); - - for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) - { - xd->current_bc = &pbi->mbc[mb_row%num_part]; - - /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */ - { - int i; - int recon_yoffset, recon_uvoffset; - int mb_col; - int ref_fb_idx = pc->lst_fb_idx; - int dst_fb_idx = pc->new_fb_idx; - int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - - /* volatile int *last_row_current_mb_col = NULL; */ - if (mb_row > 0) - last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; - - vpx_memset(&pc->left_context, 0, sizeof(pc->left_context)); - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - /* reset above block coeffs */ - - xd->above_context = pc->above_context; - xd->up_available = (mb_row != 0); - - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){ - while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) - { - x86_pause_hint(); - thread_sleep(0); - } - } - - /* Distance of MB to the various image edges. - * These are specified to 8th pel as they are always compared to - * values that are in 1/8th pel units. 
- */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - -#if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && - corrupt_residual) - { - /* We have an intra block with corrupt coefficients, - * better to conceal with an inter block. Interpolate - * MVs from neighboring MBs - * - * Note that for the first mb with corrupt residual in a - * frame, we might not discover that before decoding the - * residual. That happens after this check, and - * therefore no inter concealment will be done. - */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols, - pc->mode_info_stride); - } - } -#endif - - - xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; - - xd->left_available = (mb_col != 0); - - /* Select the appropriate reference frame for this MB */ - if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = pc->lst_fb_idx; - else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = pc->gld_fb_idx; - else - ref_fb_idx = pc->alt_fb_idx; - - xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; - - if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) - { - /* propagate errors from reference frames */ - xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; - } - - decode_macroblock(pbi, xd, mb_row, mb_col); - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - if (pbi->common.filter_level) - { 
- int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != I8X8_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV && - xd->mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; - const int seg = xd->mode_info_context->mbmi.segment_id; - const int ref_frame = xd->mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - /* Save decoded MB last row data for next-row decoding */ - if(mb_row != pc->mb_rows-1) - { - vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); - vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); - vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); - } - - /* save left_col for next MB decoding */ - if(mb_col != pc->mb_cols-1) - { - MODE_INFO *next = xd->mode_info_context +1; - - if (next->mbmi.ref_frame == INTRA_FRAME) - { - for (i = 0; i < 16; i++) - pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; - for (i = 0; i < 8; i++) - { - pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; - pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7]; - } - } - } - - /* loopfilter on this macroblock. 
*/ - if (filter_level) - { - if(pc->filter_type == NORMAL_LOOPFILTER) - { - loop_filter_info lfi; - FRAME_TYPE frame_type = pc->frame_type; - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_v) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, normal_b_v) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - LF_INVOKE(&pc->rtcd.loopfilter, normal_mb_h) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, normal_b_h) - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - } - else - { - if (mb_col > 0) - LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_v) - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, simple_b_v) - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - LF_INVOKE(&pc->rtcd.loopfilter, simple_mb_h) - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - LF_INVOKE(&pc->rtcd.loopfilter, simple_b_h) - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - } - } - - } - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - - pbi->mt_current_mb_col[mb_row] = mb_col; - } - - /* adjust to the next row of mbs */ - if (pbi->common.filter_level) - { - if(mb_row != pc->mb_rows-1) - { - int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; - int 
lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); - - for (i = 0; i < 4; i++) - { - pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; - pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; - pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; - } - } - }else - vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - } - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - } - - sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ -} diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index ae3ddd2..f3f0c1b 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -2868,12 +2868,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) { vp8_start_encode(&cpi->bc2, cx_data + bc->pos); -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) - pack_mb_row_tokens(cpi, &cpi->bc2); - else -#endif - pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count); + pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count); vp8_stop_encode(&cpi->bc2); diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index edbdf3f..7ad51df 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -834,16 +834,6 @@ void encode_mb_row(VP8_COMP *cpi, int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cpi->common.mb_cols); -#if CONFIG_MULTITHREAD - const int nsync = cpi->mt_sync_range; - const int rightmost_col = cm->mb_cols - 1; - volatile const int *last_row_current_mb_col; - - if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) - last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; - else - last_row_current_mb_col = &rightmost_col; -#endif // Reset the left context vp8_zero(cm->left_context) @@ -902,21 +892,6 @@ void 
encode_mb_row(VP8_COMP *cpi, //Copy current mb to a buffer RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16); -#if CONFIG_MULTITHREAD - if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) - { - if ((mb_col & (nsync - 1)) == 0) - { - while (mb_col > (*last_row_current_mb_col - nsync) - && (*last_row_current_mb_col) != (cm->mb_cols - 1)) - { - x86_pause_hint(); - thread_sleep(0); - } - } - } -#endif - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x); @@ -1036,12 +1011,6 @@ void encode_mb_row(VP8_COMP *cpi, x->partition_info++; xd->above_context++; -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded != 0) - { - cpi->mt_current_mb_col[mb_row] = mb_col; - } -#endif } //extend the recon for intra prediction @@ -1056,14 +1025,6 @@ void encode_mb_row(VP8_COMP *cpi, xd->mode_info_context++; x->partition_info++; -#if CONFIG_MULTITHREAD - if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1)) - { - sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ - } -#endif - - // debug output #if DBG_PRNT_SEGMAP { @@ -1286,61 +1247,6 @@ static void encode_frame_internal(VP8_COMP *cpi) struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) - { - int i; - - vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); - - for (i = 0; i < cm->mb_rows; i++) - cpi->mt_current_mb_col[i] = -1; - - for (i = 0; i < cpi->encoding_thread_count; i++) - { - sem_post(&cpi->h_event_start_encoding[i]); - } - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) - { - //vp8_zero(cm->left_context) - - tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate); - - // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride * 
(cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - - xd->mode_info_context += xd->mode_info_stride - * cpi->encoding_thread_count; - xd->prev_mode_info_context += xd->mode_info_stride - * cpi->encoding_thread_count; - - x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; - x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; - - } - - sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */ - - cpi->tok_count = 0; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) - { - cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start; - } - - for (i = 0; i < cpi->encoding_thread_count; i++) - { - totalrate += cpi->mb_row_ei[i].totalrate; - } - - } - else -#endif { #if CONFIG_SUPERBLOCKS // for each superblock row in the image diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c deleted file mode 100644 index be0d21a..0000000 --- a/vp8/encoder/ethreading.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "onyx_int.h" -#include "vp8/common/threading.h" -#include "vp8/common/common.h" -#include "vp8/common/extend.h" - -#if CONFIG_MULTITHREAD - -extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset); -extern int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t); -extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); -extern void vp8_build_block_offsets(MACROBLOCK *x); -extern void vp8_setup_block_ptrs(MACROBLOCK *x); - -#ifdef MODE_STATS -extern unsigned int inter_y_modes[MB_MODE_COUNT]; -extern unsigned int inter_uv_modes[VP8_UV_MODES]; -extern unsigned int inter_b_modes[B_MODE_COUNT]; -extern unsigned int y_modes[VP8_YMODES]; -extern unsigned int uv_modes[VP8_UV_MODES]; -extern unsigned int b_modes[B_MODE_COUNT]; -#endif -extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); - -static THREAD_FUNCTION loopfilter_thread(void *p_data) -{ - VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); - VP8_COMMON *cm = &cpi->common; - - while (1) - { - if (cpi->b_multi_threaded == 0) - break; - - if (sem_wait(&cpi->h_event_start_lpf) == 0) - { - if (cpi->b_multi_threaded == FALSE) // we're shutting down - break; - - loopfilter_frame(cpi, cm); - - sem_post(&cpi->h_event_end_lpf); - } - } - - return 0; -} - -static -THREAD_FUNCTION thread_encoding_proc(void *p_data) -{ - int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; - VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); - MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT_PLANES mb_row_left_context; - - const int nsync = cpi->mt_sync_range; - //printf("Started thread %d\n", ithread); - - while (1) - { - if (cpi->b_multi_threaded == 0) - break; - - //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0) - if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) - { - VP8_COMMON *cm = 
&cpi->common; - int mb_row; - MACROBLOCK *x = &mbri->mb; - MACROBLOCKD *xd = &x->e_mbd; - TOKENEXTRA *tp ; - - int *totalrate = &mbri->totalrate; - - if (cpi->b_multi_threaded == FALSE) // we're shutting down - break; - - for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) - { - - int recon_yoffset, recon_uvoffset; - int mb_col; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - int map_index = (mb_row * cm->mb_cols); - volatile int *last_row_current_mb_col; - - tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); - - last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; - - // reset above block coeffs - xd->above_context = cm->above_context; - xd->left_context = &mb_row_left_context; - - vp8_zero(mb_row_left_context); - - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); - - cpi->tplist[mb_row].start = tp; - - //printf("Thread mb_row = %d\n", mb_row); - - // Set the mb activity pointer to the start of the row. - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - if ((mb_col & (nsync - 1)) == 0) - { - while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1) - { - x86_pause_hint(); - thread_sleep(0); - } - } - - // Distance of Mb to the various image edges. 
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - - // Set up limit values for motion vectors used to prevent them extending outside the UMV borders - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - - //Copy current mb to a buffer - RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer, x->src.y_stride, x->thismb, 16); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp8_activity_masking(cpi, x); - - // Is segmentation enabled - // MB level adjutment to quantizer - if (xd->segmentation_enabled) - { - // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) - if (cpi->segmentation_map[map_index + mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; - else - xd->mode_info_context->mbmi.segment_id = 0; - - vp8cx_mb_init_quantizer(cpi, x); - } - else - xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default - - x->active_ptr = cpi->active_map + map_index + mb_col; - - if (cm->frame_type == KEY_FRAME) - { - *totalrate += vp8cx_encode_intra_macro_block(cpi, x, &tp); -#ifdef MODE_STATS - y_modes[xd->mode_info_context->mbmi.mode] ++; -#endif - } - else - { - *totalrate += 
vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset); - -#ifdef MODE_STATS - inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - - if (xd->mode_info_context->mbmi.mode == SPLITMV) - { - int b; - - for (b = 0; b < x->partition_info->count; b++) - { - inter_b_modes[x->partition_info->bmi[b].mode] ++; - } - } - -#endif - - // Count of last ref frame 0,0 useage - if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) - cpi->inter_zz_count++; - - // Special case code for cyclic refresh - // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode - // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map - if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) - { - const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; - - // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): - // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) - // else mark it as dirty (1). - if (mbmi->segment_id) - cpi->cyclic_refresh_map[map_index + mb_col] = -1; - else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) - { - if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) - cpi->cyclic_refresh_map[map_index + mb_col] = 0; - } - else - cpi->cyclic_refresh_map[map_index + mb_col] = 1; - - } - } - cpi->tplist[mb_row].stop = tp; - - // Increment pointer into gf useage flags structure. - x->gf_active_ptr++; - - // Increment the activity mask pointers. 
- x->mb_activity_ptr++; - - // adjust to the next column of macroblocks - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; - - recon_yoffset += 16; - recon_uvoffset += 8; - - // skip to next mb - xd->prev_mode_info_context++; - xd->mode_info_context++; - x->partition_info++; - xd->above_context++; - - cpi->mt_current_mb_col[mb_row] = mb_col; - } - - //extend the recon for intra prediction - vp8_extend_mb_row( - &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, - xd->dst.v_buffer + 8); - - // this is to account for the border - xd->prev_mode_info_context++; - - xd->mode_info_context++; - x->partition_info++; - - x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - - xd->mode_info_context += xd->mode_info_stride - * cpi->encoding_thread_count; - xd->prev_mode_info_context += xd->mode_info_stride - * cpi->encoding_thread_count; - - x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; - x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; - - if (mb_row == cm->mb_rows - 1) - { - //SetEvent(cpi->h_event_main); - sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ - } - } - } - } - - //printf("exit thread %d\n", ithread); - return 0; -} - -static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) -{ - - MACROBLOCK *x = mbsrc; - MACROBLOCK *z = mbdst; - int i; - - z->ss = x->ss; - z->ss_count = x->ss_count; - z->searches_per_step = x->searches_per_step; - z->errorperbit = x->errorperbit; - - z->sadperbit16 = x->sadperbit16; - z->sadperbit4 = x->sadperbit4; - - /* - z->mv_col_min = x->mv_col_min; - z->mv_col_max = x->mv_col_max; - z->mv_row_min = x->mv_row_min; - z->mv_row_max = x->mv_row_max; - z->vector_range = x->vector_range ; - */ - - 
z->vp8_short_fdct4x4 = x->vp8_short_fdct4x4; - z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4; - z->short_walsh4x4 = x->short_walsh4x4; - z->quantize_b = x->quantize_b; - z->quantize_b_pair = x->quantize_b_pair; - z->optimize = x->optimize; - - /* - z->mvc = x->mvc; - z->src.y_buffer = x->src.y_buffer; - z->src.u_buffer = x->src.u_buffer; - z->src.v_buffer = x->src.v_buffer; - */ - - - vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); - z->mvcost[0] = &z->mvcosts[0][mv_max+1]; - z->mvcost[1] = &z->mvcosts[1][mv_max+1]; - z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; - z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; - - - vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs)); - vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs)); - //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); - //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost)); - vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost)); - vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost)); - vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs)); - - for (i = 0; i < 25; i++) - { - z->block[i].quant = x->block[i].quant; - z->block[i].quant_fast = x->block[i].quant_fast; - z->block[i].quant_shift = x->block[i].quant_shift; - z->block[i].zbin = x->block[i].zbin; - z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; - z->block[i].round = x->block[i].round; - /* - z->block[i].src = x->block[i].src; - */ - z->block[i].src_stride = x->block[i].src_stride; - z->block[i].eob_max_offset = x->block[i].eob_max_offset; -#if CONFIG_T8X8 - z->block[i].eob_max_offset_8x8 = x->block[i].eob_max_offset_8x8; -#endif - } - - { - MACROBLOCKD *xd = &x->e_mbd; - MACROBLOCKD *zd = &z->e_mbd; - - /* - zd->mode_info_context = xd->mode_info_context; - zd->mode_info = xd->mode_info; - - zd->mode_info_stride = xd->mode_info_stride; - zd->frame_type = xd->frame_type; - zd->up_available = xd->up_available ; - 
zd->left_available = xd->left_available; - zd->left_context = xd->left_context; - zd->last_frame_dc = xd->last_frame_dc; - zd->last_frame_dccons = xd->last_frame_dccons; - zd->gold_frame_dc = xd->gold_frame_dc; - zd->gold_frame_dccons = xd->gold_frame_dccons; - zd->mb_to_left_edge = xd->mb_to_left_edge; - zd->mb_to_right_edge = xd->mb_to_right_edge; - zd->mb_to_top_edge = xd->mb_to_top_edge ; - zd->mb_to_bottom_edge = xd->mb_to_bottom_edge; - zd->gf_active_ptr = xd->gf_active_ptr; - zd->frames_since_golden = xd->frames_since_golden; - zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame; - */ - zd->subpixel_predict = xd->subpixel_predict; - zd->subpixel_predict8x4 = xd->subpixel_predict8x4; - zd->subpixel_predict8x8 = xd->subpixel_predict8x8; - zd->subpixel_predict16x16 = xd->subpixel_predict16x16; - zd->subpixel_predict_avg8x8 = xd->subpixel_predict_avg8x8; - zd->subpixel_predict_avg16x16 = xd->subpixel_predict_avg16x16; - zd->segmentation_enabled = xd->segmentation_enabled; - zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - - vpx_memcpy(zd->segment_feature_data, - xd->segment_feature_data, - sizeof(xd->segment_feature_data)); - - vpx_memcpy(zd->segment_feature_mask, - xd->segment_feature_mask, - sizeof(xd->segment_feature_mask)); - - for (i = 0; i < 25; i++) - { - zd->block[i].dequant = xd->block[i].dequant; - } - } -} - -void vp8cx_init_mbrthread_data(VP8_COMP *cpi, - MACROBLOCK *x, - MB_ROW_COMP *mbr_ei, - int mb_row, - int count - ) -{ - - VP8_COMMON *const cm = & cpi->common; - MACROBLOCKD *const xd = & x->e_mbd; - int i; - (void) mb_row; - - for (i = 0; i < count; i++) - { - MACROBLOCK *mb = & mbr_ei[i].mb; - MACROBLOCKD *mbd = &mb->e_mbd; - - mbd->subpixel_predict = xd->subpixel_predict; - mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; - mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; - mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - mbd->subpixel_predict_avg8x8 = xd->subpixel_predict_avg8x8; - 
mbd->subpixel_predict_avg16x16 = xd->subpixel_predict_avg16x16; -#if CONFIG_RUNTIME_CPU_DETECT - mbd->rtcd = xd->rtcd; -#endif - mb->gf_active_ptr = x->gf_active_ptr; - - mb->vector_range = 32; - - mbr_ei[i].totalrate = 0; - - mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); - - mbd->mode_info_context = cm->mi - + x->e_mbd.mode_info_stride * (i + 1); - mbd->prev_mode_info_context = cm->prev_mi - + x->e_mbd.mode_info_stride * (i + 1); - mbd->mode_info_stride = cm->mode_info_stride; - - mbd->frame_type = cm->frame_type; - - mbd->frames_since_golden = cm->frames_since_golden; - mbd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame; - - mb->src = * cpi->Source; - mbd->pre = cm->yv12_fb[cm->lst_fb_idx]; - mbd->dst = cm->yv12_fb[cm->new_fb_idx]; - - mb->src.y_buffer += 16 * x->src.y_stride * (i + 1); - mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1); - mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1); - - vp8_build_block_offsets(mb); - - vp8_setup_block_dptrs(mbd); - - vp8_setup_block_ptrs(mb); - - mbd->left_context = &cm->left_context; - mb->mvc = cm->fc.mvc; - - setup_mbby_copy(&mbr_ei[i].mb, x); - - mbd->fullpixel_mask = 0xffffffff; - if(cm->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; - } -} - -void vp8cx_create_encoder_threads(VP8_COMP *cpi) -{ - const VP8_COMMON * cm = &cpi->common; - - cpi->b_multi_threaded = 0; - cpi->encoding_thread_count = 0; - - if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) - { - int ithread; - int th_count = cpi->oxcf.multi_threaded - 1; - - /* don't allocate more threads than cores available */ - if (cpi->oxcf.multi_threaded > cm->processor_core_count) - th_count = cm->processor_core_count - 1; - - /* we have th_count + 1 (main) threads processing one row each */ - /* no point to have more threads than the sync range allows */ - if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) - { - th_count = (cm->mb_cols / cpi->mt_sync_range) - 1; - } - - if(th_count == 0) - return; - - 
CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count)); - CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count)); - CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); - vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); - CHECK_MEM_ERROR(cpi->en_thread_data, - vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); - CHECK_MEM_ERROR(cpi->mt_current_mb_col, - vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); - - sem_init(&cpi->h_event_end_encoding, 0, 0); - - cpi->b_multi_threaded = 1; - cpi->encoding_thread_count = th_count; - - /* - printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", - (cpi->encoding_thread_count +1)); - */ - - for (ithread = 0; ithread < th_count; ithread++) - { - ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread]; - - sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); - ethd->ithread = ithread; - ethd->ptr1 = (void *)cpi; - ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; - - pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); - } - - { - LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; - - sem_init(&cpi->h_event_start_lpf, 0, 0); - sem_init(&cpi->h_event_end_lpf, 0, 0); - - lpfthd->ptr1 = (void *)cpi; - pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd); - } - } - -} - -void vp8cx_remove_encoder_threads(VP8_COMP *cpi) -{ - if (cpi->b_multi_threaded) - { - //shutdown other threads - cpi->b_multi_threaded = 0; - { - int i; - - for (i = 0; i < cpi->encoding_thread_count; i++) - { - //SetEvent(cpi->h_event_mbrencoding[i]); - sem_post(&cpi->h_event_start_encoding[i]); - pthread_join(cpi->h_encoding_thread[i], 0); - - sem_destroy(&cpi->h_event_start_encoding[i]); - } - - sem_post(&cpi->h_event_start_lpf); - pthread_join(cpi->h_filter_thread, 0); - } - - sem_destroy(&cpi->h_event_end_encoding); - sem_destroy(&cpi->h_event_end_lpf); - sem_destroy(&cpi->h_event_start_lpf); - - 
//free thread related resources - vpx_free(cpi->h_event_start_encoding); - vpx_free(cpi->h_encoding_thread); - vpx_free(cpi->mb_row_ei); - vpx_free(cpi->en_thread_data); - vpx_free(cpi->mt_current_mb_col); - } -} -#endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 45b9dfa..1e170f8 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -30,7 +30,6 @@ #endif #include "vpx_mem/vpx_mem.h" #include "vp8/common/swapyv12buffer.h" -#include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" #include "temporal_filter.h" @@ -1787,17 +1786,6 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) "Failed to allocate firstpass stats"); #endif -#if CONFIG_MULTITHREAD - if (width < 640) - cpi->mt_sync_range = 1; - else if (width <= 1280) - cpi->mt_sync_range = 4; - else if (width <= 2560) - cpi->mt_sync_range = 8; - else - cpi->mt_sync_range = 16; -#endif - vpx_free(cpi->tplist); CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); @@ -2494,10 +2482,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) init_mv_ref_counts(); #endif -#if CONFIG_MULTITHREAD - vp8cx_create_encoder_threads(cpi); -#endif - cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16); @@ -2816,10 +2800,6 @@ void vp8_remove_compressor(VP8_PTR *ptr) } -#if CONFIG_MULTITHREAD - vp8cx_remove_encoder_threads(cpi); -#endif - dealloc_compressor_data(cpi); vpx_free(cpi->mb.ss); vpx_free(cpi->tok); @@ -3647,11 +3627,6 @@ void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) - sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */ -#endif - if (cm->filter_level > 0) { vp8cx_set_alt_lf_level(cpi, cm->filter_level); @@ -4737,13 +4712,6 @@ static void 
encode_frame_to_data_rate cm->current_video_frame+1000); #endif -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) - { - sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */ - } - else -#endif { loopfilter_frame(cpi, cm); } @@ -4755,12 +4723,6 @@ static void encode_frame_to_data_rate cm->refresh_entropy_probs = 0; } -#if CONFIG_MULTITHREAD - /* wait that filter_level is picked so that we can continue with stream packing */ - if (cpi->b_multi_threaded) - sem_wait(&cpi->h_event_end_lpf); -#endif - // Work out the segment probabilites if segmentation is enabled and // the map is due to be updated if (xd->segmentation_enabled && xd->update_mb_segmentation_map) @@ -4780,14 +4742,6 @@ static void encode_frame_to_data_rate // build the bitstream vp8_pack_bitstream(cpi, dest, size); -#if CONFIG_MULTITHREAD - /* wait for loopfilter thread done */ - if (cpi->b_multi_threaded) - { - sem_wait(&cpi->h_event_end_lpf); - } -#endif - /* Move storing frame_type out of the above loop since it is also * needed in motion search besides loopfilter */ cm->last_frame_type = cm->frame_type; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index f67729b..8897bd5 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -23,7 +23,6 @@ #include "encodemb.h" #include "quantize.h" #include "vp8/common/entropy.h" -#include "vp8/common/threading.h" #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" #include "mcomp.h" @@ -543,27 +542,6 @@ typedef struct VP8_COMP int cyclic_refresh_q; signed char *cyclic_refresh_map; -#if CONFIG_MULTITHREAD - // multithread data - int * mt_current_mb_col; - int mt_sync_range; - int b_multi_threaded; - int encoding_thread_count; - - pthread_t *h_encoding_thread; - pthread_t h_filter_thread; - - MB_ROW_COMP *mb_row_ei; - ENCODETHREAD_DATA *en_thread_data; - LPFTHREAD_DATA lpf_thread_data; - - //events - sem_t *h_event_start_encoding; - sem_t h_event_end_encoding; - sem_t h_event_start_lpf; - sem_t 
h_event_end_lpf; -#endif - TOKENLIST *tplist; unsigned int partition_sz[MAX_PARTITIONS]; // end of multithread data diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index b9ade1c..8455bb8 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -26,7 +26,6 @@ #include "vpx_scale/yv12extend.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/swapyv12buffer.h" -#include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" #include diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 5a44cd1..c4bd283 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -58,7 +58,6 @@ VP8_COMMON_SRCS-yes += common/setupintrarecon.h VP8_COMMON_SRCS-yes += common/subpixel.h VP8_COMMON_SRCS-yes += common/swapyv12buffer.h VP8_COMMON_SRCS-yes += common/systemdependent.h -VP8_COMMON_SRCS-yes += common/threading.h VP8_COMMON_SRCS-yes += common/treecoder.h VP8_COMMON_SRCS-yes += common/invtrans.c VP8_COMMON_SRCS-yes += common/loopfilter.c diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index ab3352a..e11bea2 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -42,7 +42,6 @@ VP8_CX_SRCS-yes += encoder/encodeframe.c VP8_CX_SRCS-yes += encoder/encodeintra.c VP8_CX_SRCS-yes += encoder/encodemb.c VP8_CX_SRCS-yes += encoder/encodemv.c -VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c VP8_CX_SRCS-yes += encoder/firstpass.c VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c VP8_CX_SRCS-yes += encoder/block.h diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk index d88b595..5efae08 100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@ -60,16 +60,12 @@ VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.c VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c VP8_DX_SRCS-yes += decoder/dboolhuff.h VP8_DX_SRCS-yes += decoder/decodemv.h -VP8_DX_SRCS-yes += decoder/decoderthreading.h VP8_DX_SRCS-yes += decoder/dequantize.h VP8_DX_SRCS-yes += decoder/detokenize.h VP8_DX_SRCS-yes += decoder/onyxd_int.h VP8_DX_SRCS-yes += decoder/treereader.h 
VP8_DX_SRCS-yes += decoder/onyxd_if.c -VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c VP8_DX_SRCS-yes += decoder/idct_blk.c -VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h -VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))